github.com/matrixorigin/matrixone@v1.2.0/pkg/vectorize/moarray/external.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package moarray
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    19  	"github.com/matrixorigin/matrixone/pkg/container/types"
    20  	"github.com/matrixorigin/matrixone/pkg/vectorize/momath"
    21  	"gonum.org/v1/gonum/mat"
    22  	"math"
    23  )
    24  
    25  // These functions are exposed externally via SQL API.
    26  
    27  func Add[T types.RealNumbers](v1, v2 []T) ([]T, error) {
    28  	vec, err := ToGonumVectors[T](v1, v2)
    29  	if err != nil {
    30  		return nil, err
    31  	}
    32  
    33  	vec[0].AddVec(vec[0], vec[1])
    34  	return ToMoArray[T](vec[0])
    35  }
    36  
    37  func Subtract[T types.RealNumbers](v1, v2 []T) ([]T, error) {
    38  	vec, err := ToGonumVectors[T](v1, v2)
    39  	if err != nil {
    40  		return nil, err
    41  	}
    42  
    43  	vec[0].SubVec(vec[0], vec[1])
    44  	return ToMoArray[T](vec[0])
    45  }
    46  
    47  func Multiply[T types.RealNumbers](v1, v2 []T) ([]T, error) {
    48  	vec, err := ToGonumVectors[T](v1, v2)
    49  	if err != nil {
    50  		return nil, err
    51  	}
    52  
    53  	vec[0].MulElemVec(vec[0], vec[1])
    54  	return ToMoArray[T](vec[0])
    55  }
    56  
    57  func Divide[T types.RealNumbers](v1, v2 []T) ([]T, error) {
    58  	// pre-check for division by zero
    59  	for i := 0; i < len(v2); i++ {
    60  		if v2[i] == 0 {
    61  			return nil, moerr.NewDivByZeroNoCtx()
    62  		}
    63  	}
    64  
    65  	vec, err := ToGonumVectors[T](v1, v2)
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  
    70  	vec[0].DivElemVec(vec[0], vec[1])
    71  	return ToMoArray[T](vec[0])
    72  }
    73  
    74  // Compare returns an integer comparing two arrays/vectors lexicographically.
    75  // TODO: this function might not be correct. we need to compare using tolerance for float values.
    76  // TODO: need to check if we need len(v1)==len(v2) check.
    77  func Compare[T types.RealNumbers](v1, v2 []T) int {
    78  	minLen := len(v1)
    79  	if len(v2) < minLen {
    80  		minLen = len(v2)
    81  	}
    82  
    83  	for i := 0; i < minLen; i++ {
    84  		if v1[i] < v2[i] {
    85  			return -1
    86  		} else if v1[i] > v2[i] {
    87  			return 1
    88  		}
    89  	}
    90  
    91  	if len(v1) < len(v2) {
    92  		return -1
    93  	} else if len(v1) > len(v2) {
    94  		return 1
    95  	}
    96  	return 0
    97  }
    98  
    99  /* ------------ [START] Performance critical functions. ------- */
   100  
   101  func InnerProduct[T types.RealNumbers](v1, v2 []T) (float64, error) {
   102  
   103  	vec, err := ToGonumVectors[T](v1, v2)
   104  	if err != nil {
   105  		return 0, err
   106  	}
   107  
   108  	return mat.Dot(vec[0], vec[1]), nil
   109  }
   110  
   111  func L2Distance[T types.RealNumbers](v1, v2 []T) (float64, error) {
   112  	if len(v1) != len(v2) {
   113  		return 0, moerr.NewArrayInvalidOpNoCtx(len(v1), len(v2))
   114  	}
   115  	var sumOfSquares T
   116  	for i := range v1 {
   117  		difference := v1[i] - v2[i]
   118  		sumOfSquares += difference * difference
   119  	}
   120  	return math.Sqrt(float64(sumOfSquares)), nil
   121  }
   122  
   123  func CosineDistance[T types.RealNumbers](v1, v2 []T) (float64, error) {
   124  	cosineSimilarity, err := CosineSimilarity[T](v1, v2)
   125  	if err != nil {
   126  		return 0, err
   127  	}
   128  
   129  	return 1 - cosineSimilarity, nil
   130  }
   131  
   132  func CosineSimilarity[T types.RealNumbers](v1, v2 []T) (float64, error) {
   133  
   134  	vec, err := ToGonumVectors[T](v1, v2)
   135  	if err != nil {
   136  		return 0, err
   137  	}
   138  
   139  	dotProduct := mat.Dot(vec[0], vec[1])
   140  
   141  	normVec1 := mat.Norm(vec[0], 2)
   142  	normVec2 := mat.Norm(vec[1], 2)
   143  
   144  	if normVec1 == 0 || normVec2 == 0 {
   145  		return 0, moerr.NewInternalErrorNoCtx("cosine_similarity: one of the vectors is zero")
   146  	}
   147  
   148  	cosineSimilarity := dotProduct / (normVec1 * normVec2)
   149  
   150  	// Handle precision issues. Clamp the cosine_similarity to the range [-1, 1].
   151  	if cosineSimilarity > 1.0 {
   152  		cosineSimilarity = 1.0
   153  	} else if cosineSimilarity < -1.0 {
   154  		cosineSimilarity = -1.0
   155  	}
   156  
   157  	// NOTE: Downcast the float64 cosine_similarity to float32 and check if it is
   158  	// 1.0 or -1.0 to avoid precision issue.
   159  	//
   160  	//  Example for corner case:
   161  	// - cosine_similarity(a,a) = 1:
   162  	// - Without downcasting check, we get the following results:
   163  	//   cosine_similarity( [0.46323407, 23.498016, 563.923, 56.076736, 8732.958] ,
   164  	//					    [0.46323407, 23.498016, 563.923, 56.076736, 8732.958] ) =   0.9999999999999998
   165  	// - With downcasting, we get the following results:
   166  	//   cosine_similarity( [0.46323407, 23.498016, 563.923, 56.076736, 8732.958] ,
   167  	//					    [0.46323407, 23.498016, 563.923, 56.076736, 8732.958] ) =   1
   168  	//
   169  	//  Reason:
   170  	// The reason for this check is
   171  	// 1. gonums mat.Dot, mat.Norm returns float64. In other databases, we mostly do float32 operations.
   172  	// 2. float64 operations are not exact.
   173  	// mysql> select 76586261.65813679/(8751.35770370157 *8751.35770370157);
   174  	//+-----------------------------------------------------------+
   175  	//| 76586261.65813679 / (8751.35770370157 * 8751.35770370157) |
   176  	//+-----------------------------------------------------------+
   177  	//|                                            1.000000000000 |
   178  	//+-----------------------------------------------------------+
   179  	//mysql> select cast(76586261.65813679 as double)/(8751.35770370157 * 8751.35770370157);
   180  	//+---------------------------------------------------------------------------+
   181  	//| cast(76586261.65813679 as double) / (8751.35770370157 * 8751.35770370157) |
   182  	//+---------------------------------------------------------------------------+
   183  	//|                                                        0.9999999999999996 |
   184  	//+---------------------------------------------------------------------------+
   185  	// 3. We only need to handle the case for 1.0 and -1.0 with float32 precision.
   186  	//    Rest of the cases can have float64 precision.
   187  	cosineSimilarityF32 := float32(cosineSimilarity)
   188  	if cosineSimilarityF32 == 1 {
   189  		cosineSimilarity = 1
   190  	} else if cosineSimilarityF32 == -1 {
   191  		cosineSimilarity = -1
   192  	}
   193  
   194  	return cosineSimilarity, nil
   195  }
   196  
   197  func NormalizeL2[T types.RealNumbers](v1 []T) ([]T, error) {
   198  
   199  	if len(v1) == 0 {
   200  		return nil, moerr.NewInternalErrorNoCtx("cannot normalize empty vector")
   201  	}
   202  
   203  	// Compute the norm of the vector
   204  	var sumSquares float64
   205  	for _, val := range v1 {
   206  		sumSquares += float64(val) * float64(val)
   207  	}
   208  	norm := math.Sqrt(sumSquares)
   209  	if norm == 0 {
   210  		return v1, nil
   211  	}
   212  
   213  	// Divide each element by the norm
   214  	normalized := make([]T, len(v1))
   215  	for i, val := range v1 {
   216  		normalized[i] = T(float64(val) / norm)
   217  	}
   218  
   219  	return normalized, nil
   220  }
   221  
   222  // L1Norm returns l1 distance to origin.
   223  func L1Norm[T types.RealNumbers](v []T) (float64, error) {
   224  	vec := ToGonumVector[T](v)
   225  
   226  	return mat.Norm(vec, 1), nil
   227  }
   228  
   229  // L2Norm returns l2 distance to origin.
   230  func L2Norm[T types.RealNumbers](v []T) (float64, error) {
   231  	vec := ToGonumVector[T](v)
   232  
   233  	return mat.Norm(vec, 2), nil
   234  }
   235  
   236  func ScalarOp[T types.RealNumbers](v []T, operation string, scalar float64) ([]T, error) {
   237  	vec := ToGonumVector[T](v)
   238  	switch operation {
   239  	case "+", "-":
   240  		//TODO: optimize this in future.
   241  		scalarVec := make([]float64, vec.Len())
   242  		if operation == "+" {
   243  			for i := range scalarVec {
   244  				scalarVec[i] = scalar
   245  			}
   246  		} else {
   247  			for i := range scalarVec {
   248  				scalarVec[i] = -scalar
   249  			}
   250  		}
   251  		scalarDenseVec := mat.NewVecDense(vec.Len(), scalarVec)
   252  		vec.AddVec(vec, scalarDenseVec)
   253  	case "*", "/":
   254  		var scale float64
   255  		if operation == "/" {
   256  			if scalar == 0 {
   257  				return nil, moerr.NewDivByZeroNoCtx()
   258  			}
   259  			scale = float64(1) / scalar
   260  		} else {
   261  			scale = scalar
   262  		}
   263  		vec.ScaleVec(scale, vec)
   264  	default:
   265  		return nil, moerr.NewInternalErrorNoCtx("scale_vector: invalid operation")
   266  	}
   267  	return ToMoArray[T](vec)
   268  }
   269  
   270  /* ------------ [END] Performance critical functions. ------- */
   271  
   272  /* ------------ [START] mat.VecDense not supported functions ------- */
   273  
   274  func Abs[T types.RealNumbers](v []T) (res []T, err error) {
   275  	n := len(v)
   276  	res = make([]T, n)
   277  	for i := 0; i < n; i++ {
   278  		res[i], err = momath.AbsSigned[T](v[i])
   279  		if err != nil {
   280  			return nil, err
   281  		}
   282  	}
   283  	return res, nil
   284  }
   285  
   286  func Sqrt[T types.RealNumbers](v []T) (res []float64, err error) {
   287  	n := len(v)
   288  	res = make([]float64, n)
   289  	for i := 0; i < n; i++ {
   290  		res[i], err = momath.Sqrt(float64(v[i]))
   291  		if err != nil {
   292  			return nil, err
   293  		}
   294  	}
   295  	return res, nil
   296  }
   297  
   298  func Summation[T types.RealNumbers](v []T) (float64, error) {
   299  	n := len(v)
   300  	var sum float64 = 0
   301  	for i := 0; i < n; i++ {
   302  		sum += float64(v[i])
   303  	}
   304  	return sum, nil
   305  }
   306  
   307  func Cast[I types.RealNumbers, O types.RealNumbers](in []I) (out []O, err error) {
   308  	n := len(in)
   309  
   310  	out = make([]O, n)
   311  	for i := 0; i < n; i++ {
   312  		out[i] = O(in[i])
   313  	}
   314  
   315  	return out, nil
   316  }
   317  
   318  /** Slice Array **/
   319  
   320  // SubArrayFromLeft Slice from left to right, starting from 0
   321  func SubArrayFromLeft[T types.RealNumbers](s []T, offset int64) []T {
   322  	totalLen := int64(len(s))
   323  	if offset > totalLen {
   324  		return []T{}
   325  	}
   326  	return s[offset:]
   327  }
   328  
   329  // SubArrayFromRight Cut slices from right to left, starting from 1
   330  func SubArrayFromRight[T types.RealNumbers](s []T, offset int64) []T {
   331  	totalLen := int64(len(s))
   332  	if offset > totalLen {
   333  		return []T{}
   334  	}
   335  	return s[totalLen-offset:]
   336  }
   337  
   338  // SubArrayFromLeftWithLength Cut the slice with length from left to right, starting from 0
   339  func SubArrayFromLeftWithLength[T types.RealNumbers](s []T, offset int64, length int64) []T {
   340  	if offset < 0 {
   341  		return []T{}
   342  	}
   343  	return subArrayOffsetLen(s, offset, length)
   344  }
   345  
   346  // SubArrayFromRightWithLength From right to left, cut the slice with length from 1
   347  func SubArrayFromRightWithLength[T types.RealNumbers](s []T, offset int64, length int64) []T {
   348  	return subArrayOffsetLen(s, -offset, length)
   349  }
   350  
   351  func subArrayOffsetLen[T types.RealNumbers](s []T, offset int64, length int64) []T {
   352  	totalLen := int64(len(s))
   353  	if offset < 0 {
   354  		offset += totalLen
   355  		if offset < 0 {
   356  			return []T{}
   357  		}
   358  	}
   359  	if offset >= totalLen {
   360  		return []T{}
   361  	}
   362  
   363  	if length <= 0 {
   364  		return []T{}
   365  	} else {
   366  		end := offset + length
   367  		if end > totalLen {
   368  			end = totalLen
   369  		}
   370  		return s[offset:end]
   371  	}
   372  }
   373  
   374  /* ------------ [END] mat.VecDense not supported functions ------- */