github.com/matrixorigin/matrixone@v1.2.0/pkg/vectorize/moarray/external.go (about) 1 // Copyright 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package moarray 16 17 import ( 18 "github.com/matrixorigin/matrixone/pkg/common/moerr" 19 "github.com/matrixorigin/matrixone/pkg/container/types" 20 "github.com/matrixorigin/matrixone/pkg/vectorize/momath" 21 "gonum.org/v1/gonum/mat" 22 "math" 23 ) 24 25 // These functions are exposed externally via SQL API. 
26 27 func Add[T types.RealNumbers](v1, v2 []T) ([]T, error) { 28 vec, err := ToGonumVectors[T](v1, v2) 29 if err != nil { 30 return nil, err 31 } 32 33 vec[0].AddVec(vec[0], vec[1]) 34 return ToMoArray[T](vec[0]) 35 } 36 37 func Subtract[T types.RealNumbers](v1, v2 []T) ([]T, error) { 38 vec, err := ToGonumVectors[T](v1, v2) 39 if err != nil { 40 return nil, err 41 } 42 43 vec[0].SubVec(vec[0], vec[1]) 44 return ToMoArray[T](vec[0]) 45 } 46 47 func Multiply[T types.RealNumbers](v1, v2 []T) ([]T, error) { 48 vec, err := ToGonumVectors[T](v1, v2) 49 if err != nil { 50 return nil, err 51 } 52 53 vec[0].MulElemVec(vec[0], vec[1]) 54 return ToMoArray[T](vec[0]) 55 } 56 57 func Divide[T types.RealNumbers](v1, v2 []T) ([]T, error) { 58 // pre-check for division by zero 59 for i := 0; i < len(v2); i++ { 60 if v2[i] == 0 { 61 return nil, moerr.NewDivByZeroNoCtx() 62 } 63 } 64 65 vec, err := ToGonumVectors[T](v1, v2) 66 if err != nil { 67 return nil, err 68 } 69 70 vec[0].DivElemVec(vec[0], vec[1]) 71 return ToMoArray[T](vec[0]) 72 } 73 74 // Compare returns an integer comparing two arrays/vectors lexicographically. 75 // TODO: this function might not be correct. we need to compare using tolerance for float values. 76 // TODO: need to check if we need len(v1)==len(v2) check. 77 func Compare[T types.RealNumbers](v1, v2 []T) int { 78 minLen := len(v1) 79 if len(v2) < minLen { 80 minLen = len(v2) 81 } 82 83 for i := 0; i < minLen; i++ { 84 if v1[i] < v2[i] { 85 return -1 86 } else if v1[i] > v2[i] { 87 return 1 88 } 89 } 90 91 if len(v1) < len(v2) { 92 return -1 93 } else if len(v1) > len(v2) { 94 return 1 95 } 96 return 0 97 } 98 99 /* ------------ [START] Performance critical functions. 
------- */

// InnerProduct returns the dot product of v1 and v2 as a float64.
// Errors from ToGonumVectors are returned unchanged.
func InnerProduct[T types.RealNumbers](v1, v2 []T) (float64, error) {
	vec, err := ToGonumVectors[T](v1, v2)
	if err != nil {
		return 0, err
	}

	return mat.Dot(vec[0], vec[1]), nil
}

// L2Distance returns the Euclidean distance between v1 and v2.
// It returns the error built by moerr.NewArrayInvalidOpNoCtx when the two
// slices have different lengths.
//
// NOTE(review): the sum of squares is accumulated in T, not float64, so for
// float32 inputs the accumulation happens at float32 precision and range.
// Left as is to keep results stable for existing callers; confirm whether
// widening to float64 is acceptable.
func L2Distance[T types.RealNumbers](v1, v2 []T) (float64, error) {
	if len(v1) != len(v2) {
		return 0, moerr.NewArrayInvalidOpNoCtx(len(v1), len(v2))
	}

	var sumOfSquares T
	for i := range v1 {
		difference := v1[i] - v2[i]
		sumOfSquares += difference * difference
	}
	return math.Sqrt(float64(sumOfSquares)), nil
}

// CosineDistance returns 1 - CosineSimilarity(v1, v2).
// Errors from CosineSimilarity are returned unchanged.
func CosineDistance[T types.RealNumbers](v1, v2 []T) (float64, error) {
	cosineSimilarity, err := CosineSimilarity[T](v1, v2)
	if err != nil {
		return 0, err
	}

	return 1 - cosineSimilarity, nil
}

// CosineSimilarity returns dot(v1, v2) / (|v1| * |v2|), clamped to [-1, 1].
// It returns an internal error when either vector has a zero L2 norm, and
// passes through any error from ToGonumVectors.
func CosineSimilarity[T types.RealNumbers](v1, v2 []T) (float64, error) {
	vec, err := ToGonumVectors[T](v1, v2)
	if err != nil {
		return 0, err
	}

	dotProduct := mat.Dot(vec[0], vec[1])

	normVec1 := mat.Norm(vec[0], 2)
	normVec2 := mat.Norm(vec[1], 2)

	if normVec1 == 0 || normVec2 == 0 {
		return 0, moerr.NewInternalErrorNoCtx("cosine_similarity: one of the vectors is zero")
	}

	cosineSimilarity := dotProduct / (normVec1 * normVec2)

	// Handle precision issues. Clamp the cosine_similarity to the range [-1, 1].
	if cosineSimilarity > 1.0 {
		cosineSimilarity = 1.0
	} else if cosineSimilarity < -1.0 {
		cosineSimilarity = -1.0
	}

	// NOTE: Downcast the float64 cosine_similarity to float32 and check if it is
	// 1.0 or -1.0 to avoid precision issue.
	//
	// Example for corner case:
	// - cosine_similarity(a,a) = 1:
	// - Without downcasting check, we get the following results:
	//   cosine_similarity( [0.46323407, 23.498016, 563.923, 56.076736, 8732.958] ,
	//						[0.46323407, 23.498016, 563.923, 56.076736, 8732.958] ) = 0.9999999999999998
	// - With downcasting, we get the following results:
	//   cosine_similarity( [0.46323407, 23.498016, 563.923, 56.076736, 8732.958] ,
	//						[0.46323407, 23.498016, 563.923, 56.076736, 8732.958] ) = 1
	//
	// Reason:
	// The reason for this check is
	// 1. gonums mat.Dot, mat.Norm returns float64. In other databases, we mostly do float32 operations.
	// 2. float64 operations are not exact.
	// mysql> select 76586261.65813679/(8751.35770370157 *8751.35770370157);
	//+-----------------------------------------------------------+
	//| 76586261.65813679 / (8751.35770370157 * 8751.35770370157) |
	//+-----------------------------------------------------------+
	//|                                            1.000000000000 |
	//+-----------------------------------------------------------+
	//mysql> select cast(76586261.65813679 as double)/(8751.35770370157 * 8751.35770370157);
	//+---------------------------------------------------------------------------+
	//| cast(76586261.65813679 as double) / (8751.35770370157 * 8751.35770370157) |
	//+---------------------------------------------------------------------------+
	//|                                                        0.9999999999999996 |
	//+---------------------------------------------------------------------------+
	// 3. We only need to handle the case for 1.0 and -1.0 with float32 precision.
	//    Rest of the cases can have float64 precision.
	cosineSimilarityF32 := float32(cosineSimilarity)
	if cosineSimilarityF32 == 1 {
		cosineSimilarity = 1
	} else if cosineSimilarityF32 == -1 {
		cosineSimilarity = -1
	}

	return cosineSimilarity, nil
}

// NormalizeL2 scales v1 so that its L2 norm is 1.
// It returns an internal error for an empty input. When the norm is zero the
// input slice itself is returned (not a copy); otherwise a newly allocated
// slice is returned and v1 is left unmodified.
func NormalizeL2[T types.RealNumbers](v1 []T) ([]T, error) {
	if len(v1) == 0 {
		return nil, moerr.NewInternalErrorNoCtx("cannot normalize empty vector")
	}

	// Compute the norm of the vector, accumulating in float64.
	var sumSquares float64
	for _, val := range v1 {
		sumSquares += float64(val) * float64(val)
	}
	norm := math.Sqrt(sumSquares)
	if norm == 0 {
		// Nothing to scale; avoid dividing by zero.
		return v1, nil
	}

	// Divide each element by the norm.
	normalized := make([]T, len(v1))
	for i, val := range v1 {
		normalized[i] = T(float64(val) / norm)
	}

	return normalized, nil
}

// L1Norm returns l1 distance to origin.
// The returned error is always nil.
func L1Norm[T types.RealNumbers](v []T) (float64, error) {
	vec := ToGonumVector[T](v)

	return mat.Norm(vec, 1), nil
}

// L2Norm returns l2 distance to origin.
// The returned error is always nil.
func L2Norm[T types.RealNumbers](v []T) (float64, error) {
	vec := ToGonumVector[T](v)

	return mat.Norm(vec, 2), nil
}

// ScalarOp applies `v <operation> scalar` to every element of v.
// operation must be one of "+", "-", "*" or "/"; any other value yields an
// internal error. Dividing by a zero scalar yields a division-by-zero error.
func ScalarOp[T types.RealNumbers](v []T, operation string, scalar float64) ([]T, error) {
	vec := ToGonumVector[T](v)
	switch operation {
	case "+", "-":
		//TODO: optimize this in future.
		// Subtraction is expressed as adding the negated scalar.
		addend := scalar
		if operation == "-" {
			addend = -scalar
		}
		scalarVec := make([]float64, vec.Len())
		for i := range scalarVec {
			scalarVec[i] = addend
		}
		scalarDenseVec := mat.NewVecDense(vec.Len(), scalarVec)
		vec.AddVec(vec, scalarDenseVec)
	case "*", "/":
		// Division is expressed as scaling by the reciprocal.
		scale := scalar
		if operation == "/" {
			if scalar == 0 {
				return nil, moerr.NewDivByZeroNoCtx()
			}
			scale = float64(1) / scalar
		}
		vec.ScaleVec(scale, vec)
	default:
		return nil, moerr.NewInternalErrorNoCtx("scale_vector: invalid operation")
	}
	return ToMoArray[T](vec)
}

/* ------------ [END] Performance critical functions. ------- */

/* ------------ [START] mat.VecDense not supported functions ------- */

// Abs returns a new slice holding momath.AbsSigned of every element of v.
// The first error reported by momath.AbsSigned aborts the computation and is
// returned with a nil result.
func Abs[T types.RealNumbers](v []T) (res []T, err error) {
	res = make([]T, len(v))
	for i, val := range v {
		res[i], err = momath.AbsSigned[T](val)
		if err != nil {
			return nil, err
		}
	}
	return res, nil
}

// Sqrt returns a new float64 slice holding momath.Sqrt of every element of v.
// The first error reported by momath.Sqrt aborts the computation and is
// returned with a nil result.
func Sqrt[T types.RealNumbers](v []T) (res []float64, err error) {
	res = make([]float64, len(v))
	for i, val := range v {
		res[i], err = momath.Sqrt(float64(val))
		if err != nil {
			return nil, err
		}
	}
	return res, nil
}

// Summation returns the sum of all elements of v, accumulated in float64.
// The returned error is always nil.
func Summation[T types.RealNumbers](v []T) (float64, error) {
	var sum float64
	for _, val := range v {
		sum += float64(val)
	}
	return sum, nil
}

// Cast converts every element of in from type I to type O using a plain Go
// numeric conversion and returns the result in a new slice.
// The returned error is always nil.
func Cast[I types.RealNumbers, O types.RealNumbers](in []I) (out []O, err error) {
	out = make([]O, len(in))
	for i, val := range in {
		out[i] = O(val)
	}

	return out, nil
}

/** Slice Array **/

// SubArrayFromLeft Slice from left to right, starting from 0.
// It returns s[offset:], or an empty slice when offset is negative or larger
// than len(s). The result shares its backing array with s.
func SubArrayFromLeft[T types.RealNumbers](s []T, offset int64) []T {
	totalLen := int64(len(s))
	// A negative offset would make the slice expression panic; treat it like
	// an out-of-range offset, matching SubArrayFromLeftWithLength.
	if offset < 0 || offset > totalLen {
		return []T{}
	}
	return s[offset:]
}

// SubArrayFromRight Cut slices from right to left, starting from 1.
// It returns the last offset elements of s, or an empty slice when offset is
// negative or larger than len(s). The result shares its backing array with s.
func SubArrayFromRight[T types.RealNumbers](s []T, offset int64) []T {
	totalLen := int64(len(s))
	// A negative offset would push the start index past len(s) and panic.
	if offset < 0 || offset > totalLen {
		return []T{}
	}
	return s[totalLen-offset:]
}

// SubArrayFromLeftWithLength Cut the slice with length from left to right, starting from 0.
// A negative offset yields an empty slice; otherwise see subArrayOffsetLen.
func SubArrayFromLeftWithLength[T types.RealNumbers](s []T, offset int64, length int64) []T {
	if offset < 0 {
		return []T{}
	}
	return subArrayOffsetLen(s, offset, length)
}

// SubArrayFromRightWithLength From right to left, cut the slice with length from 1.
// The offset is negated and handed to subArrayOffsetLen, which counts
// negative offsets from the end of s.
func SubArrayFromRightWithLength[T types.RealNumbers](s []T, offset int64, length int64) []T {
	return subArrayOffsetLen(s, -offset, length)
}

// subArrayOffsetLen returns up to length elements of s starting at offset.
// A negative offset counts from the end of s. An empty slice is returned when
// the resolved offset falls outside [0, len(s)) or when length <= 0. The
// window is truncated at the end of s. The result shares its backing array
// with s.
func subArrayOffsetLen[T types.RealNumbers](s []T, offset int64, length int64) []T {
	totalLen := int64(len(s))
	if offset < 0 {
		offset += totalLen
		if offset < 0 {
			return []T{}
		}
	}
	if offset >= totalLen || length <= 0 {
		return []T{}
	}

	// Clamp length against the remaining elements BEFORE adding it to offset:
	// offset+length can overflow int64 for very large lengths, which would
	// produce a negative end index and a slice-bounds panic.
	// Here 0 <= offset < totalLen, so remaining is in [1, totalLen].
	if remaining := totalLen - offset; length > remaining {
		length = remaining
	}
	return s[offset : offset+length]
}

/* ------------ [END] mat.VecDense not supported functions ------- */