github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/algos/kmeans/elkans/distance_func.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package elkans
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    19  	"github.com/matrixorigin/matrixone/pkg/sql/colexec/aggexec/algos/kmeans"
    20  	"gonum.org/v1/gonum/mat"
    21  )
    22  
    23  // L2Distance is used for L2Distance distance in Euclidean Kmeans.
    24  func L2Distance(v1, v2 *mat.VecDense) float64 {
    25  	diff := mat.NewVecDense(v1.Len(), nil)
    26  	diff.SubVec(v1, v2)
    27  	return mat.Norm(diff, 2)
    28  }
    29  
    30  //// SphericalDistance is used for InnerProduct and CosineDistance in Spherical Kmeans.
    31  //// NOTE: spherical distance between two points on a sphere is equal to the
    32  //// angular distance between the two points, scaled by pi.
    33  //// Refs:
    34  //// https://en.wikipedia.org/wiki/Great-circle_distance#Vector_version
    35  //func SphericalDistance(v1, v2 *mat.VecDense) float64 {
    36  //	// Compute the dot product of the two vectors.
    37  //	// The dot product of two vectors is a measure of their similarity,
    38  //	// and it can be used to calculate the angle between them.
    39  //	dp := mat.Dot(v1, v2)
    40  //
    41  //	// Clamp to [-1, 1]: floating-point rounding can push the dot product slightly out of range, and acos would then return NaN.
    42  //	if dp > 1.0 {
    43  //		dp = 1.0
    44  //	} else if dp < -1.0 {
    45  //		dp = -1.0
    46  //	}
    47  //
    48  //	theta := math.Acos(dp)
    49  //
    50  //	//To scale the result to the range [0, 1], we divide by Pi.
    51  //	return theta / math.Pi
    52  //
    53  //	// NOTE:
    54  //	// Cosine distance is a measure of the similarity between two vectors. [Does not satisfy the triangle inequality]
    55  //	// Angular distance is a measure of the angular separation between two points. [Satisfies the triangle inequality]
    56  //	// Spherical distance is a measure of the spatial separation between two points on a sphere. [Satisfies the triangle inequality]
    57  //}
    58  
    59  // resolveDistanceFn returns the distance function corresponding to the distance type
    60  // Distance function should satisfy triangle inequality.
    61  // We use
    62  // - L2Distance distance for L2Distance
    63  // - SphericalDistance for InnerProduct and CosineDistance
    64  func resolveDistanceFn(distType kmeans.DistanceType) (kmeans.DistanceFunction, error) {
    65  	var distanceFunction kmeans.DistanceFunction
    66  	switch distType {
    67  	case kmeans.L2Distance:
    68  		distanceFunction = L2Distance
    69  	//case kmeans.InnerProduct, kmeans.CosineDistance:
    70  	//	distanceFunction = SphericalDistance
    71  	default:
    72  		return nil, moerr.NewInternalErrorNoCtx("invalid distance type")
    73  	}
    74  	return distanceFunction, nil
    75  }