github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/aggexec/algos/kmeans/elkans/initializer_test.go (about) 1 // Copyright 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package elkans 16 17 import ( 18 "github.com/matrixorigin/matrixone/pkg/vectorize/moarray" 19 "reflect" 20 "testing" 21 ) 22 23 func TestRandom_InitCentroids(t *testing.T) { 24 type args struct { 25 vectors [][]float64 26 k int 27 } 28 tests := []struct { 29 name string 30 args args 31 wantCentroids [][]float64 32 }{ 33 { 34 name: "TestRandom_InitCentroids", 35 args: args{ 36 vectors: [][]float64{ 37 {1, 2, 3, 4}, 38 {1, 2, 4, 5}, 39 {1, 2, 4, 5}, 40 {1, 2, 3, 4}, 41 {1, 2, 4, 5}, 42 {1, 2, 4, 5}, 43 {10, 2, 4, 5}, 44 {10, 3, 4, 5}, 45 {10, 5, 4, 5}, 46 {10, 2, 4, 5}, 47 {10, 3, 4, 5}, 48 {10, 5, 4, 5}, 49 }, 50 k: 2, 51 }, 52 wantCentroids: [][]float64{ 53 // NOTE: values of random initialization need not be farther apart, it is random. 54 // NOTE: we get the same random values in the test case because we are using a constant seed value. 55 {1, 2, 4, 5}, 56 {1, 2, 3, 4}, 57 }, 58 }, 59 } 60 for _, tt := range tests { 61 t.Run(tt.name, func(t *testing.T) { 62 r := NewRandomInitializer() 63 gonumVectors, _ := moarray.ToGonumVectors[float64](tt.args.vectors...) 64 65 gotCentroids := r.InitCentroids(gonumVectors, tt.args.k) 66 if arrays, _ := moarray.ToMoArrays[float64](gotCentroids); !reflect.DeepEqual(arrays, tt.wantCentroids) { 67 t.Errorf("InitCentroids() = %v, want %v", arrays, tt.wantCentroids) 68 } 69 70 }) 71 } 72 } 73 74 func TestKMeansPlusPlus_InitCentroids(t *testing.T) { 75 type args struct { 76 vectors [][]float64 77 k int 78 } 79 tests := []struct { 80 name string 81 args args 82 wantCentroids [][]float64 83 }{ 84 { 85 name: "TestKMeansPlusPlus_InitCentroids", 86 args: args{ 87 vectors: [][]float64{ 88 {1, 2, 3, 4}, 89 {1, 2, 4, 5}, 90 {1, 2, 4, 5}, 91 {1, 2, 3, 4}, 92 {1, 2, 4, 5}, 93 {1, 2, 4, 5}, 94 {10, 2, 4, 5}, 95 {10, 3, 4, 5}, 96 {10, 5, 4, 5}, 97 {10, 2, 4, 5}, 98 {10, 3, 4, 5}, 99 {10, 5, 4, 5}, 100 }, 101 k: 2, 102 }, 103 // Kmeans++ picked the relatively farthest points as the initial centroids 104 wantCentroids: [][]float64{ 105 {1, 2, 4, 5}, 106 {10, 5, 4, 5}, 107 }, 108 }, 109 } 110 for _, tt := range tests { 111 t.Run(tt.name, func(t *testing.T) { 112 r := NewKMeansPlusPlusInitializer(L2Distance) 113 gonumVectors, _ := moarray.ToGonumVectors[float64](tt.args.vectors...) 114 115 gotCentroids := r.InitCentroids(gonumVectors, tt.args.k) 116 if arrays, _ := moarray.ToMoArrays[float64](gotCentroids); !reflect.DeepEqual(arrays, tt.wantCentroids) { 117 t.Errorf("InitCentroids() = %v, want %v", arrays, tt.wantCentroids) 118 } 119 }) 120 } 121 } 122 123 /* 124 date : 2023-11-20 125 goos: darwin 126 goarch: arm64 127 cpu: Apple M2 Pro 128 rows: 10_000 129 dims: 1024 130 k : 10 131 Benchmark_InitCentroids/RANDOM-10 108 10574740 ns/op 132 Benchmark_InitCentroids/KMEANS++-10 1 1081363458 ns/op 133 */ 134 func Benchmark_InitCentroids(b *testing.B) { 135 rowCnt := 10_000 136 dims := 1024 137 k := 10 138 139 data := make([][]float64, rowCnt) 140 populateRandData(rowCnt, dims, data) 141 142 random := NewRandomInitializer() 143 kmeanspp := NewKMeansPlusPlusInitializer(L2Distance) 144 145 b.Run("RANDOM", func(b *testing.B) { 146 b.ResetTimer() 147 for i := 0; i < b.N; i++ { 148 gonumVectors, _ := moarray.ToGonumVectors[float64](data...) 149 _ = random.InitCentroids(gonumVectors, k) 150 } 151 }) 152 153 b.Run("KMEANS++", func(b *testing.B) { 154 b.ResetTimer() 155 for i := 0; i < b.N; i++ { 156 gonumVectors, _ := moarray.ToGonumVectors[float64](data...) 157 _ = kmeanspp.InitCentroids(gonumVectors, k) 158 } 159 }) 160 }