github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/index_slowdown_bug_intergration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTestBug 13 // +build integrationTestBug 14 15 package hnsw 16 17 import ( 18 "context" 19 "fmt" 20 "math" 21 "math/rand" 22 "runtime" 23 "sync" 24 "testing" 25 "time" 26 27 "github.com/stretchr/testify/require" 28 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer" 29 "github.com/weaviate/weaviate/adapters/repos/db/vector/testinghelpers" 30 ) 31 32 func Normalize(v []float32) []float32 { 33 var norm float32 34 for i := range v { 35 norm += v[i] * v[i] 36 } 37 38 norm = float32(math.Sqrt(float64(norm))) 39 for i := range v { 40 v[i] = v[i] / norm 41 } 42 43 return v 44 } 45 46 func TestSlowDownBugAtHighEF(t *testing.T) { 47 dimensions := 256 48 size := 25000 49 efConstruction := 2000 50 maxNeighbors := 100 51 52 vectors := make([][]float32, size) 53 var vectorIndex *hnsw 54 55 t.Run("generate random vectors", func(t *testing.T) { 56 fmt.Printf("generating %d vectors", size) 57 for i := 0; i < size; i++ { 58 vector := make([]float32, dimensions) 59 for j := 0; j < dimensions; j++ { 60 vector[j] = rand.Float32() 61 } 62 vectors[i] = Normalize(vector) 63 } 64 fmt.Printf("done\n") 65 }) 66 67 t.Run("importing into hnsw", func(t *testing.T) { 68 fmt.Printf("importing into hnsw\n") 69 index, err := New(Config{ 70 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 71 ID: "recallbenchmark", 72 MakeCommitLoggerThunk: MakeNoopCommitLogger, 73 DistanceProvider: distancer.NewDotProductProvider(), 74 // DistanceProvider: distancer.NewCosineDistanceProvider(), 75 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 76 return nil, nil 77 }, 78 }, UserConfig{ 79 MaxConnections: maxNeighbors, 80 EFConstruction: efConstruction, 81 }, testinghelpers.NewDummyStore(t)) 82 83 require.Nil(t, err) 84 vectorIndex = index 85 86 workerCount := runtime.GOMAXPROCS(0) 87 // workerCount := 1 88 jobsForWorker := make([][][]float32, workerCount) 89 90 for i, vec := range vectors { 91 workerID := i % workerCount 92 jobsForWorker[workerID] = append(jobsForWorker[workerID], vec) 93 } 94 95 beforeImport := time.Now() 96 wg := &sync.WaitGroup{} 97 for workerID, jobs := range jobsForWorker { 98 wg.Add(1) 99 go func(workerID int, myJobs [][]float32) { 100 defer wg.Done() 101 for i, vec := range myJobs { 102 originalIndex := (i * workerCount) + workerID 103 err := vectorIndex.Add(uint64(originalIndex), vec) 104 require.Nil(t, err) 105 } 106 }(workerID, jobs) 107 } 108 109 wg.Wait() 110 // neighbor := bruteForceCosine(vectors, vectors[0], 2) 111 // dist, _, _ := distancer.NewCosineDistanceProvider().SingleDist(vectors[0], vectors[neighbor[1]]) 112 // fmt.Printf("distance between 0 and %d is %f\n", neighbor[1], dist) 113 fmt.Printf("import took %s\n", time.Since(beforeImport)) 114 // vectorIndex.Dump() 115 116 t.Fail() 117 }) 118 }