github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/graph_integrity_integration_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 //go:build integrationTestSlow || !race 13 14 package hnsw 15 16 import ( 17 "context" 18 "fmt" 19 "math/rand" 20 "runtime" 21 "sync" 22 "testing" 23 24 "github.com/stretchr/testify/assert" 25 "github.com/stretchr/testify/require" 26 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer" 27 "github.com/weaviate/weaviate/entities/cyclemanager" 28 ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 29 ) 30 31 func TestGraphIntegrity(t *testing.T) { 32 dimensions := 300 33 size := 1000 34 efConstruction := 128 35 maxNeighbors := 64 36 37 vectors := make([][]float32, size) 38 var vectorIndex *hnsw 39 40 t.Run("generate random vectors", func(t *testing.T) { 41 fmt.Printf("generating %d vectors", size) 42 for i := 0; i < size; i++ { 43 vector := make([]float32, dimensions) 44 for j := 0; j < dimensions; j++ { 45 vector[j] = rand.Float32() 46 } 47 vectors[i] = vector 48 } 49 }) 50 51 t.Run("importing into hnsw", func(t *testing.T) { 52 fmt.Printf("importing into hnsw\n") 53 index, err := New(Config{ 54 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 55 ID: "graphintegrity", 56 MakeCommitLoggerThunk: MakeNoopCommitLogger, 57 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 58 return vectors[int(id)], nil 59 }, 60 DistanceProvider: distancer.NewDotProductProvider(), 61 }, ent.UserConfig{ 62 MaxConnections: maxNeighbors, 63 EFConstruction: efConstruction, 64 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), nil) 65 require.Nil(t, err) 66 vectorIndex = index 67 68 workerCount := runtime.GOMAXPROCS(0) 69 jobsForWorker := make([][][]float32, workerCount) 70 71 for i, vec := range vectors { 72 workerID := i % workerCount 73 jobsForWorker[workerID] = append(jobsForWorker[workerID], vec) 74 } 75 76 wg := &sync.WaitGroup{} 77 for workerID, jobs := range jobsForWorker { 78 wg.Add(1) 79 go func(workerID int, myJobs [][]float32) { 80 defer wg.Done() 81 for i, vec := range myJobs { 82 originalIndex := uint64(i*workerCount) + uint64(workerID) 83 err := vectorIndex.Add(originalIndex, vec) 84 require.Nil(t, err) 85 } 86 }(workerID, jobs) 87 } 88 89 wg.Wait() 90 }) 91 92 for _, node := range vectorIndex.nodes { 93 if node == nil { 94 continue 95 } 96 97 conlen := len(node.connections[0]) 98 99 // it is debatable how much value this test still adds. It used to check 100 // that a lot of connections are present before we had the heuristic. But 101 // with the heuristic it's not uncommon that a node's connections get 102 // reduced to a slow amount of key connections. We have thus set this value 103 // to 1 to make sure that no nodes are entirely unconnected, but it's 104 // questionable if this still adds any value at all 105 requiredMinimum := 1 106 assert.True(t, conlen >= requiredMinimum, fmt.Sprintf( 107 "have %d connections, but want at least %d", conlen, requiredMinimum)) 108 } 109 }