github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/graph_integrity_integration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTestSlow || !race
    13  
    14  package hnsw
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"math/rand"
    20  	"runtime"
    21  	"sync"
    22  	"testing"
    23  
    24  	"github.com/stretchr/testify/assert"
    25  	"github.com/stretchr/testify/require"
    26  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer"
    27  	"github.com/weaviate/weaviate/entities/cyclemanager"
    28  	ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    29  )
    30  
    31  func TestGraphIntegrity(t *testing.T) {
    32  	dimensions := 300
    33  	size := 1000
    34  	efConstruction := 128
    35  	maxNeighbors := 64
    36  
    37  	vectors := make([][]float32, size)
    38  	var vectorIndex *hnsw
    39  
    40  	t.Run("generate random vectors", func(t *testing.T) {
    41  		fmt.Printf("generating %d vectors", size)
    42  		for i := 0; i < size; i++ {
    43  			vector := make([]float32, dimensions)
    44  			for j := 0; j < dimensions; j++ {
    45  				vector[j] = rand.Float32()
    46  			}
    47  			vectors[i] = vector
    48  		}
    49  	})
    50  
    51  	t.Run("importing into hnsw", func(t *testing.T) {
    52  		fmt.Printf("importing into hnsw\n")
    53  		index, err := New(Config{
    54  			RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
    55  			ID:                    "graphintegrity",
    56  			MakeCommitLoggerThunk: MakeNoopCommitLogger,
    57  			VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
    58  				return vectors[int(id)], nil
    59  			},
    60  			DistanceProvider: distancer.NewDotProductProvider(),
    61  		}, ent.UserConfig{
    62  			MaxConnections: maxNeighbors,
    63  			EFConstruction: efConstruction,
    64  		}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), nil)
    65  		require.Nil(t, err)
    66  		vectorIndex = index
    67  
    68  		workerCount := runtime.GOMAXPROCS(0)
    69  		jobsForWorker := make([][][]float32, workerCount)
    70  
    71  		for i, vec := range vectors {
    72  			workerID := i % workerCount
    73  			jobsForWorker[workerID] = append(jobsForWorker[workerID], vec)
    74  		}
    75  
    76  		wg := &sync.WaitGroup{}
    77  		for workerID, jobs := range jobsForWorker {
    78  			wg.Add(1)
    79  			go func(workerID int, myJobs [][]float32) {
    80  				defer wg.Done()
    81  				for i, vec := range myJobs {
    82  					originalIndex := uint64(i*workerCount) + uint64(workerID)
    83  					err := vectorIndex.Add(originalIndex, vec)
    84  					require.Nil(t, err)
    85  				}
    86  			}(workerID, jobs)
    87  		}
    88  
    89  		wg.Wait()
    90  	})
    91  
    92  	for _, node := range vectorIndex.nodes {
    93  		if node == nil {
    94  			continue
    95  		}
    96  
    97  		conlen := len(node.connections[0])
    98  
    99  		// it is debatable how much value this test still adds. It used to check
   100  		// that a lot of connections are present before we had the heuristic. But
   101  		// with the heuristic it's not uncommon that a node's connections get
   102  		// reduced to a slow amount of key connections. We have thus set this value
   103  		// to 1 to make sure that no nodes are entirely unconnected, but it's
   104  		// questionable if this still adds any value at all
   105  		requiredMinimum := 1
   106  		assert.True(t, conlen >= requiredMinimum, fmt.Sprintf(
   107  			"have %d connections, but want at least %d", conlen, requiredMinimum))
   108  	}
   109  }