github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/index_slowdown_bug_intergration_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  //go:build integrationTestBug
    13  // +build integrationTestBug
    14  
    15  package hnsw
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math"
    21  	"math/rand"
    22  	"runtime"
    23  	"sync"
    24  	"testing"
    25  	"time"
    26  
    27  	"github.com/stretchr/testify/require"
    28  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer"
    29  	"github.com/weaviate/weaviate/adapters/repos/db/vector/testinghelpers"
    30  )
    31  
    32  func Normalize(v []float32) []float32 {
    33  	var norm float32
    34  	for i := range v {
    35  		norm += v[i] * v[i]
    36  	}
    37  
    38  	norm = float32(math.Sqrt(float64(norm)))
    39  	for i := range v {
    40  		v[i] = v[i] / norm
    41  	}
    42  
    43  	return v
    44  }
    45  
    46  func TestSlowDownBugAtHighEF(t *testing.T) {
    47  	dimensions := 256
    48  	size := 25000
    49  	efConstruction := 2000
    50  	maxNeighbors := 100
    51  
    52  	vectors := make([][]float32, size)
    53  	var vectorIndex *hnsw
    54  
    55  	t.Run("generate random vectors", func(t *testing.T) {
    56  		fmt.Printf("generating %d vectors", size)
    57  		for i := 0; i < size; i++ {
    58  			vector := make([]float32, dimensions)
    59  			for j := 0; j < dimensions; j++ {
    60  				vector[j] = rand.Float32()
    61  			}
    62  			vectors[i] = Normalize(vector)
    63  		}
    64  		fmt.Printf("done\n")
    65  	})
    66  
    67  	t.Run("importing into hnsw", func(t *testing.T) {
    68  		fmt.Printf("importing into hnsw\n")
    69  		index, err := New(Config{
    70  			RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
    71  			ID:                    "recallbenchmark",
    72  			MakeCommitLoggerThunk: MakeNoopCommitLogger,
    73  			DistanceProvider:      distancer.NewDotProductProvider(),
    74  			// DistanceProvider: distancer.NewCosineDistanceProvider(),
    75  			VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
    76  				return nil, nil
    77  			},
    78  		}, UserConfig{
    79  			MaxConnections: maxNeighbors,
    80  			EFConstruction: efConstruction,
    81  		}, testinghelpers.NewDummyStore(t))
    82  
    83  		require.Nil(t, err)
    84  		vectorIndex = index
    85  
    86  		workerCount := runtime.GOMAXPROCS(0)
    87  		// workerCount := 1
    88  		jobsForWorker := make([][][]float32, workerCount)
    89  
    90  		for i, vec := range vectors {
    91  			workerID := i % workerCount
    92  			jobsForWorker[workerID] = append(jobsForWorker[workerID], vec)
    93  		}
    94  
    95  		beforeImport := time.Now()
    96  		wg := &sync.WaitGroup{}
    97  		for workerID, jobs := range jobsForWorker {
    98  			wg.Add(1)
    99  			go func(workerID int, myJobs [][]float32) {
   100  				defer wg.Done()
   101  				for i, vec := range myJobs {
   102  					originalIndex := (i * workerCount) + workerID
   103  					err := vectorIndex.Add(uint64(originalIndex), vec)
   104  					require.Nil(t, err)
   105  				}
   106  			}(workerID, jobs)
   107  		}
   108  
   109  		wg.Wait()
   110  		// neighbor := bruteForceCosine(vectors, vectors[0], 2)
   111  		// dist, _, _ := distancer.NewCosineDistanceProvider().SingleDist(vectors[0], vectors[neighbor[1]])
   112  		// fmt.Printf("distance between 0 and %d is %f\n", neighbor[1], dist)
   113  		fmt.Printf("import took %s\n", time.Since(beforeImport))
   114  		// vectorIndex.Dump()
   115  
   116  		t.Fail()
   117  	})
   118  }