github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/delete_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"os"
    18  	"sort"
    19  	"sync"
    20  	"testing"
    21  
    22  	"github.com/stretchr/testify/assert"
    23  	"github.com/stretchr/testify/require"
    24  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    25  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    26  	"github.com/weaviate/weaviate/adapters/repos/db/vector/common"
    27  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer"
    28  	"github.com/weaviate/weaviate/adapters/repos/db/vector/testinghelpers"
    29  	"github.com/weaviate/weaviate/entities/cyclemanager"
    30  	"github.com/weaviate/weaviate/entities/storobj"
    31  	ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
    32  )
    33  
    34  func TempVectorForIDThunk(vectors [][]float32) func(context.Context, uint64, *common.VectorSlice) ([]float32, error) {
    35  	return func(ctx context.Context, id uint64, container *common.VectorSlice) ([]float32, error) {
    36  		copy(container.Slice, vectors[int(id)])
    37  		return vectors[int(id)], nil
    38  	}
    39  }
    40  
    41  func TestDelete_WithoutCleaningUpTombstones(t *testing.T) {
    42  	vectors := vectorsForDeleteTest()
    43  	var vectorIndex *hnsw
    44  
    45  	store := testinghelpers.NewDummyStore(t)
    46  	defer store.Shutdown(context.Background())
    47  	t.Run("import the test vectors", func(t *testing.T) {
    48  		index, err := New(Config{
    49  			RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
    50  			ID:                    "delete-test",
    51  			MakeCommitLoggerThunk: MakeNoopCommitLogger,
    52  			DistanceProvider:      distancer.NewCosineDistanceProvider(),
    53  			VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
    54  				return vectors[int(id)], nil
    55  			},
    56  			TempVectorForIDThunk: TempVectorForIDThunk(vectors),
    57  		}, ent.UserConfig{
    58  			MaxConnections: 30,
    59  			EFConstruction: 128,
    60  
    61  			// The actual size does not matter for this test, but if it defaults to
    62  			// zero it will constantly think it's full and needs to be deleted - even
    63  			// after just being deleted, so make sure to use a positive number here.
    64  			VectorCacheMaxObjects: 100000,
    65  		}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
    66  			cyclemanager.NewCallbackGroupNoop(), store)
    67  		require.Nil(t, err)
    68  		vectorIndex = index
    69  
    70  		for i, vec := range vectors {
    71  			err := vectorIndex.Add(uint64(i), vec)
    72  			require.Nil(t, err)
    73  		}
    74  	})
    75  
    76  	var control []uint64
    77  
    78  	t.Run("vectors are cached correctly", func(t *testing.T) {
    79  		assert.Equal(t, len(vectors), int(vectorIndex.cache.CountVectors()))
    80  	})
    81  
    82  	t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) {
    83  		allowList := helpers.NewAllowList()
    84  		for i := range vectors {
    85  			if i%2 == 0 {
    86  				continue
    87  			}
    88  
    89  			allowList.Insert(uint64(i))
    90  		}
    91  
    92  		res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList)
    93  		require.Nil(t, err)
    94  		require.True(t, len(res) > 0)
    95  		control = res
    96  	})
    97  
    98  	t.Run("deleting every even element", func(t *testing.T) {
    99  		for i := range vectors {
   100  			if i%2 != 0 {
   101  				continue
   102  			}
   103  
   104  			err := vectorIndex.Delete(uint64(i))
   105  			require.Nil(t, err)
   106  		}
   107  	})
   108  
   109  	t.Run("vector cache holds half the original vectors", func(t *testing.T) {
   110  		vectorIndex.CleanUpTombstonedNodes(neverStop)
   111  		assert.Equal(t, len(vectors)/2, int(vectorIndex.cache.CountVectors()))
   112  	})
   113  
   114  	t.Run("start a search that should only contain the remaining elements", func(t *testing.T) {
   115  		res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil)
   116  		require.Nil(t, err)
   117  		require.True(t, len(res) > 0)
   118  
   119  		for _, elem := range res {
   120  			if elem%2 == 0 {
   121  				t.Errorf("search result contained an even element: %d", elem)
   122  			}
   123  		}
   124  
   125  		assert.Equal(t, control, res)
   126  	})
   127  
   128  	t.Run("destroy the index", func(t *testing.T) {
   129  		require.Nil(t, vectorIndex.Drop(context.Background()))
   130  	})
   131  
   132  	t.Run("vector cache holds no vectors", func(t *testing.T) {
   133  		assert.Equal(t, 0, int(vectorIndex.cache.CountVectors()))
   134  	})
   135  }
   136  
   137  func TestDelete_WithCleaningUpTombstonesOnce(t *testing.T) {
   138  	// there is a single bulk clean event after all the deletes
   139  	vectors := vectorsForDeleteTest()
   140  	var vectorIndex *hnsw
   141  
   142  	store := testinghelpers.NewDummyStore(t)
   143  	defer store.Shutdown(context.Background())
   144  
   145  	t.Run("import the test vectors", func(t *testing.T) {
   146  		index, err := New(Config{
   147  			RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
   148  			ID:                    "delete-test",
   149  			MakeCommitLoggerThunk: MakeNoopCommitLogger,
   150  			DistanceProvider:      distancer.NewCosineDistanceProvider(),
   151  			VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
   152  				return vectors[int(id)], nil
   153  			},
   154  			TempVectorForIDThunk: TempVectorForIDThunk(vectors),
   155  		}, ent.UserConfig{
   156  			MaxConnections: 30,
   157  			EFConstruction: 128,
   158  
   159  			// The actual size does not matter for this test, but if it defaults to
   160  			// zero it will constantly think it's full and needs to be deleted - even
   161  			// after just being deleted, so make sure to use a positive number here.
   162  			VectorCacheMaxObjects: 100000,
   163  		}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   164  			cyclemanager.NewCallbackGroupNoop(), store)
   165  		require.Nil(t, err)
   166  		vectorIndex = index
   167  
   168  		for i, vec := range vectors {
   169  			err := vectorIndex.Add(uint64(i), vec)
   170  			require.Nil(t, err)
   171  		}
   172  	})
   173  
   174  	var control []uint64
   175  	var bfControl []uint64
   176  
   177  	t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) {
   178  		allowList := helpers.NewAllowList()
   179  		for i := range vectors {
   180  			if i%2 == 0 {
   181  				continue
   182  			}
   183  
   184  			allowList.Insert(uint64(i))
   185  		}
   186  
   187  		res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList)
   188  		require.Nil(t, err)
   189  		require.True(t, len(res) > 0)
   190  		require.Len(t, res, 20)
   191  		control = res
   192  	})
   193  
   194  	t.Run("brute force control", func(t *testing.T) {
   195  		bf := bruteForceCosine(vectors, []float32{0.1, 0.1, 0.1}, 100)
   196  		bfControl = make([]uint64, len(bf))
   197  		i := 0
   198  		for _, elem := range bf {
   199  			if elem%2 == 0 {
   200  				continue
   201  			}
   202  
   203  			bfControl[i] = elem
   204  			i++
   205  		}
   206  
   207  		if i > 20 {
   208  			i = 20
   209  		}
   210  
   211  		bfControl = bfControl[:i]
   212  		assert.Equal(t, bfControl, control, "control should match bf control")
   213  	})
   214  
   215  	fmt.Printf("entrypoint before %d\n", vectorIndex.entryPointID)
   216  	t.Run("deleting every even element", func(t *testing.T) {
   217  		for i := range vectors {
   218  			if i%2 != 0 {
   219  				continue
   220  			}
   221  
   222  			err := vectorIndex.Delete(uint64(i))
   223  			require.Nil(t, err)
   224  		}
   225  	})
   226  
   227  	t.Run("running the cleanup", func(t *testing.T) {
   228  		err := vectorIndex.CleanUpTombstonedNodes(neverStop)
   229  		require.Nil(t, err)
   230  	})
   231  
   232  	t.Run("start a search that should only contain the remaining elements", func(t *testing.T) {
   233  		res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil)
   234  		require.Nil(t, err)
   235  		require.True(t, len(res) > 0)
   236  
   237  		for _, elem := range res {
   238  			if elem%2 == 0 {
   239  				t.Errorf("search result contained an even element: %d", elem)
   240  			}
   241  		}
   242  
   243  		assert.Equal(t, control, res)
   244  	})
   245  
   246  	t.Run("verify the graph no longer has any tombstones", func(t *testing.T) {
   247  		assert.Len(t, vectorIndex.tombstones, 0)
   248  	})
   249  
   250  	t.Run("destroy the index", func(t *testing.T) {
   251  		require.Nil(t, vectorIndex.Drop(context.Background()))
   252  	})
   253  }
   254  
   255  func TestDelete_WithCleaningUpTombstonesInBetween(t *testing.T) {
   256  	// there is a single bulk clean event after all the deletes
   257  	vectors := vectorsForDeleteTest()
   258  	var vectorIndex *hnsw
   259  	store := testinghelpers.NewDummyStore(t)
   260  	defer store.Shutdown(context.Background())
   261  
   262  	t.Run("import the test vectors", func(t *testing.T) {
   263  		index, err := New(Config{
   264  			RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
   265  			ID:                    "delete-test",
   266  			MakeCommitLoggerThunk: MakeNoopCommitLogger,
   267  			DistanceProvider:      distancer.NewCosineDistanceProvider(),
   268  			VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
   269  				return vectors[int(id)], nil
   270  			},
   271  			TempVectorForIDThunk: TempVectorForIDThunk(vectors),
   272  		}, ent.UserConfig{
   273  			MaxConnections: 30,
   274  			EFConstruction: 128,
   275  
   276  			// The actual size does not matter for this test, but if it defaults to
   277  			// zero it will constantly think it's full and needs to be deleted - even
   278  			// after just being deleted, so make sure to use a positive number here.
   279  			VectorCacheMaxObjects: 100000,
   280  		}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   281  			cyclemanager.NewCallbackGroupNoop(), store)
   282  		// makes sure index is build only with level 0. To be removed after fixing WEAVIATE-179
   283  		index.randFunc = func() float64 { return 0.1 }
   284  
   285  		require.Nil(t, err)
   286  		vectorIndex = index
   287  
   288  		for i, vec := range vectors {
   289  			err := vectorIndex.Add(uint64(i), vec)
   290  			require.Nil(t, err)
   291  		}
   292  	})
   293  
   294  	var control []uint64
   295  
   296  	t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) {
   297  		allowList := helpers.NewAllowList()
   298  		for i := range vectors {
   299  			if i%2 == 0 {
   300  				continue
   301  			}
   302  
   303  			allowList.Insert(uint64(i))
   304  		}
   305  
   306  		res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList)
   307  		require.Nil(t, err)
   308  		require.True(t, len(res) > 0)
   309  
   310  		control = res
   311  	})
   312  
   313  	t.Run("deleting every even element", func(t *testing.T) {
   314  		for i := range vectors {
   315  			if i%10 == 0 {
   316  				// occasionally run clean up
   317  				err := vectorIndex.CleanUpTombstonedNodes(neverStop)
   318  				require.Nil(t, err)
   319  			}
   320  
   321  			if i%2 != 0 {
   322  				continue
   323  			}
   324  
   325  			err := vectorIndex.Delete(uint64(i))
   326  			require.Nil(t, err)
   327  		}
   328  
   329  		// finally run one final cleanup
   330  		err := vectorIndex.CleanUpTombstonedNodes(neverStop)
   331  		require.Nil(t, err)
   332  	})
   333  
   334  	t.Run("start a search that should only contain the remaining elements", func(t *testing.T) {
   335  		res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil)
   336  		require.Nil(t, err)
   337  		require.True(t, len(res) > 0)
   338  
   339  		for _, elem := range res {
   340  			if elem%2 == 0 {
   341  				t.Errorf("search result contained an even element: %d", elem)
   342  			}
   343  		}
   344  
   345  		assert.Equal(t, control, res)
   346  	})
   347  
   348  	t.Run("verify the graph no longer has any tombstones", func(t *testing.T) {
   349  		assert.Len(t, vectorIndex.tombstones, 0)
   350  	})
   351  
   352  	t.Run("delete the remaining elements", func(t *testing.T) {
   353  		for i := range vectors {
   354  			if i%2 == 0 {
   355  				continue
   356  			}
   357  
   358  			err := vectorIndex.Delete(uint64(i))
   359  			require.Nil(t, err)
   360  		}
   361  
   362  		err := vectorIndex.CleanUpTombstonedNodes(neverStop)
   363  		require.Nil(t, err)
   364  	})
   365  
   366  	t.Run("try to insert again and search", func(t *testing.T) {
   367  		for i := 0; i < 5; i++ {
   368  			err := vectorIndex.Add(uint64(i), vectors[i])
   369  			require.Nil(t, err)
   370  		}
   371  
   372  		res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil)
   373  		require.Nil(t, err)
   374  		assert.ElementsMatch(t, []uint64{0, 1, 2, 3, 4}, res)
   375  	})
   376  
   377  	t.Run("destroy the index", func(t *testing.T) {
   378  		require.Nil(t, vectorIndex.Drop(context.Background()))
   379  	})
   380  
   381  	store.Shutdown(context.Background())
   382  }
   383  
   384  func createIndexImportAllVectorsAndDeleteEven(t *testing.T, vectors [][]float32, store *lsmkv.Store) (index *hnsw, remainingResult []uint64) {
   385  	index, err := New(Config{
   386  		RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
   387  		ID:                    "delete-test",
   388  		MakeCommitLoggerThunk: MakeNoopCommitLogger,
   389  		DistanceProvider:      distancer.NewCosineDistanceProvider(),
   390  		VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
   391  			return vectors[int(id)], nil
   392  		},
   393  		TempVectorForIDThunk: TempVectorForIDThunk(vectors),
   394  	}, ent.UserConfig{
   395  		MaxConnections: 30,
   396  		EFConstruction: 128,
   397  
   398  		// The actual size does not matter for this test, but if it defaults to
   399  		// zero it will constantly think it's full and needs to be deleted - even
   400  		// after just being deleted, so make sure to use a positive number here.
   401  		VectorCacheMaxObjects: 100000,
   402  	}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   403  		cyclemanager.NewCallbackGroupNoop(), store)
   404  	require.Nil(t, err)
   405  
   406  	// makes sure index is build only with level 0. To be removed after fixing WEAVIATE-179
   407  	index.randFunc = func() float64 { return 0.1 }
   408  
   409  	// to speed up test execution, size of nodes array is decreased
   410  	// from default 25k to little over number of vectors
   411  	index.nodes = make([]*vertex, int(1.2*float64(len(vectors))))
   412  
   413  	for i, vec := range vectors {
   414  		err := index.Add(uint64(i), vec)
   415  		require.Nil(t, err)
   416  	}
   417  
   418  	for i := range vectors {
   419  		if i%2 != 0 {
   420  			continue
   421  		}
   422  		err := index.Delete(uint64(i))
   423  		require.Nil(t, err)
   424  	}
   425  
   426  	res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil)
   427  	require.Nil(t, err)
   428  	require.True(t, len(res) > 0)
   429  
   430  	for _, elem := range res {
   431  		if elem%2 == 0 {
   432  			t.Errorf("search result contained an even element: %d", elem)
   433  		}
   434  	}
   435  
   436  	return index, res
   437  }
   438  
   439  func genStopAtFunc(i int) func() bool {
   440  	counter := 0
   441  	mutex := &sync.Mutex{}
   442  	return func() bool {
   443  		mutex.Lock()
   444  		defer mutex.Unlock()
   445  		if counter < i {
   446  			counter++
   447  			return false
   448  		}
   449  
   450  		return true
   451  	}
   452  }
   453  
   454  func TestDelete_WithCleaningUpTombstonesStopped(t *testing.T) {
   455  	vectors := vectorsForDeleteTest()
   456  	var index *hnsw
   457  	var possibleStopsCount int
   458  	// due to not yet resolved bug (https://semi-technology.atlassian.net/browse/WEAVIATE-179)
   459  	// db can return less vectors than are actually stored after tombstones cleanup
   460  	// controlRemainingResult contains all odd vectors (before cleanup was performed)
   461  	// controlRemainingResultAfterCleanup contains most of odd vectors (after cleanup was performed)
   462  	//
   463  	// this test verifies if partial cleanup will not change search output, therefore depending on
   464  	// where cleanup method was stopped, subset of controlRemainingResult is expected, though all
   465  	// vectors from controlRemainingResultAfterCleanup should be returned
   466  	// TODO to be simplified after fixing WEAVIATE-179, all results should be the same
   467  	var controlRemainingResult []uint64
   468  	var controlRemainingResultAfterCleanup []uint64
   469  	store := testinghelpers.NewDummyStore(t)
   470  	defer store.Shutdown(context.Background())
   471  
   472  	t.Run("create control index", func(t *testing.T) {
   473  		index, controlRemainingResult = createIndexImportAllVectorsAndDeleteEven(t, vectors, store)
   474  	})
   475  
   476  	t.Run("count all cleanup tombstones stops", func(t *testing.T) {
   477  		counter := 0
   478  		mutex := &sync.Mutex{}
   479  		countingStopFunc := func() bool {
   480  			mutex.Lock()
   481  			counter++
   482  			mutex.Unlock()
   483  			return false
   484  		}
   485  
   486  		err := index.CleanUpTombstonedNodes(countingStopFunc)
   487  		require.Nil(t, err)
   488  
   489  		possibleStopsCount = counter
   490  	})
   491  
   492  	t.Run("search remaining elements after cleanup", func(t *testing.T) {
   493  		res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil)
   494  		require.Nil(t, err)
   495  		require.True(t, len(res) > 0)
   496  
   497  		for _, elem := range res {
   498  			if elem%2 == 0 {
   499  				t.Errorf("search result contained an even element: %d", elem)
   500  			}
   501  		}
   502  		controlRemainingResultAfterCleanup = res
   503  	})
   504  
   505  	t.Run("destroy the control index", func(t *testing.T) {
   506  		require.Nil(t, index.Drop(context.Background()))
   507  	})
   508  
   509  	for i := 0; i < possibleStopsCount; i++ {
   510  		index, _ = createIndexImportAllVectorsAndDeleteEven(t, vectors, store)
   511  
   512  		t.Run("stop cleanup at place", func(t *testing.T) {
   513  			require.Nil(t, index.CleanUpTombstonedNodes(genStopAtFunc(i)))
   514  		})
   515  
   516  		t.Run("search remaining elements after partial cleanup", func(t *testing.T) {
   517  			res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil)
   518  			require.Nil(t, err)
   519  			require.Subset(t, controlRemainingResult, res)
   520  			require.Subset(t, res, controlRemainingResultAfterCleanup)
   521  		})
   522  
   523  		t.Run("run complete cleanup", func(t *testing.T) {
   524  			require.Nil(t, index.CleanUpTombstonedNodes(neverStop))
   525  		})
   526  
   527  		t.Run("search remaining elements after complete cleanup", func(t *testing.T) {
   528  			res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil)
   529  			require.Nil(t, err)
   530  			require.Subset(t, controlRemainingResult, res)
   531  			require.Subset(t, res, controlRemainingResultAfterCleanup)
   532  		})
   533  
   534  		t.Run("destroy the index", func(t *testing.T) {
   535  			require.Nil(t, index.Drop(context.Background()))
   536  		})
   537  	}
   538  }
   539  
   540  func TestDelete_InCompressedIndex_WithCleaningUpTombstonesOnce(t *testing.T) {
   541  	var (
   542  		vectorIndex *hnsw
   543  		// there is a single bulk clean event after all the deletes
   544  		vectors    = vectorsForDeleteTest()
   545  		rootPath   = t.TempDir()
   546  		userConfig = ent.UserConfig{
   547  			MaxConnections: 30,
   548  			EFConstruction: 128,
   549  
   550  			// The actual size does not matter for this test, but if it defaults to
   551  			// zero it will constantly think it's full and needs to be deleted - even
   552  			// after just being deleted, so make sure to use a positive number here.
   553  			VectorCacheMaxObjects: 100000,
   554  			PQ: ent.PQConfig{
   555  				Enabled: true,
   556  				Encoder: ent.PQEncoder{
   557  					Type:         ent.PQEncoderTypeTile,
   558  					Distribution: ent.PQEncoderDistributionNormal,
   559  				},
   560  			},
   561  		}
   562  	)
   563  	store := testinghelpers.NewDummyStore(t)
   564  	defer store.Shutdown(context.Background())
   565  
   566  	t.Run("import the test vectors", func(t *testing.T) {
   567  		index, err := New(Config{
   568  			RootPath:              rootPath,
   569  			ID:                    "delete-test",
   570  			MakeCommitLoggerThunk: MakeNoopCommitLogger,
   571  			DistanceProvider:      distancer.NewCosineDistanceProvider(),
   572  			VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
   573  				if int(id) >= len(vectors) {
   574  					return nil, storobj.NewErrNotFoundf(id, "out of range")
   575  				}
   576  				return vectors[int(id)], nil
   577  			},
   578  			TempVectorForIDThunk: TempVectorForIDThunk(vectors),
   579  		}, userConfig, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   580  			cyclemanager.NewCallbackGroupNoop(), store)
   581  		require.Nil(t, err)
   582  		vectorIndex = index
   583  
   584  		for i, vec := range vectors {
   585  			err := vectorIndex.Add(uint64(i), vec)
   586  			require.Nil(t, err)
   587  		}
   588  		cfg := ent.PQConfig{
   589  			Enabled: true,
   590  			Encoder: ent.PQEncoder{
   591  				Type:         ent.PQEncoderTypeTile,
   592  				Distribution: ent.PQEncoderDistributionLogNormal,
   593  			},
   594  			BitCompression: false,
   595  			Segments:       3,
   596  			Centroids:      256,
   597  		}
   598  		userConfig.PQ = cfg
   599  		index.compress(userConfig)
   600  	})
   601  
   602  	var control []uint64
   603  	var bfControl []uint64
   604  
   605  	t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) {
   606  		allowList := helpers.NewAllowList()
   607  		for i := range vectors {
   608  			if i%2 == 0 {
   609  				continue
   610  			}
   611  
   612  			allowList.Insert(uint64(i))
   613  		}
   614  
   615  		res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList)
   616  		require.Nil(t, err)
   617  		require.True(t, len(res) > 0)
   618  		require.Len(t, res, 20)
   619  		control = res
   620  	})
   621  
   622  	t.Run("brute force control", func(t *testing.T) {
   623  		bf := bruteForceCosine(vectors, []float32{0.1, 0.1, 0.1}, 100)
   624  		bfControl = make([]uint64, len(bf))
   625  		i := 0
   626  		for _, elem := range bf {
   627  			if elem%2 == 0 {
   628  				continue
   629  			}
   630  
   631  			bfControl[i] = elem
   632  			i++
   633  		}
   634  
   635  		if i > 20 {
   636  			i = 20
   637  		}
   638  
   639  		bfControl = bfControl[:i]
   640  		recall := float32(testinghelpers.MatchesInLists(bfControl, control)) / float32(len(bfControl))
   641  		fmt.Println(recall)
   642  		assert.True(t, recall > 0.6, "control should match bf control")
   643  	})
   644  
   645  	fmt.Printf("entrypoint before %d\n", vectorIndex.entryPointID)
   646  	t.Run("deleting every even element", func(t *testing.T) {
   647  		for i := range vectors {
   648  			if i%2 != 0 {
   649  				continue
   650  			}
   651  
   652  			err := vectorIndex.Delete(uint64(i))
   653  			require.Nil(t, err)
   654  		}
   655  	})
   656  
   657  	t.Run("running the cleanup", func(t *testing.T) {
   658  		err := vectorIndex.CleanUpTombstonedNodes(neverStop)
   659  		require.Nil(t, err)
   660  	})
   661  
   662  	t.Run("start a search that should only contain the remaining elements", func(t *testing.T) {
   663  		res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil)
   664  		require.Nil(t, err)
   665  		require.True(t, len(res) > 0)
   666  
   667  		for _, elem := range res {
   668  			if elem%2 == 0 {
   669  				t.Errorf("search result contained an even element: %d", elem)
   670  			}
   671  		}
   672  
   673  		recall := float32(testinghelpers.MatchesInLists(res, control)) / float32(len(control))
   674  		assert.True(t, recall > 0.6)
   675  	})
   676  
   677  	t.Run("verify the graph no longer has any tombstones", func(t *testing.T) {
   678  		assert.Len(t, vectorIndex.tombstones, 0)
   679  	})
   680  
   681  	t.Run("destroy the index", func(t *testing.T) {
   682  		require.Nil(t, vectorIndex.Drop(context.Background()))
   683  	})
   684  }
   685  
   686  func TestDelete_InCompressedIndex_WithCleaningUpTombstonesOnce_DoesNotCrash(t *testing.T) {
   687  	var (
   688  		vectorIndex *hnsw
   689  		// there is a single bulk clean event after all the deletes
   690  		vectors    = vectorsForDeleteTest()
   691  		rootPath   = t.TempDir()
   692  		userConfig = ent.UserConfig{
   693  			MaxConnections: 30,
   694  			EFConstruction: 128,
   695  
   696  			// The actual size does not matter for this test, but if it defaults to
   697  			// zero it will constantly think it's full and needs to be deleted - even
   698  			// after just being deleted, so make sure to use a positive number here.
   699  			VectorCacheMaxObjects: 100000,
   700  			PQ:                    ent.PQConfig{Enabled: true, Encoder: ent.PQEncoder{Type: "tile", Distribution: "normal"}},
   701  		}
   702  	)
   703  
   704  	store := testinghelpers.NewDummyStore(t)
   705  	defer store.Shutdown(context.Background())
   706  
   707  	t.Run("import the test vectors", func(t *testing.T) {
   708  		index, err := New(Config{
   709  			RootPath:              rootPath,
   710  			ID:                    "delete-test",
   711  			MakeCommitLoggerThunk: MakeNoopCommitLogger,
   712  			DistanceProvider:      distancer.NewCosineDistanceProvider(),
   713  			VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
   714  				return vectors[int(id%uint64(len(vectors)))], nil
   715  			},
   716  			TempVectorForIDThunk: TempVectorForIDThunk(vectors),
   717  		}, userConfig, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   718  			cyclemanager.NewCallbackGroupNoop(), store)
   719  		require.Nil(t, err)
   720  		vectorIndex = index
   721  
   722  		for i, vec := range vectors {
   723  			err := vectorIndex.Add(uint64(i), vec)
   724  			require.Nil(t, err)
   725  		}
   726  		cfg := ent.PQConfig{
   727  			Enabled: true,
   728  			Encoder: ent.PQEncoder{
   729  				Type:         ent.PQEncoderTypeTile,
   730  				Distribution: ent.PQEncoderDistributionLogNormal,
   731  			},
   732  			BitCompression: false,
   733  			Segments:       3,
   734  			Centroids:      256,
   735  		}
   736  		userConfig.PQ = cfg
   737  		index.compress(userConfig)
   738  		for i := len(vectors); i < 1000; i++ {
   739  			err := vectorIndex.Add(uint64(i), vectors[i%len(vectors)])
   740  			require.Nil(t, err)
   741  		}
   742  	})
   743  
   744  	t.Run("deleting every even element", func(t *testing.T) {
   745  		for i := range vectors {
   746  			if i%2 != 0 {
   747  				continue
   748  			}
   749  
   750  			err := vectorIndex.Delete(uint64(i))
   751  			require.Nil(t, err)
   752  		}
   753  	})
   754  
   755  	t.Run("running the cleanup", func(t *testing.T) {
   756  		err := vectorIndex.CleanUpTombstonedNodes(neverStop)
   757  		require.Nil(t, err)
   758  	})
   759  
   760  	t.Run("verify the graph no longer has any tombstones", func(t *testing.T) {
   761  		assert.Len(t, vectorIndex.tombstones, 0)
   762  	})
   763  
   764  	t.Run("destroy the index", func(t *testing.T) {
   765  		require.Nil(t, vectorIndex.Drop(context.Background()))
   766  	})
   767  }
   768  
// vectorsForDeleteTest returns the fixed set of three-dimensional vectors
// shared by the delete tests in this file.
//
// We need a certain number of elements so that we can make sure that nodes
// from all layers will eventually be deleted. Otherwise our tests would only
// cover edge cases which aren't very common in real life, while ignoring the
// most common deletes.
func vectorsForDeleteTest() [][]float32 {
	return [][]float32{
		{0.27335858, 0.42670676, 0.12599982},
		{0.34369454, 0.78510034, 0.78000546},
		{0.2342731, 0.076864816, 0.6405078},
		{0.07597838, 0.7752282, 0.87022865},
		{0.78632426, 0.06902865, 0.7423889},
		{0.3055758, 0.3901508, 0.9399572},
		{0.48687622, 0.26338226, 0.06495104},
		{0.5384028, 0.35410047, 0.8821815},
		{0.25123185, 0.62722564, 0.86443096},
		{0.58484185, 0.13103616, 0.4034975},
		{0.0019696166, 0.46822622, 0.42492124},
		{0.42401955, 0.8278863, 0.5952888},
		{0.15367928, 0.70778894, 0.0070928824},
		{0.95760256, 0.45898128, 0.1541115},
		{0.9125976, 0.9021616, 0.21607016},
		{0.9876307, 0.5243228, 0.37294936},
		{0.8194746, 0.56142205, 0.5130103},
		{0.805065, 0.62250346, 0.63715476},
		{0.9969276, 0.5115748, 0.18916714},
		{0.16419733, 0.15029702, 0.36020836},
		{0.9660323, 0.35887036, 0.6072966},
		{0.72765416, 0.27891788, 0.9094314},
		{0.8626208, 0.3540126, 0.3100354},
		{0.7153876, 0.17094712, 0.7801294},
		{0.23180388, 0.107446484, 0.69542855},
		{0.54731685, 0.8949827, 0.68316746},
		{0.15049729, 0.1293767, 0.0574729},
		{0.89379513, 0.67022973, 0.57360715},
		{0.725353, 0.25326362, 0.44264215},
		{0.2568602, 0.4986094, 0.9759933},
		{0.7300015, 0.70019704, 0.49546525},
		{0.54314494, 0.2004176, 0.63803226},
		{0.6180191, 0.5260845, 0.9373999},
		{0.63356537, 0.81430644, 0.78373694},
		{0.69995105, 0.84198904, 0.17851257},
		{0.5197941, 0.11502675, 0.95129955},
		{0.15791401, 0.07516741, 0.113447875},
		{0.06811827, 0.4450082, 0.98595786},
		{0.7153448, 0.41833848, 0.06332495},
		{0.6704102, 0.28931814, 0.031580303},
		{0.47773632, 0.73334247, 0.6925025},
		{0.7976896, 0.9499536, 0.6394833},
		{0.3074854, 0.14025249, 0.35961738},
		{0.49956197, 0.093575336, 0.790093},
		{0.4641653, 0.21276893, 0.528895},
		{0.1021849, 0.9416305, 0.46738508},
		{0.3790398, 0.50099677, 0.98233247},
		{0.39650732, 0.020929832, 0.53968865},
		{0.77604437, 0.8554197, 0.24056046},
		{0.07174444, 0.28758526, 0.67587185},
		{0.22292718, 0.66624546, 0.6077909},
		{0.22090498, 0.36197436, 0.40415043},
		{0.04838009, 0.120789215, 0.17928012},
		{0.55166364, 0.3400502, 0.43698996},
		{0.7638108, 0.47014108, 0.23208627},
		{0.9239513, 0.8418566, 0.23518613},
		{0.289589, 0.85010827, 0.055741556},
		{0.32436147, 0.18756394, 0.4217864},
		{0.041671168, 0.37824047, 0.66486764},
		{0.5052222, 0.07982704, 0.64345413},
		{0.62675995, 0.20138603, 0.8231867},
		{0.86306876, 0.9698708, 0.11398846},
		{0.68566775, 0.22026269, 0.13525572},
		{0.57706076, 0.32325208, 0.6122228},
		{0.80035216, 0.18560356, 0.6328281},
		{0.87145543, 0.19380389, 0.8863942},
		{0.33777508, 0.6056442, 0.9110077},
		{0.3961719, 0.49714503, 0.14191929},
		{0.5344362, 0.8166916, 0.75880384},
		{0.015749464, 0.63223976, 0.5470922},
		{0.10512444, 0.2212036, 0.24995685},
		{0.10831311, 0.27044898, 0.8668174},
		{0.3272971, 0.6659298, 0.87119603},
		{0.42913893, 0.14528985, 0.69957525},
		{0.33012474, 0.81964344, 0.092787445},
		{0.093618214, 0.90637344, 0.94406706},
		{0.12161567, 0.75131124, 0.40563175},
		{0.9154454, 0.75925833, 0.8406739},
		{0.81649286, 0.9025715, 0.3105051},
		{0.2927649, 0.22649862, 0.9708593},
		{0.30813727, 0.0079439245, 0.39662006},
		{0.94943213, 0.36778906, 0.217876},
		{0.716794, 0.3811725, 0.18448676},
		{0.66879725, 0.29722908, 0.0031202603},
		{0.11104216, 0.13094379, 0.0787222},
		{0.8508966, 0.86416596, 0.15885831},
		{0.2303136, 0.56660503, 0.17114973},
		{0.8632685, 0.4229249, 0.1936724},
		{0.03060897, 0.35226125, 0.8115969},
	}
}
   866  
   867  func TestDelete_EntrypointIssues(t *testing.T) {
   868  	// This test is motivated by flakyness of other tests. We seemed to have
   869  	// experienced a failure with the following structure
   870  	//
   871  	// Entrypoint: 6
   872  	// Max Level: 1
   873  	// Tombstones map[]
   874  
   875  	// Nodes and Connections:
   876  	// Node 0
   877  	// Level 0: Connections: [1 2 3 4 5 6 7 8]
   878  	// Node 1
   879  	// Level 0: Connections: [0 2 3 4 5 6 7 8]
   880  	// Node 2
   881  	// Level 0: Connections: [1 0 3 4 5 6 7 8]
   882  	// Node 3
   883  	// Level 0: Connections: [2 1 0 4 5 6 7 8]
   884  	// Node 4
   885  	// Level 0: Connections: [3 2 1 0 5 6 7 8]
   886  	// Node 5
   887  	// Level 0: Connections: [3 4 2 1 0 6 7 8]
   888  	// Node 6
   889  	// Level 0: Connections: [4 2 1 3 5 0 7 8]
   890  	// Level 1: Connections: [7]
   891  	// Node 7
   892  	// Level 1: Connections: [6]
   893  	// Level 0: Connections: [6 4 3 5 2 1 0 8]
   894  	// Node 8
   895  	// Level 0: Connections: [7 6 4 3 5 2 1 0]
   896  	//
   897  	// This test aims to rebuild this tree exactly (manually) and verifies that
   898  	// deletion of the old entrypoint (element 6), works without issue
   899  	//
   900  	// The underlying test set can be found in vectors_for_test.go
   901  
   902  	index, err := New(Config{
   903  		RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
   904  		ID:                    "delete-entrypoint-test",
   905  		MakeCommitLoggerThunk: MakeNoopCommitLogger,
   906  		DistanceProvider:      distancer.NewCosineDistanceProvider(),
   907  		VectorForIDThunk:      testVectorForID,
   908  	}, ent.UserConfig{
   909  		MaxConnections: 30,
   910  		EFConstruction: 128,
   911  
   912  		// The actual size does not matter for this test, but if it defaults to
   913  		// zero it will constantly think it's full and needs to be deleted - even
   914  		// after just being deleted, so make sure to use a positive number here.
   915  		VectorCacheMaxObjects: 100000,
   916  	}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
   917  		cyclemanager.NewCallbackGroupNoop(), testinghelpers.NewDummyStore(t))
   918  	require.Nil(t, err)
   919  
   920  	// manually build the index
   921  	index.entryPointID = 6
   922  	index.currentMaximumLayer = 1
   923  	index.nodes = make([]*vertex, 50)
   924  	index.nodes[0] = &vertex{
   925  		id: 0,
   926  		connections: [][]uint64{
   927  			{1, 2, 3, 4, 5, 6, 7, 8},
   928  		},
   929  	}
   930  	index.nodes[1] = &vertex{
   931  		id: 1,
   932  		connections: [][]uint64{
   933  			{0, 2, 3, 4, 5, 6, 7, 8},
   934  		},
   935  	}
   936  	index.nodes[2] = &vertex{
   937  		id: 2,
   938  		connections: [][]uint64{
   939  			{1, 0, 3, 4, 5, 6, 7, 8},
   940  		},
   941  	}
   942  	index.nodes[3] = &vertex{
   943  		id: 3,
   944  		connections: [][]uint64{
   945  			{2, 1, 0, 4, 5, 6, 7, 8},
   946  		},
   947  	}
   948  	index.nodes[4] = &vertex{
   949  		id: 4,
   950  		connections: [][]uint64{
   951  			{3, 2, 1, 0, 5, 6, 7, 8},
   952  		},
   953  	}
   954  	index.nodes[5] = &vertex{
   955  		id: 5,
   956  		connections: [][]uint64{
   957  			{3, 4, 2, 1, 0, 6, 7, 8},
   958  		},
   959  	}
   960  	index.nodes[6] = &vertex{
   961  		id: 6,
   962  		connections: [][]uint64{
   963  			{4, 3, 1, 3, 5, 0, 7, 8},
   964  			{7},
   965  		},
   966  		level: 1,
   967  	}
   968  	index.nodes[7] = &vertex{
   969  		id: 7,
   970  		connections: [][]uint64{
   971  			{6, 4, 3, 5, 2, 1, 0, 8},
   972  			{6},
   973  		},
   974  		level: 1,
   975  	}
   976  	index.nodes[8] = &vertex{
   977  		id: 8,
   978  		connections: [][]uint64{
   979  			8: {7, 6, 4, 3, 5, 2, 1, 0},
   980  		},
   981  	}
   982  
   983  	dumpIndex(index, "before delete")
   984  
   985  	t.Run("delete some elements and permanently delete tombstoned elements",
   986  		func(t *testing.T) {
   987  			err := index.Delete(6)
   988  			require.Nil(t, err)
   989  			err = index.Delete(8)
   990  			require.Nil(t, err)
   991  
   992  			err = index.CleanUpTombstonedNodes(neverStop)
   993  			require.Nil(t, err)
   994  		})
   995  
   996  	dumpIndex(index, "after delete")
   997  
   998  	expectedResults := []uint64{
   999  		3, 5, 4, // cluster 2
  1000  		7,       // cluster 3 with element 6 and 8 deleted
  1001  		2, 1, 0, // cluster 1
  1002  	}
  1003  
  1004  	t.Run("verify that the results are correct", func(t *testing.T) {
  1005  		position := 3
  1006  		res, _, err := index.knnSearchByVector(testVectors[position], 50, 36, nil)
  1007  		require.Nil(t, err)
  1008  		assert.Equal(t, expectedResults, res)
  1009  	})
  1010  
  1011  	// t.Fail()
  1012  	t.Run("destroy the index", func(t *testing.T) {
  1013  		require.Nil(t, index.Drop(context.Background()))
  1014  	})
  1015  }
  1016  
  1017  func TestDelete_MoreEntrypointIssues(t *testing.T) {
  1018  	vectors := [][]float32{
  1019  		{7, 1},
  1020  		{8, 2},
  1021  		{23, 14},
  1022  		{6.5, -1},
  1023  	}
  1024  
  1025  	vecForID := func(ctx context.Context, id uint64) ([]float32, error) {
  1026  		return vectors[int(id)], nil
  1027  	}
  1028  	// This test is motivated by flakyness of other tests. We seemed to have
  1029  	// experienced a failure with the following structure
  1030  	//
  1031  	// ID: thing_geoupdatetestclass_single_location
  1032  	// Entrypoint: 2
  1033  	// Max Level: 1
  1034  	// Tombstones map[0:{} 1:{}]
  1035  	//
  1036  	// Nodes and Connections:
  1037  	//   Node 0
  1038  	//     Level 0: Connections: [1]
  1039  	//   Node 1
  1040  	//     Level 0: Connections: [0 2]
  1041  	//     Level 1: Connections: [2]
  1042  	//   Node 2
  1043  	//     Level 1: Connections: [1]
  1044  	//     Level 0: Connections: [1]
  1045  
  1046  	index, err := New(Config{
  1047  		RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
  1048  		ID:                    "more-delete-entrypoint-flakyness-test",
  1049  		MakeCommitLoggerThunk: MakeNoopCommitLogger,
  1050  		DistanceProvider:      distancer.NewGeoProvider(),
  1051  		VectorForIDThunk:      vecForID,
  1052  		TempVectorForIDThunk:  TempVectorForIDThunk(vectors),
  1053  	}, ent.UserConfig{
  1054  		MaxConnections: 30,
  1055  		EFConstruction: 128,
  1056  
  1057  		// The actual size does not matter for this test, but if it defaults to
  1058  		// zero it will constantly think it's full and needs to be deleted - even
  1059  		// after just being deleted, so make sure to use a positive number here.
  1060  		VectorCacheMaxObjects: 100000,
  1061  	}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
  1062  		cyclemanager.NewCallbackGroupNoop(), testinghelpers.NewDummyStore(t))
  1063  	require.Nil(t, err)
  1064  
  1065  	// manually build the index
  1066  	index.entryPointID = 2
  1067  	index.currentMaximumLayer = 1
  1068  	index.tombstones = map[uint64]struct{}{
  1069  		0: {},
  1070  		1: {},
  1071  	}
  1072  	index.nodes = make([]*vertex, 50)
  1073  	index.nodes[0] = &vertex{
  1074  		id: 0,
  1075  		connections: [][]uint64{
  1076  			0: {1},
  1077  		},
  1078  	}
  1079  	index.nodes[1] = &vertex{
  1080  		id: 1,
  1081  		connections: [][]uint64{
  1082  			0: {0, 2},
  1083  			1: {2},
  1084  		},
  1085  	}
  1086  	index.nodes[2] = &vertex{
  1087  		id: 2,
  1088  		connections: [][]uint64{
  1089  			0: {1},
  1090  			1: {1},
  1091  		},
  1092  	}
  1093  
  1094  	dumpIndex(index, "before adding another element")
  1095  	t.Run("adding a third element", func(t *testing.T) {
  1096  		vec, _ := testVectorForID(context.TODO(), 3)
  1097  		index.Add(3, vec)
  1098  	})
  1099  
  1100  	expectedResults := []uint64{
  1101  		3, 2,
  1102  	}
  1103  
  1104  	t.Run("verify that the results are correct", func(t *testing.T) {
  1105  		position := 3
  1106  		res, _, err := index.knnSearchByVector(testVectors[position], 50, 36, nil)
  1107  		require.Nil(t, err)
  1108  		assert.Equal(t, expectedResults, res)
  1109  	})
  1110  
  1111  	t.Run("destroy the index", func(t *testing.T) {
  1112  		require.Nil(t, index.Drop(context.Background()))
  1113  	})
  1114  }
  1115  
  1116  func TestDelete_TombstonedEntrypoint(t *testing.T) {
  1117  	vecForID := func(ctx context.Context, id uint64) ([]float32, error) {
  1118  		// always return same vec  for all elements
  1119  		return []float32{0.1, 0.2}, nil
  1120  	}
  1121  	index, err := New(Config{
  1122  		RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
  1123  		ID:                    "tombstoned-entrypoint-test",
  1124  		MakeCommitLoggerThunk: MakeNoopCommitLogger,
  1125  		DistanceProvider:      distancer.NewCosineDistanceProvider(),
  1126  		VectorForIDThunk:      vecForID,
  1127  		TempVectorForIDThunk:  TempVectorForIDThunk([][]float32{{0.1, 0.2}}),
  1128  	}, ent.UserConfig{
  1129  		MaxConnections: 30,
  1130  		EFConstruction: 128,
  1131  		// explicitly turn off, so we only focus on the tombstoned periods
  1132  		CleanupIntervalSeconds: 0,
  1133  
  1134  		// The actual size does not matter for this test, but if it defaults to
  1135  		// zero it will constantly think it's full and needs to be deleted - even
  1136  		// after just being deleted, so make sure to use a positive number here.
  1137  		VectorCacheMaxObjects: 100000,
  1138  	}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
  1139  		cyclemanager.NewCallbackGroupNoop(), testinghelpers.NewDummyStore(t))
  1140  	require.Nil(t, err)
  1141  
  1142  	objVec := []float32{0.1, 0.2}
  1143  	searchVec := []float32{0.05, 0.05}
  1144  
  1145  	require.Nil(t, index.Add(0, objVec))
  1146  	require.Nil(t, index.Delete(0))
  1147  	require.Nil(t, index.Add(1, objVec))
  1148  
  1149  	res, _, err := index.SearchByVector(searchVec, 100, nil)
  1150  	require.Nil(t, err)
  1151  	assert.Equal(t, []uint64{1}, res, "should contain the only result")
  1152  
  1153  	t.Run("destroy the index", func(t *testing.T) {
  1154  		require.Nil(t, index.Drop(context.Background()))
  1155  	})
  1156  }
  1157  
  1158  func TestDelete_Flakyness_gh_1369(t *testing.T) {
  1159  	// parse a snapshot form a flaky test
  1160  	snapshotBefore := []byte(`{"labels":["ran a cleanup cycle"],"id":"delete-test","entrypoint":3,"currentMaximumLayer":3,"tombstones":{},"nodes":[{"id":1,"level":0,"connections":{"0":[11,25,33,3,29,32,5,19,30,7,17,27,21,31,36,34,35,23,15,9,13]}},{"id":3,"level":3,"connections":{"0":[1,29,11,5,25,33,19,32,7,17,30,21,35,31,27,36,23,34,9,15,13],"1":[29,36,13],"2":[29,36],"3":[36]}},{"id":5,"level":0,"connections":{"0":[29,19,7,32,35,21,1,31,3,33,23,25,11,17,36,27,30,9,15,34,13]}},{"id":7,"level":0,"connections":{"0":[32,19,21,31,5,35,23,29,33,36,17,1,9,27,25,30,11,3,15,13,34]}},{"id":9,"level":0,"connections":{"0":[36,23,31,21,15,17,27,7,32,35,30,13,19,33,5,25,29,11,1,34,3]}},{"id":11,"level":0,"connections":{"0":[25,33,1,30,17,3,27,32,34,29,19,7,5,36,15,21,31,23,9,13,35]}},{"id":13,"level":1,"connections":{"0":[15,27,34,36,30,17,9,33,25,31,23,21,11,32,7,1,19,35,5,29,3],"1":[36,29,3]}},{"id":15,"level":0,"connections":{"0":[13,27,36,17,30,9,34,33,31,23,25,21,32,11,7,1,19,35,5,29,3]}},{"id":17,"level":0,"connections":{"0":[27,30,36,33,15,32,25,31,9,11,21,7,23,1,34,13,19,5,29,35,3]}},{"id":19,"level":0,"connections":{"0":[5,7,32,29,35,21,31,23,1,33,17,3,25,36,11,27,9,30,15,34,13]}},{"id":21,"level":0,"connections":{"0":[31,23,7,35,32,19,9,36,5,17,27,33,29,30,15,1,25,11,3,13,34]}},{"id":23,"level":0,"connections":{"0":[31,21,9,35,7,36,32,19,17,5,27,33,15,29,30,25,1,13,11,3,34]}},{"id":25,"level":0,"connections":{"0":[11,33,1,30,17,27,32,3,34,29,7,19,36,5,15,21,31,23,9,13,35]}},{"id":27,"level":0,"connections":{"0":[17,30,36,15,33,25,13,9,34,32,11,31,21,7,23,1,19,5,29,35,3]}},{"id":29,"level":2,"connections":{"0":[5,19,32,7,3,1,33,35,21,25,31,11,23,17,30,36,27,9,15,34,13],"1":[3,36,13],"2":[3,36]}},{"id":30,"level":0,"connections":{"0":[27,17,33,25,15,36,11,34,32,1,13,9,31,7,21,23,19,29,5,3,35]}},{"id":31,"level":0,"connections":{"0":[21,23,7,32,35,9,36,19,17,5,27,33,29,30,15,25,1,11,13,3,34]}},{"id":32,"level":0,"connections":{"0":[7,19,21,31,5,33,29,17,23,1,35,36,2
5,27,30,11,9,3,15,34,13]}},{"id":33,"level":0,"connections":{"0":[25,11,1,17,30,32,27,7,19,36,29,5,21,31,3,34,15,23,9,35,13]}},{"id":34,"level":0,"connections":{"0":[30,27,15,13,25,17,11,33,36,1,32,9,31,7,21,3,23,19,29,5,35]}},{"id":35,"level":0,"connections":{"0":[21,7,31,23,19,5,32,29,9,36,17,33,1,27,25,30,3,11,15,13,34]}},{"id":36,"level":3,"connections":{"0":[17,9,27,15,31,23,21,30,32,7,33,13,25,19,35,11,34,1,5,29,3],"1":[13,29,3],"2":[29,3],"3":[3]}}]}
  1161  `)
  1162  
  1163  	vectors := vectorsForDeleteTest()
  1164  	vecForID := func(ctx context.Context, id uint64) ([]float32, error) {
  1165  		return vectors[int(id)], nil
  1166  	}
  1167  
  1168  	index, err := NewFromJSONDumpMap(snapshotBefore, vecForID)
  1169  	require.Nil(t, err)
  1170  	index.forbidFlat = true
  1171  
  1172  	var control []uint64
  1173  	t.Run("control search before delete with the respective allow list", func(t *testing.T) {
  1174  		allowList := helpers.NewAllowList()
  1175  		for i := range vectors {
  1176  			if i%2 == 0 {
  1177  				continue
  1178  			}
  1179  
  1180  			allowList.Insert(uint64(i))
  1181  		}
  1182  
  1183  		res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList)
  1184  		require.Nil(t, err)
  1185  		require.True(t, len(res) > 0)
  1186  
  1187  		control = res
  1188  	})
  1189  
  1190  	t.Run("delete the remaining even entries", func(t *testing.T) {
  1191  		require.Nil(t, index.Delete(30))
  1192  		require.Nil(t, index.Delete(32))
  1193  		require.Nil(t, index.Delete(34))
  1194  		require.Nil(t, index.Delete(36))
  1195  	})
  1196  
  1197  	t.Run("verify against control BEFORE Tombstone Cleanup", func(t *testing.T) {
  1198  		res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil)
  1199  		require.Nil(t, err)
  1200  		require.True(t, len(res) > 0)
  1201  		assert.Equal(t, control, res)
  1202  	})
  1203  
  1204  	t.Run("clean up tombstoned nodes", func(t *testing.T) {
  1205  		require.Nil(t, index.CleanUpTombstonedNodes(neverStop))
  1206  	})
  1207  
  1208  	t.Run("verify against control AFTER Tombstone Cleanup", func(t *testing.T) {
  1209  		res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil)
  1210  		require.Nil(t, err)
  1211  		require.True(t, len(res) > 0)
  1212  		assert.Equal(t, control, res)
  1213  	})
  1214  
  1215  	t.Run("now delete the entrypoint", func(t *testing.T) {
  1216  		require.Nil(t, index.Delete(index.entryPointID))
  1217  	})
  1218  
  1219  	t.Run("clean up tombstoned nodes", func(t *testing.T) {
  1220  		require.Nil(t, index.CleanUpTombstonedNodes(neverStop))
  1221  	})
  1222  
  1223  	t.Run("now delete the entrypoint", func(t *testing.T) {
  1224  		// this verifies that our findNewLocalEntrypoint also works when the global
  1225  		// entrypoint is affected
  1226  		require.Nil(t, index.Delete(index.entryPointID))
  1227  	})
  1228  
  1229  	t.Run("clean up tombstoned nodes", func(t *testing.T) {
  1230  		require.Nil(t, index.CleanUpTombstonedNodes(neverStop))
  1231  	})
  1232  
  1233  	t.Run("destroy the index", func(t *testing.T) {
  1234  		require.Nil(t, index.Drop(context.Background()))
  1235  	})
  1236  }
  1237  
  1238  func bruteForceCosine(vectors [][]float32, query []float32, k int) []uint64 {
  1239  	type distanceAndIndex struct {
  1240  		distance float32
  1241  		index    uint64
  1242  	}
  1243  
  1244  	distances := make([]distanceAndIndex, len(vectors))
  1245  
  1246  	d := distancer.NewCosineDistanceProvider().New(distancer.Normalize(query))
  1247  	for i, vec := range vectors {
  1248  		dist, _, _ := d.Distance(distancer.Normalize(vec))
  1249  		distances[i] = distanceAndIndex{
  1250  			index:    uint64(i),
  1251  			distance: dist,
  1252  		}
  1253  	}
  1254  
  1255  	sort.Slice(distances, func(a, b int) bool {
  1256  		return distances[a].distance < distances[b].distance
  1257  	})
  1258  
  1259  	if len(distances) < k {
  1260  		k = len(distances)
  1261  	}
  1262  
  1263  	out := make([]uint64, k)
  1264  	for i := 0; i < k; i++ {
  1265  		out[i] = distances[i].index
  1266  	}
  1267  
  1268  	return out
  1269  }
  1270  
  1271  func neverStop() bool {
  1272  	return false
  1273  }
  1274  
  1275  // This test simulates what happens when the EP is removed from the
  1276  // VectorForID-serving store
  1277  func Test_DeleteEPVecInUnderlyingObjectStore(t *testing.T) {
  1278  	var vectorIndex *hnsw
  1279  
  1280  	vectors := [][]float32{
  1281  		{1, 1},
  1282  		{2, 2},
  1283  		{3, 3},
  1284  	}
  1285  
  1286  	vectorErrors := []error{
  1287  		nil,
  1288  		nil,
  1289  		nil,
  1290  	}
  1291  	store := testinghelpers.NewDummyStore(t)
  1292  	defer store.Shutdown(context.Background())
  1293  
  1294  	t.Run("import the test vectors", func(t *testing.T) {
  1295  		index, err := New(Config{
  1296  			RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
  1297  			ID:                    "delete-ep-in-underlying-store-test",
  1298  			MakeCommitLoggerThunk: MakeNoopCommitLogger,
  1299  			DistanceProvider:      distancer.NewL2SquaredProvider(),
  1300  			VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
  1301  				fmt.Printf("vec for pos=%d is %v\n", id, vectors[int(id)])
  1302  				return vectors[int(id)], vectorErrors[int(id)]
  1303  			},
  1304  			TempVectorForIDThunk: TempVectorForIDThunk(vectors),
  1305  		}, ent.UserConfig{
  1306  			MaxConnections: 30,
  1307  			EFConstruction: 128,
  1308  
  1309  			// The actual size does not matter for this test, but if it defaults to
  1310  			// zero it will constantly think it's full and needs to be deleted - even
  1311  			// after just being deleted, so make sure to use a positive number here.
  1312  			VectorCacheMaxObjects: 100000,
  1313  		}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
  1314  			cyclemanager.NewCallbackGroupNoop(), store)
  1315  		require.Nil(t, err)
  1316  		vectorIndex = index
  1317  
  1318  		for i, vec := range vectors {
  1319  			err := vectorIndex.Add(uint64(i), vec)
  1320  			require.Nil(t, err)
  1321  		}
  1322  
  1323  		fmt.Printf("ep is %d\n", vectorIndex.entryPointID)
  1324  	})
  1325  
  1326  	t.Run("simulate ep vec deletion in object store", func(t *testing.T) {
  1327  		vectors[0] = nil
  1328  		vectorErrors[0] = storobj.NewErrNotFoundf(0, "deleted")
  1329  		vectorIndex.cache.Delete(context.Background(), 0)
  1330  	})
  1331  
  1332  	t.Run("try to insert a fourth vector", func(t *testing.T) {
  1333  		vectors = append(vectors, []float32{4, 4})
  1334  		vectorErrors = append(vectorErrors, nil)
  1335  
  1336  		pos := len(vectors) - 1
  1337  		err := vectorIndex.Add(uint64(pos), vectors[pos])
  1338  		require.Nil(t, err)
  1339  	})
  1340  }
  1341  
  1342  func TestDelete_WithCleaningUpTombstonesOncePreservesMaxConnections(t *testing.T) {
  1343  	// there is a single bulk clean event after all the deletes
  1344  	vectors := vectorsForDeleteTest()
  1345  	var vectorIndex *hnsw
  1346  
  1347  	store := testinghelpers.NewDummyStore(t)
  1348  	defer store.Shutdown(context.Background())
  1349  
  1350  	index, err := New(Config{
  1351  		RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
  1352  		ID:                    "delete-test",
  1353  		MakeCommitLoggerThunk: MakeNoopCommitLogger,
  1354  		DistanceProvider:      distancer.NewCosineDistanceProvider(),
  1355  		VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
  1356  			return vectors[int(id)], nil
  1357  		},
  1358  		TempVectorForIDThunk: TempVectorForIDThunk(vectors),
  1359  	}, ent.UserConfig{
  1360  		MaxConnections: 30,
  1361  		EFConstruction: 128,
  1362  
  1363  		// The actual size does not matter for this test, but if it defaults to
  1364  		// zero it will constantly think it's full and needs to be deleted - even
  1365  		// after just being deleted, so make sure to use a positive number here.
  1366  		VectorCacheMaxObjects: 100000,
  1367  	}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
  1368  		cyclemanager.NewCallbackGroupNoop(), store)
  1369  	require.Nil(t, err)
  1370  	vectorIndex = index
  1371  
  1372  	for i, vec := range vectors {
  1373  		err := vectorIndex.Add(uint64(i), vec)
  1374  		require.Nil(t, err)
  1375  	}
  1376  
  1377  	require.Equal(t, 60, index.maximumConnectionsLayerZero)
  1378  	some := false
  1379  	for _, node := range index.nodes {
  1380  		if node == nil {
  1381  			continue
  1382  		}
  1383  		require.LessOrEqual(t, len(node.connections[0]), index.maximumConnectionsLayerZero)
  1384  		some = some || len(node.connections[0]) > index.maximumConnections
  1385  	}
  1386  	require.True(t, some)
  1387  
  1388  	for i := range vectors {
  1389  		if i%2 != 0 {
  1390  			continue
  1391  		}
  1392  
  1393  		err := vectorIndex.Delete(uint64(i))
  1394  		require.Nil(t, err)
  1395  	}
  1396  
  1397  	err = vectorIndex.CleanUpTombstonedNodes(neverStop)
  1398  	require.Nil(t, err)
  1399  	require.Equal(t, 60, index.maximumConnectionsLayerZero)
  1400  	some = false
  1401  	for _, node := range index.nodes {
  1402  		if node == nil {
  1403  			continue
  1404  		}
  1405  		require.LessOrEqual(t, len(node.connections[0]), index.maximumConnectionsLayerZero)
  1406  		some = some || len(node.connections[0]) > index.maximumConnections
  1407  	}
  1408  	require.True(t, some)
  1409  
  1410  	t.Run("destroy the index", func(t *testing.T) {
  1411  		require.Nil(t, vectorIndex.Drop(context.Background()))
  1412  	})
  1413  }
  1414  
  1415  func TestDelete_WithCleaningUpTombstonesOnceRemovesAllRelatedConnections(t *testing.T) {
  1416  	// there is a single bulk clean event after all the deletes
  1417  	vectors := vectorsForDeleteTest()
  1418  	var vectorIndex *hnsw
  1419  	store := testinghelpers.NewDummyStore(t)
  1420  
  1421  	index, err := New(Config{
  1422  		RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
  1423  		ID:                    "delete-test",
  1424  		MakeCommitLoggerThunk: MakeNoopCommitLogger,
  1425  		DistanceProvider:      distancer.NewCosineDistanceProvider(),
  1426  		VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
  1427  			return vectors[int(id)], nil
  1428  		},
  1429  		TempVectorForIDThunk: TempVectorForIDThunk(vectors),
  1430  	}, ent.UserConfig{
  1431  		MaxConnections: 30,
  1432  		EFConstruction: 128,
  1433  
  1434  		// The actual size does not matter for this test, but if it defaults to
  1435  		// zero it will constantly think it's full and needs to be deleted - even
  1436  		// after just being deleted, so make sure to use a positive number here.
  1437  		VectorCacheMaxObjects: 100000,
  1438  	}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
  1439  		cyclemanager.NewCallbackGroupNoop(), store)
  1440  	require.Nil(t, err)
  1441  	vectorIndex = index
  1442  
  1443  	for i, vec := range vectors {
  1444  		err := vectorIndex.Add(uint64(i), vec)
  1445  		require.Nil(t, err)
  1446  	}
  1447  
  1448  	for i := range vectors {
  1449  		if i%2 != 0 {
  1450  			continue
  1451  		}
  1452  
  1453  		err := vectorIndex.Delete(uint64(i))
  1454  		require.Nil(t, err)
  1455  	}
  1456  
  1457  	err = vectorIndex.CleanUpTombstonedNodes(neverStop)
  1458  	require.Nil(t, err)
  1459  
  1460  	for i, node := range vectorIndex.nodes {
  1461  		if node == nil {
  1462  			continue
  1463  		}
  1464  		assert.NotEqual(t, 0, i%2)
  1465  		for level, connections := range node.connections {
  1466  			for _, id := range connections {
  1467  				assert.NotEqual(t, uint64(0), id%2)
  1468  				if id%2 == 0 {
  1469  					fmt.Println("at: ", vectorIndex.entryPointID, i, level, id)
  1470  				}
  1471  			}
  1472  		}
  1473  	}
  1474  
  1475  	require.Nil(t, vectorIndex.Drop(context.Background()))
  1476  	store.Shutdown(context.Background())
  1477  }
  1478  
  1479  func TestDelete_WithCleaningUpTombstonesWithHighConcurrency(t *testing.T) {
  1480  	os.Setenv("TOMBSTONE_DELETION_CONCURRENCY", "100")
  1481  	defer os.Unsetenv("TOMBSTONE_DELETION_CONCURRENCY")
  1482  	// there is a single bulk clean event after all the deletes
  1483  	vectors, _ := testinghelpers.RandomVecs(3_000, 1, 1536)
  1484  	var vectorIndex *hnsw
  1485  
  1486  	store := testinghelpers.NewDummyStore(t)
  1487  	defer store.Shutdown(context.Background())
  1488  
  1489  	t.Run("import the test vectors", func(t *testing.T) {
  1490  		index, err := New(Config{
  1491  			RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
  1492  			ID:                    "delete-test",
  1493  			MakeCommitLoggerThunk: MakeNoopCommitLogger,
  1494  			DistanceProvider:      distancer.NewCosineDistanceProvider(),
  1495  			VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) {
  1496  				return vectors[int(id)], nil
  1497  			},
  1498  			TempVectorForIDThunk: TempVectorForIDThunk(vectors),
  1499  		}, ent.UserConfig{
  1500  			MaxConnections: 30,
  1501  			EFConstruction: 128,
  1502  
  1503  			// The actual size does not matter for this test, but if it defaults to
  1504  			// zero it will constantly think it's full and needs to be deleted - even
  1505  			// after just being deleted, so make sure to use a positive number here.
  1506  			VectorCacheMaxObjects: 100000,
  1507  		}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(),
  1508  			cyclemanager.NewCallbackGroupNoop(), store)
  1509  		require.Nil(t, err)
  1510  		vectorIndex = index
  1511  
  1512  		for i, vec := range vectors {
  1513  			err := vectorIndex.Add(uint64(i), vec)
  1514  			require.Nil(t, err)
  1515  		}
  1516  	})
  1517  
  1518  	fmt.Printf("entrypoint before %d\n", vectorIndex.entryPointID)
  1519  	t.Run("deleting elements", func(t *testing.T) {
  1520  		for i := range vectors {
  1521  			if i < 10 {
  1522  				continue
  1523  			}
  1524  
  1525  			err := vectorIndex.Delete(uint64(i))
  1526  			require.Nil(t, err)
  1527  		}
  1528  	})
  1529  
  1530  	fmt.Printf("entrypoint after %d\n", vectorIndex.entryPointID)
  1531  
  1532  	t.Run("running the cleanup", func(t *testing.T) {
  1533  		err := vectorIndex.CleanUpTombstonedNodes(neverStop)
  1534  		require.Nil(t, err)
  1535  	})
  1536  
  1537  	t.Run("verify the graph no longer has any tombstones", func(t *testing.T) {
  1538  		assert.Len(t, vectorIndex.tombstones, 0)
  1539  	})
  1540  
  1541  	t.Run("destroy the index", func(t *testing.T) {
  1542  		require.Nil(t, vectorIndex.Drop(context.Background()))
  1543  	})
  1544  }