github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/delete_test.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "context" 16 "fmt" 17 "os" 18 "sort" 19 "sync" 20 "testing" 21 22 "github.com/stretchr/testify/assert" 23 "github.com/stretchr/testify/require" 24 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 25 "github.com/weaviate/weaviate/adapters/repos/db/lsmkv" 26 "github.com/weaviate/weaviate/adapters/repos/db/vector/common" 27 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer" 28 "github.com/weaviate/weaviate/adapters/repos/db/vector/testinghelpers" 29 "github.com/weaviate/weaviate/entities/cyclemanager" 30 "github.com/weaviate/weaviate/entities/storobj" 31 ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw" 32 ) 33 34 func TempVectorForIDThunk(vectors [][]float32) func(context.Context, uint64, *common.VectorSlice) ([]float32, error) { 35 return func(ctx context.Context, id uint64, container *common.VectorSlice) ([]float32, error) { 36 copy(container.Slice, vectors[int(id)]) 37 return vectors[int(id)], nil 38 } 39 } 40 41 func TestDelete_WithoutCleaningUpTombstones(t *testing.T) { 42 vectors := vectorsForDeleteTest() 43 var vectorIndex *hnsw 44 45 store := testinghelpers.NewDummyStore(t) 46 defer store.Shutdown(context.Background()) 47 t.Run("import the test vectors", func(t *testing.T) { 48 index, err := New(Config{ 49 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 50 ID: "delete-test", 51 MakeCommitLoggerThunk: MakeNoopCommitLogger, 52 DistanceProvider: distancer.NewCosineDistanceProvider(), 53 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 54 return vectors[int(id)], nil 55 }, 56 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 57 }, ent.UserConfig{ 58 MaxConnections: 30, 59 EFConstruction: 128, 60 61 // The actual size does not matter for this test, but if it defaults to 62 // zero it will constantly think it's full and needs to be deleted - even 63 // after just being deleted, so make sure to use a positive number here. 64 VectorCacheMaxObjects: 100000, 65 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 66 cyclemanager.NewCallbackGroupNoop(), store) 67 require.Nil(t, err) 68 vectorIndex = index 69 70 for i, vec := range vectors { 71 err := vectorIndex.Add(uint64(i), vec) 72 require.Nil(t, err) 73 } 74 }) 75 76 var control []uint64 77 78 t.Run("vectors are cached correctly", func(t *testing.T) { 79 assert.Equal(t, len(vectors), int(vectorIndex.cache.CountVectors())) 80 }) 81 82 t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) { 83 allowList := helpers.NewAllowList() 84 for i := range vectors { 85 if i%2 == 0 { 86 continue 87 } 88 89 allowList.Insert(uint64(i)) 90 } 91 92 res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList) 93 require.Nil(t, err) 94 require.True(t, len(res) > 0) 95 control = res 96 }) 97 98 t.Run("deleting every even element", func(t *testing.T) { 99 for i := range vectors { 100 if i%2 != 0 { 101 continue 102 } 103 104 err := vectorIndex.Delete(uint64(i)) 105 require.Nil(t, err) 106 } 107 }) 108 109 t.Run("vector cache holds half the original vectors", func(t *testing.T) { 110 vectorIndex.CleanUpTombstonedNodes(neverStop) 111 assert.Equal(t, len(vectors)/2, int(vectorIndex.cache.CountVectors())) 112 }) 113 114 t.Run("start a search that should only contain the remaining elements", func(t *testing.T) { 115 res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) 116 require.Nil(t, err) 117 require.True(t, len(res) > 0) 118 119 for _, elem := range res { 120 if elem%2 == 0 { 121 t.Errorf("search result contained an even element: %d", elem) 122 } 123 } 124 125 assert.Equal(t, control, res) 126 }) 127 128 t.Run("destroy the index", func(t *testing.T) { 129 require.Nil(t, vectorIndex.Drop(context.Background())) 130 }) 131 132 t.Run("vector cache holds no vectors", func(t *testing.T) { 133 assert.Equal(t, 0, int(vectorIndex.cache.CountVectors())) 134 }) 135 } 136 137 func TestDelete_WithCleaningUpTombstonesOnce(t *testing.T) { 138 // there is a single bulk clean event after all the deletes 139 vectors := vectorsForDeleteTest() 140 var vectorIndex *hnsw 141 142 store := testinghelpers.NewDummyStore(t) 143 defer store.Shutdown(context.Background()) 144 145 t.Run("import the test vectors", func(t *testing.T) { 146 index, err := New(Config{ 147 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 148 ID: "delete-test", 149 MakeCommitLoggerThunk: MakeNoopCommitLogger, 150 DistanceProvider: distancer.NewCosineDistanceProvider(), 151 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 152 return vectors[int(id)], nil 153 }, 154 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 155 }, ent.UserConfig{ 156 MaxConnections: 30, 157 EFConstruction: 128, 158 159 // The actual size does not matter for this test, but if it defaults to 160 // zero it will constantly think it's full and needs to be deleted - even 161 // after just being deleted, so make sure to use a positive number here. 162 VectorCacheMaxObjects: 100000, 163 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 164 cyclemanager.NewCallbackGroupNoop(), store) 165 require.Nil(t, err) 166 vectorIndex = index 167 168 for i, vec := range vectors { 169 err := vectorIndex.Add(uint64(i), vec) 170 require.Nil(t, err) 171 } 172 }) 173 174 var control []uint64 175 var bfControl []uint64 176 177 t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) { 178 allowList := helpers.NewAllowList() 179 for i := range vectors { 180 if i%2 == 0 { 181 continue 182 } 183 184 allowList.Insert(uint64(i)) 185 } 186 187 res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList) 188 require.Nil(t, err) 189 require.True(t, len(res) > 0) 190 require.Len(t, res, 20) 191 control = res 192 }) 193 194 t.Run("brute force control", func(t *testing.T) { 195 bf := bruteForceCosine(vectors, []float32{0.1, 0.1, 0.1}, 100) 196 bfControl = make([]uint64, len(bf)) 197 i := 0 198 for _, elem := range bf { 199 if elem%2 == 0 { 200 continue 201 } 202 203 bfControl[i] = elem 204 i++ 205 } 206 207 if i > 20 { 208 i = 20 209 } 210 211 bfControl = bfControl[:i] 212 assert.Equal(t, bfControl, control, "control should match bf control") 213 }) 214 215 fmt.Printf("entrypoint before %d\n", vectorIndex.entryPointID) 216 t.Run("deleting every even element", func(t *testing.T) { 217 for i := range vectors { 218 if i%2 != 0 { 219 continue 220 } 221 222 err := vectorIndex.Delete(uint64(i)) 223 require.Nil(t, err) 224 } 225 }) 226 227 t.Run("running the cleanup", func(t *testing.T) { 228 err := vectorIndex.CleanUpTombstonedNodes(neverStop) 229 require.Nil(t, err) 230 }) 231 232 t.Run("start a search that should only contain the remaining elements", func(t *testing.T) { 233 res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) 234 require.Nil(t, err) 235 require.True(t, len(res) > 0) 236 237 for _, elem := range res { 238 if elem%2 == 0 { 239 t.Errorf("search result contained an even element: %d", elem) 240 } 241 } 242 243 assert.Equal(t, control, res) 244 }) 245 246 t.Run("verify the graph no longer has any tombstones", func(t *testing.T) { 247 assert.Len(t, vectorIndex.tombstones, 0) 248 }) 249 250 t.Run("destroy the index", func(t *testing.T) { 251 require.Nil(t, vectorIndex.Drop(context.Background())) 252 }) 253 } 254 255 func TestDelete_WithCleaningUpTombstonesInBetween(t *testing.T) { 256 // there is a single bulk clean event after all the deletes 257 vectors := vectorsForDeleteTest() 258 var vectorIndex *hnsw 259 store := testinghelpers.NewDummyStore(t) 260 defer store.Shutdown(context.Background()) 261 262 t.Run("import the test vectors", func(t *testing.T) { 263 index, err := New(Config{ 264 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 265 ID: "delete-test", 266 MakeCommitLoggerThunk: MakeNoopCommitLogger, 267 DistanceProvider: distancer.NewCosineDistanceProvider(), 268 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 269 return vectors[int(id)], nil 270 }, 271 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 272 }, ent.UserConfig{ 273 MaxConnections: 30, 274 EFConstruction: 128, 275 276 // The actual size does not matter for this test, but if it defaults to 277 // zero it will constantly think it's full and needs to be deleted - even 278 // after just being deleted, so make sure to use a positive number here. 279 VectorCacheMaxObjects: 100000, 280 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 281 cyclemanager.NewCallbackGroupNoop(), store) 282 // makes sure index is build only with level 0. To be removed after fixing WEAVIATE-179 283 index.randFunc = func() float64 { return 0.1 } 284 285 require.Nil(t, err) 286 vectorIndex = index 287 288 for i, vec := range vectors { 289 err := vectorIndex.Add(uint64(i), vec) 290 require.Nil(t, err) 291 } 292 }) 293 294 var control []uint64 295 296 t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) { 297 allowList := helpers.NewAllowList() 298 for i := range vectors { 299 if i%2 == 0 { 300 continue 301 } 302 303 allowList.Insert(uint64(i)) 304 } 305 306 res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList) 307 require.Nil(t, err) 308 require.True(t, len(res) > 0) 309 310 control = res 311 }) 312 313 t.Run("deleting every even element", func(t *testing.T) { 314 for i := range vectors { 315 if i%10 == 0 { 316 // occasionally run clean up 317 err := vectorIndex.CleanUpTombstonedNodes(neverStop) 318 require.Nil(t, err) 319 } 320 321 if i%2 != 0 { 322 continue 323 } 324 325 err := vectorIndex.Delete(uint64(i)) 326 require.Nil(t, err) 327 } 328 329 // finally run one final cleanup 330 err := vectorIndex.CleanUpTombstonedNodes(neverStop) 331 require.Nil(t, err) 332 }) 333 334 t.Run("start a search that should only contain the remaining elements", func(t *testing.T) { 335 res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) 336 require.Nil(t, err) 337 require.True(t, len(res) > 0) 338 339 for _, elem := range res { 340 if elem%2 == 0 { 341 t.Errorf("search result contained an even element: %d", elem) 342 } 343 } 344 345 assert.Equal(t, control, res) 346 }) 347 348 t.Run("verify the graph no longer has any tombstones", func(t *testing.T) { 349 assert.Len(t, vectorIndex.tombstones, 0) 350 }) 351 352 t.Run("delete the remaining elements", func(t *testing.T) { 353 for i := range vectors { 354 if i%2 == 0 { 355 continue 356 } 357 358 err := vectorIndex.Delete(uint64(i)) 359 require.Nil(t, err) 360 } 361 362 err := vectorIndex.CleanUpTombstonedNodes(neverStop) 363 require.Nil(t, err) 364 }) 365 366 t.Run("try to insert again and search", func(t *testing.T) { 367 for i := 0; i < 5; i++ { 368 err := vectorIndex.Add(uint64(i), vectors[i]) 369 require.Nil(t, err) 370 } 371 372 res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) 373 require.Nil(t, err) 374 assert.ElementsMatch(t, []uint64{0, 1, 2, 3, 4}, res) 375 }) 376 377 t.Run("destroy the index", func(t *testing.T) { 378 require.Nil(t, vectorIndex.Drop(context.Background())) 379 }) 380 381 store.Shutdown(context.Background()) 382 } 383 384 func createIndexImportAllVectorsAndDeleteEven(t *testing.T, vectors [][]float32, store *lsmkv.Store) (index *hnsw, remainingResult []uint64) { 385 index, err := New(Config{ 386 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 387 ID: "delete-test", 388 MakeCommitLoggerThunk: MakeNoopCommitLogger, 389 DistanceProvider: distancer.NewCosineDistanceProvider(), 390 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 391 return vectors[int(id)], nil 392 }, 393 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 394 }, ent.UserConfig{ 395 MaxConnections: 30, 396 EFConstruction: 128, 397 398 // The actual size does not matter for this test, but if it defaults to 399 // zero it will constantly think it's full and needs to be deleted - even 400 // after just being deleted, so make sure to use a positive number here. 401 VectorCacheMaxObjects: 100000, 402 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 403 cyclemanager.NewCallbackGroupNoop(), store) 404 require.Nil(t, err) 405 406 // makes sure index is build only with level 0. To be removed after fixing WEAVIATE-179 407 index.randFunc = func() float64 { return 0.1 } 408 409 // to speed up test execution, size of nodes array is decreased 410 // from default 25k to little over number of vectors 411 index.nodes = make([]*vertex, int(1.2*float64(len(vectors)))) 412 413 for i, vec := range vectors { 414 err := index.Add(uint64(i), vec) 415 require.Nil(t, err) 416 } 417 418 for i := range vectors { 419 if i%2 != 0 { 420 continue 421 } 422 err := index.Delete(uint64(i)) 423 require.Nil(t, err) 424 } 425 426 res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil) 427 require.Nil(t, err) 428 require.True(t, len(res) > 0) 429 430 for _, elem := range res { 431 if elem%2 == 0 { 432 t.Errorf("search result contained an even element: %d", elem) 433 } 434 } 435 436 return index, res 437 } 438 439 func genStopAtFunc(i int) func() bool { 440 counter := 0 441 mutex := &sync.Mutex{} 442 return func() bool { 443 mutex.Lock() 444 defer mutex.Unlock() 445 if counter < i { 446 counter++ 447 return false 448 } 449 450 return true 451 } 452 } 453 454 func TestDelete_WithCleaningUpTombstonesStopped(t *testing.T) { 455 vectors := vectorsForDeleteTest() 456 var index *hnsw 457 var possibleStopsCount int 458 // due to not yet resolved bug (https://semi-technology.atlassian.net/browse/WEAVIATE-179) 459 // db can return less vectors than are actually stored after tombstones cleanup 460 // controlRemainingResult contains all odd vectors (before cleanup was performed) 461 // controlRemainingResultAfterCleanup contains most of odd vectors (after cleanup was performed) 462 // 463 // this test verifies if partial cleanup will not change search output, therefore depending on 464 // where cleanup method was stopped, subset of controlRemainingResult is expected, though all 465 // vectors from controlRemainingResultAfterCleanup should be returned 466 // TODO to be simplified after fixing WEAVIATE-179, all results should be the same 467 var controlRemainingResult []uint64 468 var controlRemainingResultAfterCleanup []uint64 469 store := testinghelpers.NewDummyStore(t) 470 defer store.Shutdown(context.Background()) 471 472 t.Run("create control index", func(t *testing.T) { 473 index, controlRemainingResult = createIndexImportAllVectorsAndDeleteEven(t, vectors, store) 474 }) 475 476 t.Run("count all cleanup tombstones stops", func(t *testing.T) { 477 counter := 0 478 mutex := &sync.Mutex{} 479 countingStopFunc := func() bool { 480 mutex.Lock() 481 counter++ 482 mutex.Unlock() 483 return false 484 } 485 486 err := index.CleanUpTombstonedNodes(countingStopFunc) 487 require.Nil(t, err) 488 489 possibleStopsCount = counter 490 }) 491 492 t.Run("search remaining elements after cleanup", func(t *testing.T) { 493 res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil) 494 require.Nil(t, err) 495 require.True(t, len(res) > 0) 496 497 for _, elem := range res { 498 if elem%2 == 0 { 499 t.Errorf("search result contained an even element: %d", elem) 500 } 501 } 502 controlRemainingResultAfterCleanup = res 503 }) 504 505 t.Run("destroy the control index", func(t *testing.T) { 506 require.Nil(t, index.Drop(context.Background())) 507 }) 508 509 for i := 0; i < possibleStopsCount; i++ { 510 index, _ = createIndexImportAllVectorsAndDeleteEven(t, vectors, store) 511 512 t.Run("stop cleanup at place", func(t *testing.T) { 513 require.Nil(t, index.CleanUpTombstonedNodes(genStopAtFunc(i))) 514 }) 515 516 t.Run("search remaining elements after partial cleanup", func(t *testing.T) { 517 res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil) 518 require.Nil(t, err) 519 require.Subset(t, controlRemainingResult, res) 520 require.Subset(t, res, controlRemainingResultAfterCleanup) 521 }) 522 523 t.Run("run complete cleanup", func(t *testing.T) { 524 require.Nil(t, index.CleanUpTombstonedNodes(neverStop)) 525 }) 526 527 t.Run("search remaining elements after complete cleanup", func(t *testing.T) { 528 res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil) 529 require.Nil(t, err) 530 require.Subset(t, controlRemainingResult, res) 531 require.Subset(t, res, controlRemainingResultAfterCleanup) 532 }) 533 534 t.Run("destroy the index", func(t *testing.T) { 535 require.Nil(t, index.Drop(context.Background())) 536 }) 537 } 538 } 539 540 func TestDelete_InCompressedIndex_WithCleaningUpTombstonesOnce(t *testing.T) { 541 var ( 542 vectorIndex *hnsw 543 // there is a single bulk clean event after all the deletes 544 vectors = vectorsForDeleteTest() 545 rootPath = t.TempDir() 546 userConfig = ent.UserConfig{ 547 MaxConnections: 30, 548 EFConstruction: 128, 549 550 // The actual size does not matter for this test, but if it defaults to 551 // zero it will constantly think it's full and needs to be deleted - even 552 // after just being deleted, so make sure to use a positive number here. 553 VectorCacheMaxObjects: 100000, 554 PQ: ent.PQConfig{ 555 Enabled: true, 556 Encoder: ent.PQEncoder{ 557 Type: ent.PQEncoderTypeTile, 558 Distribution: ent.PQEncoderDistributionNormal, 559 }, 560 }, 561 } 562 ) 563 store := testinghelpers.NewDummyStore(t) 564 defer store.Shutdown(context.Background()) 565 566 t.Run("import the test vectors", func(t *testing.T) { 567 index, err := New(Config{ 568 RootPath: rootPath, 569 ID: "delete-test", 570 MakeCommitLoggerThunk: MakeNoopCommitLogger, 571 DistanceProvider: distancer.NewCosineDistanceProvider(), 572 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 573 if int(id) >= len(vectors) { 574 return nil, storobj.NewErrNotFoundf(id, "out of range") 575 } 576 return vectors[int(id)], nil 577 }, 578 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 579 }, userConfig, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 580 cyclemanager.NewCallbackGroupNoop(), store) 581 require.Nil(t, err) 582 vectorIndex = index 583 584 for i, vec := range vectors { 585 err := vectorIndex.Add(uint64(i), vec) 586 require.Nil(t, err) 587 } 588 cfg := ent.PQConfig{ 589 Enabled: true, 590 Encoder: ent.PQEncoder{ 591 Type: ent.PQEncoderTypeTile, 592 Distribution: ent.PQEncoderDistributionLogNormal, 593 }, 594 BitCompression: false, 595 Segments: 3, 596 Centroids: 256, 597 } 598 userConfig.PQ = cfg 599 index.compress(userConfig) 600 }) 601 602 var control []uint64 603 var bfControl []uint64 604 605 t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) { 606 allowList := helpers.NewAllowList() 607 for i := range vectors { 608 if i%2 == 0 { 609 continue 610 } 611 612 allowList.Insert(uint64(i)) 613 } 614 615 res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList) 616 require.Nil(t, err) 617 require.True(t, len(res) > 0) 618 require.Len(t, res, 20) 619 control = res 620 }) 621 622 t.Run("brute force control", func(t *testing.T) { 623 bf := bruteForceCosine(vectors, []float32{0.1, 0.1, 0.1}, 100) 624 bfControl = make([]uint64, len(bf)) 625 i := 0 626 for _, elem := range bf { 627 if elem%2 == 0 { 628 continue 629 } 630 631 bfControl[i] = elem 632 i++ 633 } 634 635 if i > 20 { 636 i = 20 637 } 638 639 bfControl = bfControl[:i] 640 recall := float32(testinghelpers.MatchesInLists(bfControl, control)) / float32(len(bfControl)) 641 fmt.Println(recall) 642 assert.True(t, recall > 0.6, "control should match bf control") 643 }) 644 645 fmt.Printf("entrypoint before %d\n", vectorIndex.entryPointID) 646 t.Run("deleting every even element", func(t *testing.T) { 647 for i := range vectors { 648 if i%2 != 0 { 649 continue 650 } 651 652 err := vectorIndex.Delete(uint64(i)) 653 require.Nil(t, err) 654 } 655 }) 656 657 t.Run("running the cleanup", func(t *testing.T) { 658 err := vectorIndex.CleanUpTombstonedNodes(neverStop) 659 require.Nil(t, err) 660 }) 661 662 t.Run("start a search that should only contain the remaining elements", func(t *testing.T) { 663 res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) 664 require.Nil(t, err) 665 require.True(t, len(res) > 0) 666 667 for _, elem := range res { 668 if elem%2 == 0 { 669 t.Errorf("search result contained an even element: %d", elem) 670 } 671 } 672 673 recall := float32(testinghelpers.MatchesInLists(res, control)) / float32(len(control)) 674 assert.True(t, recall > 0.6) 675 }) 676 677 t.Run("verify the graph no longer has any tombstones", func(t *testing.T) { 678 assert.Len(t, vectorIndex.tombstones, 0) 679 }) 680 681 t.Run("destroy the index", func(t *testing.T) { 682 require.Nil(t, vectorIndex.Drop(context.Background())) 683 }) 684 } 685 686 func TestDelete_InCompressedIndex_WithCleaningUpTombstonesOnce_DoesNotCrash(t *testing.T) { 687 var ( 688 vectorIndex *hnsw 689 // there is a single bulk clean event after all the deletes 690 vectors = vectorsForDeleteTest() 691 rootPath = t.TempDir() 692 userConfig = ent.UserConfig{ 693 MaxConnections: 30, 694 EFConstruction: 128, 695 696 // The actual size does not matter for this test, but if it defaults to 697 // zero it will constantly think it's full and needs to be deleted - even 698 // after just being deleted, so make sure to use a positive number here. 699 VectorCacheMaxObjects: 100000, 700 PQ: ent.PQConfig{Enabled: true, Encoder: ent.PQEncoder{Type: "tile", Distribution: "normal"}}, 701 } 702 ) 703 704 store := testinghelpers.NewDummyStore(t) 705 defer store.Shutdown(context.Background()) 706 707 t.Run("import the test vectors", func(t *testing.T) { 708 index, err := New(Config{ 709 RootPath: rootPath, 710 ID: "delete-test", 711 MakeCommitLoggerThunk: MakeNoopCommitLogger, 712 DistanceProvider: distancer.NewCosineDistanceProvider(), 713 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 714 return vectors[int(id%uint64(len(vectors)))], nil 715 }, 716 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 717 }, userConfig, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 718 cyclemanager.NewCallbackGroupNoop(), store) 719 require.Nil(t, err) 720 vectorIndex = index 721 722 for i, vec := range vectors { 723 err := vectorIndex.Add(uint64(i), vec) 724 require.Nil(t, err) 725 } 726 cfg := ent.PQConfig{ 727 Enabled: true, 728 Encoder: ent.PQEncoder{ 729 Type: ent.PQEncoderTypeTile, 730 Distribution: ent.PQEncoderDistributionLogNormal, 731 }, 732 BitCompression: false, 733 Segments: 3, 734 Centroids: 256, 735 } 736 userConfig.PQ = cfg 737 index.compress(userConfig) 738 for i := len(vectors); i < 1000; i++ { 739 err := vectorIndex.Add(uint64(i), vectors[i%len(vectors)]) 740 require.Nil(t, err) 741 } 742 }) 743 744 t.Run("deleting every even element", func(t *testing.T) { 745 for i := range vectors { 746 if i%2 != 0 { 747 continue 748 } 749 750 err := vectorIndex.Delete(uint64(i)) 751 require.Nil(t, err) 752 } 753 }) 754 755 t.Run("running the cleanup", func(t *testing.T) { 756 err := vectorIndex.CleanUpTombstonedNodes(neverStop) 757 require.Nil(t, err) 758 }) 759 760 t.Run("verify the graph no longer has any tombstones", func(t *testing.T) { 761 assert.Len(t, vectorIndex.tombstones, 0) 762 }) 763 764 t.Run("destroy the index", func(t *testing.T) { 765 require.Nil(t, vectorIndex.Drop(context.Background())) 766 }) 767 } 768 769 // we need a certain number of elements so that we can make sure that nodes 770 // from all layers will eventually be deleted, otherwise our test only tests 771 // edge cases which aren't very common in real life, but ignore the most common 772 // deletes 773 func vectorsForDeleteTest() [][]float32 { 774 return [][]float32{ 775 {0.27335858, 0.42670676, 0.12599982}, 776 {0.34369454, 0.78510034, 0.78000546}, 777 {0.2342731, 0.076864816, 0.6405078}, 778 {0.07597838, 0.7752282, 0.87022865}, 779 {0.78632426, 0.06902865, 0.7423889}, 780 {0.3055758, 0.3901508, 0.9399572}, 781 {0.48687622, 0.26338226, 0.06495104}, 782 {0.5384028, 0.35410047, 0.8821815}, 783 {0.25123185, 0.62722564, 0.86443096}, 784 {0.58484185, 0.13103616, 0.4034975}, 785 {0.0019696166, 0.46822622, 0.42492124}, 786 {0.42401955, 0.8278863, 0.5952888}, 787 {0.15367928, 0.70778894, 0.0070928824}, 788 {0.95760256, 0.45898128, 0.1541115}, 789 {0.9125976, 0.9021616, 0.21607016}, 790 {0.9876307, 0.5243228, 0.37294936}, 791 {0.8194746, 0.56142205, 0.5130103}, 792 {0.805065, 0.62250346, 0.63715476}, 793 {0.9969276, 0.5115748, 0.18916714}, 794 {0.16419733, 0.15029702, 0.36020836}, 795 {0.9660323, 0.35887036, 0.6072966}, 796 {0.72765416, 0.27891788, 0.9094314}, 797 {0.8626208, 0.3540126, 0.3100354}, 798 {0.7153876, 0.17094712, 0.7801294}, 799 {0.23180388, 0.107446484, 0.69542855}, 800 {0.54731685, 0.8949827, 0.68316746}, 801 {0.15049729, 0.1293767, 0.0574729}, 802 {0.89379513, 0.67022973, 0.57360715}, 803 {0.725353, 0.25326362, 0.44264215}, 804 {0.2568602, 0.4986094, 0.9759933}, 805 {0.7300015, 0.70019704, 0.49546525}, 806 {0.54314494, 0.2004176, 0.63803226}, 807 {0.6180191, 0.5260845, 0.9373999}, 808 {0.63356537, 0.81430644, 0.78373694}, 809 {0.69995105, 0.84198904, 0.17851257}, 810 {0.5197941, 0.11502675, 0.95129955}, 811 {0.15791401, 0.07516741, 0.113447875}, 812 {0.06811827, 0.4450082, 0.98595786}, 813 {0.7153448, 0.41833848, 0.06332495}, 814 {0.6704102, 0.28931814, 0.031580303}, 815 {0.47773632, 0.73334247, 0.6925025}, 816 {0.7976896, 0.9499536, 0.6394833}, 817 {0.3074854, 0.14025249, 0.35961738}, 818 {0.49956197, 0.093575336, 0.790093}, 819 {0.4641653, 0.21276893, 0.528895}, 820 {0.1021849, 0.9416305, 0.46738508}, 821 {0.3790398, 0.50099677, 0.98233247}, 822 {0.39650732, 0.020929832, 0.53968865}, 823 {0.77604437, 0.8554197, 0.24056046}, 824 {0.07174444, 0.28758526, 0.67587185}, 825 {0.22292718, 0.66624546, 0.6077909}, 826 {0.22090498, 0.36197436, 0.40415043}, 827 {0.04838009, 0.120789215, 0.17928012}, 828 {0.55166364, 0.3400502, 0.43698996}, 829 {0.7638108, 0.47014108, 0.23208627}, 830 {0.9239513, 0.8418566, 0.23518613}, 831 {0.289589, 0.85010827, 0.055741556}, 832 {0.32436147, 0.18756394, 0.4217864}, 833 {0.041671168, 0.37824047, 0.66486764}, 834 {0.5052222, 0.07982704, 0.64345413}, 835 {0.62675995, 0.20138603, 0.8231867}, 836 {0.86306876, 0.9698708, 0.11398846}, 837 {0.68566775, 0.22026269, 0.13525572}, 838 {0.57706076, 0.32325208, 0.6122228}, 839 {0.80035216, 0.18560356, 0.6328281}, 840 {0.87145543, 0.19380389, 0.8863942}, 841 {0.33777508, 0.6056442, 0.9110077}, 842 {0.3961719, 0.49714503, 0.14191929}, 843 {0.5344362, 0.8166916, 0.75880384}, 844 {0.015749464, 0.63223976, 0.5470922}, 845 {0.10512444, 0.2212036, 0.24995685}, 846 {0.10831311, 0.27044898, 0.8668174}, 847 {0.3272971, 0.6659298, 0.87119603}, 848 {0.42913893, 0.14528985, 0.69957525}, 849 {0.33012474, 0.81964344, 0.092787445}, 850 {0.093618214, 0.90637344, 0.94406706}, 851 {0.12161567, 0.75131124, 0.40563175}, 852 {0.9154454, 0.75925833, 0.8406739}, 853 {0.81649286, 0.9025715, 0.3105051}, 854 {0.2927649, 0.22649862, 0.9708593}, 855 {0.30813727, 0.0079439245, 0.39662006}, 856 {0.94943213, 0.36778906, 0.217876}, 857 {0.716794, 0.3811725, 0.18448676}, 858 {0.66879725, 0.29722908, 0.0031202603}, 859 {0.11104216, 0.13094379, 0.0787222}, 860 {0.8508966, 0.86416596, 0.15885831}, 861 {0.2303136, 0.56660503, 0.17114973}, 862 {0.8632685, 0.4229249, 0.1936724}, 863 {0.03060897, 0.35226125, 0.8115969}, 864 } 865 } 866 867 func TestDelete_EntrypointIssues(t *testing.T) { 868 // This test is motivated by flakyness of other tests. We seemed to have 869 // experienced a failure with the following structure 870 // 871 // Entrypoint: 6 872 // Max Level: 1 873 // Tombstones map[] 874 875 // Nodes and Connections: 876 // Node 0 877 // Level 0: Connections: [1 2 3 4 5 6 7 8] 878 // Node 1 879 // Level 0: Connections: [0 2 3 4 5 6 7 8] 880 // Node 2 881 // Level 0: Connections: [1 0 3 4 5 6 7 8] 882 // Node 3 883 // Level 0: Connections: [2 1 0 4 5 6 7 8] 884 // Node 4 885 // Level 0: Connections: [3 2 1 0 5 6 7 8] 886 // Node 5 887 // Level 0: Connections: [3 4 2 1 0 6 7 8] 888 // Node 6 889 // Level 0: Connections: [4 2 1 3 5 0 7 8] 890 // Level 1: Connections: [7] 891 // Node 7 892 // Level 1: Connections: [6] 893 // Level 0: Connections: [6 4 3 5 2 1 0 8] 894 // Node 8 895 // Level 0: Connections: [7 6 4 3 5 2 1 0] 896 // 897 // This test aims to rebuild this tree exactly (manually) and verifies that 898 // deletion of the old entrypoint (element 6), works without issue 899 // 900 // The underlying test set can be found in vectors_for_test.go 901 902 index, err := New(Config{ 903 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 904 ID: "delete-entrypoint-test", 905 MakeCommitLoggerThunk: MakeNoopCommitLogger, 906 DistanceProvider: distancer.NewCosineDistanceProvider(), 907 VectorForIDThunk: testVectorForID, 908 }, ent.UserConfig{ 909 MaxConnections: 30, 910 EFConstruction: 128, 911 912 // The actual size does not matter for this test, but if it defaults to 913 // zero it will constantly think it's full and needs to be deleted - even 914 // after just being deleted, so make sure to use a positive number here. 915 VectorCacheMaxObjects: 100000, 916 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 917 cyclemanager.NewCallbackGroupNoop(), testinghelpers.NewDummyStore(t)) 918 require.Nil(t, err) 919 920 // manually build the index 921 index.entryPointID = 6 922 index.currentMaximumLayer = 1 923 index.nodes = make([]*vertex, 50) 924 index.nodes[0] = &vertex{ 925 id: 0, 926 connections: [][]uint64{ 927 {1, 2, 3, 4, 5, 6, 7, 8}, 928 }, 929 } 930 index.nodes[1] = &vertex{ 931 id: 1, 932 connections: [][]uint64{ 933 {0, 2, 3, 4, 5, 6, 7, 8}, 934 }, 935 } 936 index.nodes[2] = &vertex{ 937 id: 2, 938 connections: [][]uint64{ 939 {1, 0, 3, 4, 5, 6, 7, 8}, 940 }, 941 } 942 index.nodes[3] = &vertex{ 943 id: 3, 944 connections: [][]uint64{ 945 {2, 1, 0, 4, 5, 6, 7, 8}, 946 }, 947 } 948 index.nodes[4] = &vertex{ 949 id: 4, 950 connections: [][]uint64{ 951 {3, 2, 1, 0, 5, 6, 7, 8}, 952 }, 953 } 954 index.nodes[5] = &vertex{ 955 id: 5, 956 connections: [][]uint64{ 957 {3, 4, 2, 1, 0, 6, 7, 8}, 958 }, 959 } 960 index.nodes[6] = &vertex{ 961 id: 6, 962 connections: [][]uint64{ 963 {4, 3, 1, 3, 5, 0, 7, 8}, 964 {7}, 965 }, 966 level: 1, 967 } 968 index.nodes[7] = &vertex{ 969 id: 7, 970 connections: [][]uint64{ 971 {6, 4, 3, 5, 2, 1, 0, 8}, 972 {6}, 973 }, 974 level: 1, 975 } 976 index.nodes[8] = &vertex{ 977 id: 8, 978 connections: [][]uint64{ 979 8: {7, 6, 4, 3, 5, 2, 1, 0}, 980 }, 981 } 982 983 dumpIndex(index, "before delete") 984 985 t.Run("delete some elements and permanently delete tombstoned elements", 986 func(t *testing.T) { 987 err := index.Delete(6) 988 require.Nil(t, err) 989 err = index.Delete(8) 990 require.Nil(t, err) 991 992 err = index.CleanUpTombstonedNodes(neverStop) 993 require.Nil(t, err) 994 }) 995 996 dumpIndex(index, "after delete") 997 998 expectedResults := []uint64{ 999 3, 5, 4, // cluster 2 1000 7, // cluster 3 with element 6 and 8 deleted 1001 2, 1, 0, // cluster 1 1002 } 1003 1004 t.Run("verify that the results are correct", func(t *testing.T) { 1005 position := 3 1006 res, _, err := index.knnSearchByVector(testVectors[position], 50, 36, nil) 1007 require.Nil(t, err) 1008 assert.Equal(t, expectedResults, res) 1009 }) 1010 1011 // t.Fail() 1012 t.Run("destroy the index", func(t *testing.T) { 1013 require.Nil(t, index.Drop(context.Background())) 1014 }) 1015 } 1016 1017 func TestDelete_MoreEntrypointIssues(t *testing.T) { 1018 vectors := [][]float32{ 1019 {7, 1}, 1020 {8, 2}, 1021 {23, 14}, 1022 {6.5, -1}, 1023 } 1024 1025 vecForID := func(ctx context.Context, id uint64) ([]float32, error) { 1026 return vectors[int(id)], nil 1027 } 1028 // This test is motivated by flakyness of other tests. We seemed to have 1029 // experienced a failure with the following structure 1030 // 1031 // ID: thing_geoupdatetestclass_single_location 1032 // Entrypoint: 2 1033 // Max Level: 1 1034 // Tombstones map[0:{} 1:{}] 1035 // 1036 // Nodes and Connections: 1037 // Node 0 1038 // Level 0: Connections: [1] 1039 // Node 1 1040 // Level 0: Connections: [0 2] 1041 // Level 1: Connections: [2] 1042 // Node 2 1043 // Level 1: Connections: [1] 1044 // Level 0: Connections: [1] 1045 1046 index, err := New(Config{ 1047 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 1048 ID: "more-delete-entrypoint-flakyness-test", 1049 MakeCommitLoggerThunk: MakeNoopCommitLogger, 1050 DistanceProvider: distancer.NewGeoProvider(), 1051 VectorForIDThunk: vecForID, 1052 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 1053 }, ent.UserConfig{ 1054 MaxConnections: 30, 1055 EFConstruction: 128, 1056 1057 // The actual size does not matter for this test, but if it defaults to 1058 // zero it will constantly think it's full and needs to be deleted - even 1059 // after just being deleted, so make sure to use a positive number here. 1060 VectorCacheMaxObjects: 100000, 1061 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 1062 cyclemanager.NewCallbackGroupNoop(), testinghelpers.NewDummyStore(t)) 1063 require.Nil(t, err) 1064 1065 // manually build the index 1066 index.entryPointID = 2 1067 index.currentMaximumLayer = 1 1068 index.tombstones = map[uint64]struct{}{ 1069 0: {}, 1070 1: {}, 1071 } 1072 index.nodes = make([]*vertex, 50) 1073 index.nodes[0] = &vertex{ 1074 id: 0, 1075 connections: [][]uint64{ 1076 0: {1}, 1077 }, 1078 } 1079 index.nodes[1] = &vertex{ 1080 id: 1, 1081 connections: [][]uint64{ 1082 0: {0, 2}, 1083 1: {2}, 1084 }, 1085 } 1086 index.nodes[2] = &vertex{ 1087 id: 2, 1088 connections: [][]uint64{ 1089 0: {1}, 1090 1: {1}, 1091 }, 1092 } 1093 1094 dumpIndex(index, "before adding another element") 1095 t.Run("adding a third element", func(t *testing.T) { 1096 vec, _ := testVectorForID(context.TODO(), 3) 1097 index.Add(3, vec) 1098 }) 1099 1100 expectedResults := []uint64{ 1101 3, 2, 1102 } 1103 1104 t.Run("verify that the results are correct", func(t *testing.T) { 1105 position := 3 1106 res, _, err := index.knnSearchByVector(testVectors[position], 50, 36, nil) 1107 require.Nil(t, err) 1108 assert.Equal(t, expectedResults, res) 1109 }) 1110 1111 t.Run("destroy the index", func(t *testing.T) { 1112 require.Nil(t, index.Drop(context.Background())) 1113 }) 1114 } 1115 1116 func TestDelete_TombstonedEntrypoint(t *testing.T) { 1117 vecForID := func(ctx context.Context, id uint64) ([]float32, error) { 1118 // always return same vec for all elements 1119 return []float32{0.1, 0.2}, nil 1120 } 1121 index, err := New(Config{ 1122 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 1123 ID: "tombstoned-entrypoint-test", 1124 MakeCommitLoggerThunk: MakeNoopCommitLogger, 1125 DistanceProvider: distancer.NewCosineDistanceProvider(), 1126 VectorForIDThunk: vecForID, 1127 TempVectorForIDThunk: TempVectorForIDThunk([][]float32{{0.1, 0.2}}), 1128 }, ent.UserConfig{ 1129 MaxConnections: 30, 1130 EFConstruction: 128, 1131 // explicitly turn off, so we only focus on the tombstoned periods 1132 CleanupIntervalSeconds: 0, 1133 1134 // The actual size does not matter for this test, but if it defaults to 1135 // zero it will constantly think it's full and needs to be deleted - even 1136 // after just being deleted, so make sure to use a positive number here. 1137 VectorCacheMaxObjects: 100000, 1138 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 1139 cyclemanager.NewCallbackGroupNoop(), testinghelpers.NewDummyStore(t)) 1140 require.Nil(t, err) 1141 1142 objVec := []float32{0.1, 0.2} 1143 searchVec := []float32{0.05, 0.05} 1144 1145 require.Nil(t, index.Add(0, objVec)) 1146 require.Nil(t, index.Delete(0)) 1147 require.Nil(t, index.Add(1, objVec)) 1148 1149 res, _, err := index.SearchByVector(searchVec, 100, nil) 1150 require.Nil(t, err) 1151 assert.Equal(t, []uint64{1}, res, "should contain the only result") 1152 1153 t.Run("destroy the index", func(t *testing.T) { 1154 require.Nil(t, index.Drop(context.Background())) 1155 }) 1156 } 1157 1158 func TestDelete_Flakyness_gh_1369(t *testing.T) { 1159 // parse a snapshot form a flaky test 1160 snapshotBefore := []byte(`{"labels":["ran a cleanup cycle"],"id":"delete-test","entrypoint":3,"currentMaximumLayer":3,"tombstones":{},"nodes":[{"id":1,"level":0,"connections":{"0":[11,25,33,3,29,32,5,19,30,7,17,27,21,31,36,34,35,23,15,9,13]}},{"id":3,"level":3,"connections":{"0":[1,29,11,5,25,33,19,32,7,17,30,21,35,31,27,36,23,34,9,15,13],"1":[29,36,13],"2":[29,36],"3":[36]}},{"id":5,"level":0,"connections":{"0":[29,19,7,32,35,21,1,31,3,33,23,25,11,17,36,27,30,9,15,34,13]}},{"id":7,"level":0,"connections":{"0":[32,19,21,31,5,35,23,29,33,36,17,1,9,27,25,30,11,3,15,13,34]}},{"id":9,"level":0,"connections":{"0":[36,23,31,21,15,17,27,7,32,35,30,13,19,33,5,25,29,11,1,34,3]}},{"id":11,"level":0,"connections":{"0":[25,33,1,30,17,3,27,32,34,29,19,7,5,36,15,21,31,23,9,13,35]}},{"id":13,"level":1,"connections":{"0":[15,27,34,36,30,17,9,33,25,31,23,21,11,32,7,1,19,35,5,29,3],"1":[36,29,3]}},{"id":15,"level":0,"connections":{"0":[13,27,36,17,30,9,34,33,31,23,25,21,32,11,7,1,19,35,5,29,3]}},{"id":17,"level":0,"connections":{"0":[27,30,36,33,15,32,25,31,9,11,21,7,23,1,34,13,19,5,29,35,3]}},{"id":19,"level":0,"connections":{"0":[5,7,32,29,35,21,31,23,1,33,17,3,25,36,11,27,9,30,15,34,13]}},{"id":21,"level":0,"connections":{"0":[31,23,7,35,32,19,9,36,5,17,27,33,29,30,15,1,25,11,3,13,34]}},{"id":23,"level":0,"connections":{"0":[31,21,9,35,7,36,32,19,17,5,27,33,15,29,30,25,1,13,11,3,34]}},{"id":25,"level":0,"connections":{"0":[11,33,1,30,17,27,32,3,34,29,7,19,36,5,15,21,31,23,9,13,35]}},{"id":27,"level":0,"connections":{"0":[17,30,36,15,33,25,13,9,34,32,11,31,21,7,23,1,19,5,29,35,3]}},{"id":29,"level":2,"connections":{"0":[5,19,32,7,3,1,33,35,21,25,31,11,23,17,30,36,27,9,15,34,13],"1":[3,36,13],"2":[3,36]}},{"id":30,"level":0,"connections":{"0":[27,17,33,25,15,36,11,34,32,1,13,9,31,7,21,23,19,29,5,3,35]}},{"id":31,"level":0,"connections":{"0":[21,23,7,32,35,9,36,19,17,5,27,33,29,30,15,25,1,11,13,3,34]}},{"id":32,"level":0,"connections":{"0":[7,19,21,31,5,33,29,17,23,1,35,36,25,27,30,11,9,3,15,34,13]}},{"id":33,"level":0,"connections":{"0":[25,11,1,17,30,32,27,7,19,36,29,5,21,31,3,34,15,23,9,35,13]}},{"id":34,"level":0,"connections":{"0":[30,27,15,13,25,17,11,33,36,1,32,9,31,7,21,3,23,19,29,5,35]}},{"id":35,"level":0,"connections":{"0":[21,7,31,23,19,5,32,29,9,36,17,33,1,27,25,30,3,11,15,13,34]}},{"id":36,"level":3,"connections":{"0":[17,9,27,15,31,23,21,30,32,7,33,13,25,19,35,11,34,1,5,29,3],"1":[13,29,3],"2":[29,3],"3":[3]}}]} 1161 `) 1162 1163 vectors := vectorsForDeleteTest() 1164 vecForID := func(ctx context.Context, id uint64) ([]float32, error) { 1165 return vectors[int(id)], nil 1166 } 1167 1168 index, err := NewFromJSONDumpMap(snapshotBefore, vecForID) 1169 require.Nil(t, err) 1170 index.forbidFlat = true 1171 1172 var control []uint64 1173 t.Run("control search before delete with the respective allow list", func(t *testing.T) { 1174 allowList := helpers.NewAllowList() 1175 for i := range vectors { 1176 if i%2 == 0 { 1177 continue 1178 } 1179 1180 allowList.Insert(uint64(i)) 1181 } 1182 1183 res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList) 1184 require.Nil(t, err) 1185 require.True(t, len(res) > 0) 1186 1187 control = res 1188 }) 1189 1190 t.Run("delete the remaining even entries", func(t *testing.T) { 1191 require.Nil(t, index.Delete(30)) 1192 require.Nil(t, index.Delete(32)) 1193 require.Nil(t, index.Delete(34)) 1194 require.Nil(t, index.Delete(36)) 1195 }) 1196 1197 t.Run("verify against control BEFORE Tombstone Cleanup", func(t *testing.T) { 1198 res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) 1199 require.Nil(t, err) 1200 require.True(t, len(res) > 0) 1201 assert.Equal(t, control, res) 1202 }) 1203 1204 t.Run("clean up tombstoned nodes", func(t *testing.T) { 1205 require.Nil(t, index.CleanUpTombstonedNodes(neverStop)) 1206 }) 1207 1208 t.Run("verify against control AFTER Tombstone Cleanup", func(t *testing.T) { 1209 res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) 1210 require.Nil(t, err) 1211 require.True(t, len(res) > 0) 1212 assert.Equal(t, control, res) 1213 }) 1214 1215 t.Run("now delete the entrypoint", func(t *testing.T) { 1216 require.Nil(t, index.Delete(index.entryPointID)) 1217 }) 1218 1219 t.Run("clean up tombstoned nodes", func(t *testing.T) { 1220 require.Nil(t, index.CleanUpTombstonedNodes(neverStop)) 1221 }) 1222 1223 t.Run("now delete the entrypoint", func(t *testing.T) { 1224 // this verifies that our findNewLocalEntrypoint also works when the global 1225 // entrypoint is affected 1226 require.Nil(t, index.Delete(index.entryPointID)) 1227 }) 1228 1229 t.Run("clean up tombstoned nodes", func(t *testing.T) { 1230 require.Nil(t, index.CleanUpTombstonedNodes(neverStop)) 1231 }) 1232 1233 t.Run("destroy the index", func(t *testing.T) { 1234 require.Nil(t, index.Drop(context.Background())) 1235 }) 1236 } 1237 1238 func bruteForceCosine(vectors [][]float32, query []float32, k int) []uint64 { 1239 type distanceAndIndex struct { 1240 distance float32 1241 index uint64 1242 } 1243 1244 distances := make([]distanceAndIndex, len(vectors)) 1245 1246 d := distancer.NewCosineDistanceProvider().New(distancer.Normalize(query)) 1247 for i, vec := range vectors { 1248 dist, _, _ := d.Distance(distancer.Normalize(vec)) 1249 distances[i] = distanceAndIndex{ 1250 index: uint64(i), 1251 distance: dist, 1252 } 1253 } 1254 1255 sort.Slice(distances, func(a, b int) bool { 1256 return distances[a].distance < distances[b].distance 1257 }) 1258 1259 if len(distances) < k { 1260 k = len(distances) 1261 } 1262 1263 out := make([]uint64, k) 1264 for i := 0; i < k; i++ { 1265 out[i] = distances[i].index 1266 } 1267 1268 return out 1269 } 1270 1271 func neverStop() bool { 1272 return false 1273 } 1274 1275 // This test simulates what happens when the EP is removed from the 1276 // VectorForID-serving store 1277 func Test_DeleteEPVecInUnderlyingObjectStore(t *testing.T) { 1278 var vectorIndex *hnsw 1279 1280 vectors := [][]float32{ 1281 {1, 1}, 1282 {2, 2}, 1283 {3, 3}, 1284 } 1285 1286 vectorErrors := []error{ 1287 nil, 1288 nil, 1289 nil, 1290 } 1291 store := testinghelpers.NewDummyStore(t) 1292 defer store.Shutdown(context.Background()) 1293 1294 t.Run("import the test vectors", func(t *testing.T) { 1295 index, err := New(Config{ 1296 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 1297 ID: "delete-ep-in-underlying-store-test", 1298 MakeCommitLoggerThunk: MakeNoopCommitLogger, 1299 DistanceProvider: distancer.NewL2SquaredProvider(), 1300 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 1301 fmt.Printf("vec for pos=%d is %v\n", id, vectors[int(id)]) 1302 return vectors[int(id)], vectorErrors[int(id)] 1303 }, 1304 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 1305 }, ent.UserConfig{ 1306 MaxConnections: 30, 1307 EFConstruction: 128, 1308 1309 // The actual size does not matter for this test, but if it defaults to 1310 // zero it will constantly think it's full and needs to be deleted - even 1311 // after just being deleted, so make sure to use a positive number here. 1312 VectorCacheMaxObjects: 100000, 1313 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 1314 cyclemanager.NewCallbackGroupNoop(), store) 1315 require.Nil(t, err) 1316 vectorIndex = index 1317 1318 for i, vec := range vectors { 1319 err := vectorIndex.Add(uint64(i), vec) 1320 require.Nil(t, err) 1321 } 1322 1323 fmt.Printf("ep is %d\n", vectorIndex.entryPointID) 1324 }) 1325 1326 t.Run("simulate ep vec deletion in object store", func(t *testing.T) { 1327 vectors[0] = nil 1328 vectorErrors[0] = storobj.NewErrNotFoundf(0, "deleted") 1329 vectorIndex.cache.Delete(context.Background(), 0) 1330 }) 1331 1332 t.Run("try to insert a fourth vector", func(t *testing.T) { 1333 vectors = append(vectors, []float32{4, 4}) 1334 vectorErrors = append(vectorErrors, nil) 1335 1336 pos := len(vectors) - 1 1337 err := vectorIndex.Add(uint64(pos), vectors[pos]) 1338 require.Nil(t, err) 1339 }) 1340 } 1341 1342 func TestDelete_WithCleaningUpTombstonesOncePreservesMaxConnections(t *testing.T) { 1343 // there is a single bulk clean event after all the deletes 1344 vectors := vectorsForDeleteTest() 1345 var vectorIndex *hnsw 1346 1347 store := testinghelpers.NewDummyStore(t) 1348 defer store.Shutdown(context.Background()) 1349 1350 index, err := New(Config{ 1351 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 1352 ID: "delete-test", 1353 MakeCommitLoggerThunk: MakeNoopCommitLogger, 1354 DistanceProvider: distancer.NewCosineDistanceProvider(), 1355 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 1356 return vectors[int(id)], nil 1357 }, 1358 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 1359 }, ent.UserConfig{ 1360 MaxConnections: 30, 1361 EFConstruction: 128, 1362 1363 // The actual size does not matter for this test, but if it defaults to 1364 // zero it will constantly think it's full and needs to be deleted - even 1365 // after just being deleted, so make sure to use a positive number here. 1366 VectorCacheMaxObjects: 100000, 1367 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 1368 cyclemanager.NewCallbackGroupNoop(), store) 1369 require.Nil(t, err) 1370 vectorIndex = index 1371 1372 for i, vec := range vectors { 1373 err := vectorIndex.Add(uint64(i), vec) 1374 require.Nil(t, err) 1375 } 1376 1377 require.Equal(t, 60, index.maximumConnectionsLayerZero) 1378 some := false 1379 for _, node := range index.nodes { 1380 if node == nil { 1381 continue 1382 } 1383 require.LessOrEqual(t, len(node.connections[0]), index.maximumConnectionsLayerZero) 1384 some = some || len(node.connections[0]) > index.maximumConnections 1385 } 1386 require.True(t, some) 1387 1388 for i := range vectors { 1389 if i%2 != 0 { 1390 continue 1391 } 1392 1393 err := vectorIndex.Delete(uint64(i)) 1394 require.Nil(t, err) 1395 } 1396 1397 err = vectorIndex.CleanUpTombstonedNodes(neverStop) 1398 require.Nil(t, err) 1399 require.Equal(t, 60, index.maximumConnectionsLayerZero) 1400 some = false 1401 for _, node := range index.nodes { 1402 if node == nil { 1403 continue 1404 } 1405 require.LessOrEqual(t, len(node.connections[0]), index.maximumConnectionsLayerZero) 1406 some = some || len(node.connections[0]) > index.maximumConnections 1407 } 1408 require.True(t, some) 1409 1410 t.Run("destroy the index", func(t *testing.T) { 1411 require.Nil(t, vectorIndex.Drop(context.Background())) 1412 }) 1413 } 1414 1415 func TestDelete_WithCleaningUpTombstonesOnceRemovesAllRelatedConnections(t *testing.T) { 1416 // there is a single bulk clean event after all the deletes 1417 vectors := vectorsForDeleteTest() 1418 var vectorIndex *hnsw 1419 store := testinghelpers.NewDummyStore(t) 1420 1421 index, err := New(Config{ 1422 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 1423 ID: "delete-test", 1424 MakeCommitLoggerThunk: MakeNoopCommitLogger, 1425 DistanceProvider: distancer.NewCosineDistanceProvider(), 1426 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 1427 return vectors[int(id)], nil 1428 }, 1429 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 1430 }, ent.UserConfig{ 1431 MaxConnections: 30, 1432 EFConstruction: 128, 1433 1434 // The actual size does not matter for this test, but if it defaults to 1435 // zero it will constantly think it's full and needs to be deleted - even 1436 // after just being deleted, so make sure to use a positive number here. 1437 VectorCacheMaxObjects: 100000, 1438 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 1439 cyclemanager.NewCallbackGroupNoop(), store) 1440 require.Nil(t, err) 1441 vectorIndex = index 1442 1443 for i, vec := range vectors { 1444 err := vectorIndex.Add(uint64(i), vec) 1445 require.Nil(t, err) 1446 } 1447 1448 for i := range vectors { 1449 if i%2 != 0 { 1450 continue 1451 } 1452 1453 err := vectorIndex.Delete(uint64(i)) 1454 require.Nil(t, err) 1455 } 1456 1457 err = vectorIndex.CleanUpTombstonedNodes(neverStop) 1458 require.Nil(t, err) 1459 1460 for i, node := range vectorIndex.nodes { 1461 if node == nil { 1462 continue 1463 } 1464 assert.NotEqual(t, 0, i%2) 1465 for level, connections := range node.connections { 1466 for _, id := range connections { 1467 assert.NotEqual(t, uint64(0), id%2) 1468 if id%2 == 0 { 1469 fmt.Println("at: ", vectorIndex.entryPointID, i, level, id) 1470 } 1471 } 1472 } 1473 } 1474 1475 require.Nil(t, vectorIndex.Drop(context.Background())) 1476 store.Shutdown(context.Background()) 1477 } 1478 1479 func TestDelete_WithCleaningUpTombstonesWithHighConcurrency(t *testing.T) { 1480 os.Setenv("TOMBSTONE_DELETION_CONCURRENCY", "100") 1481 defer os.Unsetenv("TOMBSTONE_DELETION_CONCURRENCY") 1482 // there is a single bulk clean event after all the deletes 1483 vectors, _ := testinghelpers.RandomVecs(3_000, 1, 1536) 1484 var vectorIndex *hnsw 1485 1486 store := testinghelpers.NewDummyStore(t) 1487 defer store.Shutdown(context.Background()) 1488 1489 t.Run("import the test vectors", func(t *testing.T) { 1490 index, err := New(Config{ 1491 RootPath: "doesnt-matter-as-committlogger-is-mocked-out", 1492 ID: "delete-test", 1493 MakeCommitLoggerThunk: MakeNoopCommitLogger, 1494 DistanceProvider: distancer.NewCosineDistanceProvider(), 1495 VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { 1496 return vectors[int(id)], nil 1497 }, 1498 TempVectorForIDThunk: TempVectorForIDThunk(vectors), 1499 }, ent.UserConfig{ 1500 MaxConnections: 30, 1501 EFConstruction: 128, 1502 1503 // The actual size does not matter for this test, but if it defaults to 1504 // zero it will constantly think it's full and needs to be deleted - even 1505 // after just being deleted, so make sure to use a positive number here. 1506 VectorCacheMaxObjects: 100000, 1507 }, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), 1508 cyclemanager.NewCallbackGroupNoop(), store) 1509 require.Nil(t, err) 1510 vectorIndex = index 1511 1512 for i, vec := range vectors { 1513 err := vectorIndex.Add(uint64(i), vec) 1514 require.Nil(t, err) 1515 } 1516 }) 1517 1518 fmt.Printf("entrypoint before %d\n", vectorIndex.entryPointID) 1519 t.Run("deleting elements", func(t *testing.T) { 1520 for i := range vectors { 1521 if i < 10 { 1522 continue 1523 } 1524 1525 err := vectorIndex.Delete(uint64(i)) 1526 require.Nil(t, err) 1527 } 1528 }) 1529 1530 fmt.Printf("entrypoint after %d\n", vectorIndex.entryPointID) 1531 1532 t.Run("running the cleanup", func(t *testing.T) { 1533 err := vectorIndex.CleanUpTombstonedNodes(neverStop) 1534 require.Nil(t, err) 1535 }) 1536 1537 t.Run("verify the graph no longer has any tombstones", func(t *testing.T) { 1538 assert.Len(t, vectorIndex.tombstones, 0) 1539 }) 1540 1541 t.Run("destroy the index", func(t *testing.T) { 1542 require.Nil(t, vectorIndex.Drop(context.Background())) 1543 }) 1544 }