github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/delete.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "context" 16 "fmt" 17 "os" 18 "runtime" 19 "runtime/debug" 20 "strconv" 21 "sync" 22 "time" 23 24 enterrors "github.com/weaviate/weaviate/entities/errors" 25 26 "github.com/pkg/errors" 27 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 28 "github.com/weaviate/weaviate/adapters/repos/db/vector/cache" 29 "github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers" 30 "github.com/weaviate/weaviate/entities/cyclemanager" 31 "github.com/weaviate/weaviate/entities/storobj" 32 ) 33 34 type breakCleanUpTombstonedNodesFunc func() bool 35 36 // Delete attaches a tombstone to an item so it can be periodically cleaned up 37 // later and the edges reassigned 38 func (h *hnsw) Delete(ids ...uint64) error { 39 h.compressActionLock.RLock() 40 defer h.compressActionLock.RUnlock() 41 42 h.deleteVsInsertLock.Lock() 43 defer h.deleteVsInsertLock.Unlock() 44 45 h.deleteLock.Lock() 46 defer h.deleteLock.Unlock() 47 48 before := time.Now() 49 defer h.metrics.TrackDelete(before, "total") 50 51 if err := h.addTombstone(ids...); err != nil { 52 return err 53 } 54 55 for _, id := range ids { 56 h.metrics.DeleteVector() 57 58 // Adding a tombstone might not be enough in some cases, if the tombstoned 59 // entry was the entrypoint this might lead to issues for following inserts: 60 // On a nearly empty graph the entrypoint might be the only viable element to 61 // connect to, however, because the entrypoint itself is tombstones 62 // connections to it are impossible. So, unless we find a new entrypoint, 63 // subsequent inserts might end up isolated (without edges) in the graph. 64 // This is especially true if the tombstoned entrypoint is the only node in 65 // the graph. In this case we must reset the graph, so it acts like an empty 66 // one. Otherwise we'd insert the next id and have only one possible node to 67 // connect it to (the entrypoint). With that one being tombstoned, the new 68 // node would be guaranteed to have zero edges 69 70 node := h.nodeByID(id) 71 if node == nil { 72 // node was already deleted/cleaned up 73 continue 74 } 75 76 if h.getEntrypoint() == id { 77 beforeDeleteEP := time.Now() 78 defer h.metrics.TrackDelete(beforeDeleteEP, "delete_entrypoint") 79 80 denyList := h.tombstonesAsDenyList() 81 if onlyNode, err := h.resetIfOnlyNode(node, denyList); err != nil { 82 return errors.Wrap(err, "reset index") 83 } else if !onlyNode { 84 if err := h.deleteEntrypoint(node, denyList); err != nil { 85 return errors.Wrap(err, "delete entrypoint") 86 } 87 } 88 } 89 } 90 91 return nil 92 } 93 94 func (h *hnsw) resetIfEmpty() (empty bool, err error) { 95 h.resetLock.Lock() 96 defer h.resetLock.Unlock() 97 h.Lock() 98 defer h.Unlock() 99 100 empty = func() bool { 101 h.shardedNodeLocks.RLock(h.entryPointID) 102 defer h.shardedNodeLocks.RUnlock(h.entryPointID) 103 104 return h.isEmptyUnlocked() 105 }() 106 // It can happen that between calls of isEmptyUnlocked and resetUnlocked 107 // values of h.nodes will change (due to locks being RUnlocked and Locked again) 108 // This is acceptable in order to avoid long Locking of all striped locks 109 if empty { 110 h.shardedNodeLocks.LockAll() 111 defer h.shardedNodeLocks.UnlockAll() 112 113 return true, h.resetUnlocked() 114 } 115 return false, nil 116 } 117 118 func (h *hnsw) resetIfOnlyNode(needle *vertex, denyList helpers.AllowList) (onlyNode bool, err error) { 119 h.resetLock.Lock() 120 defer h.resetLock.Unlock() 121 h.Lock() 122 defer h.Unlock() 123 124 onlyNode = func() bool { 125 h.shardedNodeLocks.RLockAll() 126 defer h.shardedNodeLocks.RUnlockAll() 127 128 return h.isOnlyNodeUnlocked(needle, denyList) 129 }() 130 // It can happen that between calls of isOnlyNodeUnlocked and resetUnlocked 131 // values of h.nodes will change (due to locks being RUnlocked and Locked again) 132 // This is acceptable in order to avoid long Locking of all striped locks 133 if onlyNode { 134 h.shardedNodeLocks.LockAll() 135 defer h.shardedNodeLocks.UnlockAll() 136 137 return true, h.resetUnlocked() 138 } 139 return false, nil 140 } 141 142 func (h *hnsw) resetUnlocked() error { 143 h.resetCtxCancel() 144 resetCtx, resetCtxCancel := context.WithCancel(context.Background()) 145 h.resetCtx = resetCtx 146 h.resetCtxCancel = resetCtxCancel 147 148 h.entryPointID = 0 149 h.currentMaximumLayer = 0 150 h.initialInsertOnce = &sync.Once{} 151 h.nodes = make([]*vertex, cache.InitialSize) 152 153 return h.commitLog.Reset() 154 } 155 156 func (h *hnsw) tombstonesAsDenyList() helpers.AllowList { 157 deleteList := helpers.NewAllowList() 158 h.tombstoneLock.Lock() 159 defer h.tombstoneLock.Unlock() 160 161 tombstones := h.tombstones 162 for id := range tombstones { 163 deleteList.Insert(id) 164 } 165 166 return deleteList 167 } 168 169 func (h *hnsw) getEntrypoint() uint64 { 170 h.RLock() 171 defer h.RUnlock() 172 173 return h.entryPointID 174 } 175 176 func (h *hnsw) copyTombstonesToAllowList(breakCleanUpTombstonedNodes breakCleanUpTombstonedNodesFunc) (ok bool, deleteList helpers.AllowList) { 177 h.resetLock.Lock() 178 defer h.resetLock.Unlock() 179 180 if breakCleanUpTombstonedNodes() { 181 return false, nil 182 } 183 184 h.RLock() 185 lenOfNodes := uint64(len(h.nodes)) 186 h.RUnlock() 187 188 h.tombstoneLock.Lock() 189 defer h.tombstoneLock.Unlock() 190 191 deleteList = helpers.NewAllowList() 192 for id := range h.tombstones { 193 if lenOfNodes <= id { 194 // we're trying to delete an id outside the possible range, nothing to do 195 continue 196 } 197 198 deleteList.Insert(id) 199 } 200 201 if deleteList.IsEmpty() { 202 return false, nil 203 } 204 205 return true, deleteList 206 } 207 208 // CleanUpTombstonedNodes removes nodes with a tombstone and reassigns 209 // edges that were previously pointing to the tombstoned nodes 210 func (h *hnsw) CleanUpTombstonedNodes(shouldAbort cyclemanager.ShouldAbortCallback) error { 211 _, err := h.cleanUpTombstonedNodes(shouldAbort) 212 return err 213 } 214 215 func (h *hnsw) cleanUpTombstonedNodes(shouldAbort cyclemanager.ShouldAbortCallback) (bool, error) { 216 h.compressActionLock.RLock() 217 defer h.compressActionLock.RUnlock() 218 defer func() { 219 err := recover() 220 if err != nil { 221 h.logger.WithField("panic", err).Errorf("class %s: tombstone cleanup panicked", h.className) 222 debug.PrintStack() 223 } 224 }() 225 226 h.metrics.StartCleanup(1) 227 defer h.metrics.EndCleanup(1) 228 229 h.resetLock.Lock() 230 resetCtx := h.resetCtx 231 h.resetLock.Unlock() 232 233 breakCleanUpTombstonedNodes := func() bool { 234 return resetCtx.Err() != nil || shouldAbort() 235 } 236 237 executed := false 238 ok, deleteList := h.copyTombstonesToAllowList(breakCleanUpTombstonedNodes) 239 if !ok { 240 return executed, nil 241 } 242 243 executed = true 244 if ok, err := h.reassignNeighborsOf(deleteList, breakCleanUpTombstonedNodes); err != nil { 245 return executed, err 246 } else if !ok { 247 return executed, nil 248 } 249 h.reassignNeighbor(h.getEntrypoint(), deleteList, breakCleanUpTombstonedNodes) 250 251 if ok, err := h.replaceDeletedEntrypoint(deleteList, breakCleanUpTombstonedNodes); err != nil { 252 return executed, err 253 } else if !ok { 254 return executed, nil 255 } 256 257 if ok, err := h.removeTombstonesAndNodes(deleteList, breakCleanUpTombstonedNodes); err != nil { 258 return executed, err 259 } else if !ok { 260 return executed, nil 261 } 262 263 if _, err := h.resetIfEmpty(); err != nil { 264 return executed, err 265 } 266 267 return executed, nil 268 } 269 270 func (h *hnsw) replaceDeletedEntrypoint(deleteList helpers.AllowList, breakCleanUpTombstonedNodes breakCleanUpTombstonedNodesFunc) (ok bool, err error) { 271 h.resetLock.Lock() 272 defer h.resetLock.Unlock() 273 274 if breakCleanUpTombstonedNodes() { 275 return false, nil 276 } 277 278 it := deleteList.Iterator() 279 for id, ok := it.Next(); ok; id, ok = it.Next() { 280 if h.getEntrypoint() == id { 281 // this a special case because: 282 // 283 // 1. we need to find a new entrypoint, if this is the last point on this 284 // level, we need to find an entrypoint on a lower level 285 // 2. there is a risk that this is the only node in the entire graph. In 286 // this case we must reset the graph 287 h.shardedNodeLocks.RLock(id) 288 node := h.nodes[id] 289 h.shardedNodeLocks.RUnlock(id) 290 291 if err := h.deleteEntrypoint(node, deleteList); err != nil { 292 return false, errors.Wrap(err, "delete entrypoint") 293 } 294 } 295 } 296 297 return true, nil 298 } 299 300 func tombstoneDeletionConcurrency() int { 301 if v := os.Getenv("TOMBSTONE_DELETION_CONCURRENCY"); v != "" { 302 asInt, err := strconv.Atoi(v) 303 if err == nil && asInt > 0 { 304 return asInt 305 } 306 } 307 return runtime.GOMAXPROCS(0) / 2 308 } 309 310 func (h *hnsw) reassignNeighborsOf(deleteList helpers.AllowList, breakCleanUpTombstonedNodes breakCleanUpTombstonedNodesFunc) (ok bool, err error) { 311 h.RLock() 312 size := len(h.nodes) 313 h.RUnlock() 314 h.resetLock.Lock() 315 defer h.resetLock.Unlock() 316 317 g, ctx := enterrors.NewErrorGroupWithContextWrapper(h.logger, h.shutdownCtx) 318 ch := make(chan uint64) 319 320 for i := 0; i < tombstoneDeletionConcurrency(); i++ { 321 g.Go(func() error { 322 for { 323 select { 324 case <-ctx.Done(): 325 return nil 326 case deletedID, ok := <-ch: 327 if !ok { 328 return nil 329 } 330 h.shardedNodeLocks.RLock(deletedID) 331 if uint64(size) < deletedID || h.nodes[deletedID] == nil { 332 h.shardedNodeLocks.RUnlock(deletedID) 333 continue 334 } 335 h.shardedNodeLocks.RUnlock(deletedID) 336 if h.getEntrypoint() != deletedID { 337 h.reassignNeighbor(deletedID, deleteList, breakCleanUpTombstonedNodes) 338 } 339 } 340 } 341 }) 342 } 343 344 LOOP: 345 for i := 0; i < size; i++ { 346 select { 347 case ch <- uint64(i): 348 case <-ctx.Done(): 349 break LOOP 350 } 351 } 352 353 close(ch) 354 355 err = g.Wait() 356 if errors.Is(err, context.Canceled) { 357 h.logger.Errorf("class %s: tombstone cleanup canceled", h.className) 358 return false, nil 359 } 360 361 return true, err 362 } 363 364 func (h *hnsw) reassignNeighbor( 365 neighbor uint64, 366 deleteList helpers.AllowList, 367 breakCleanUpTombstonedNodes breakCleanUpTombstonedNodesFunc, 368 ) (ok bool, err error) { 369 if breakCleanUpTombstonedNodes() { 370 return false, nil 371 } 372 373 h.RLock() 374 h.shardedNodeLocks.RLock(neighbor) 375 neighborNode := h.nodes[neighbor] 376 h.shardedNodeLocks.RUnlock(neighbor) 377 currentEntrypoint := h.entryPointID 378 currentMaximumLayer := h.currentMaximumLayer 379 h.RUnlock() 380 381 if neighborNode == nil || deleteList.Contains(neighborNode.id) { 382 return true, nil 383 } 384 385 var neighborVec []float32 386 var compressorDistancer compressionhelpers.CompressorDistancer 387 if h.compressed.Load() { 388 compressorDistancer, err = h.compressor.NewDistancerFromID(neighbor) 389 } else { 390 neighborVec, err = h.cache.Get(context.Background(), neighbor) 391 } 392 393 if err != nil { 394 var e storobj.ErrNotFound 395 if errors.As(err, &e) { 396 h.handleDeletedNode(e.DocID) 397 return true, nil 398 } else { 399 // not a typed error, we can recover from, return with err 400 return false, errors.Wrap(err, "get neighbor vec") 401 } 402 } 403 neighborNode.Lock() 404 neighborLevel := neighborNode.level 405 if !connectionsPointTo(neighborNode.connections, deleteList) { 406 // nothing needs to be changed, skip 407 neighborNode.Unlock() 408 return true, nil 409 } 410 neighborNode.Unlock() 411 412 entryPointID, err := h.findBestEntrypointForNode(currentMaximumLayer, 413 neighborLevel, currentEntrypoint, neighborVec, compressorDistancer) 414 if err != nil { 415 return false, errors.Wrap(err, "find best entrypoint") 416 } 417 418 if entryPointID == neighbor { 419 // if we use ourselves as entrypoint and delete all connections in the 420 // next step, we won't find any neighbors, so we need to use an 421 // alternative entryPoint in this round 422 423 if h.isOnlyNode(&vertex{id: neighbor}, deleteList) { 424 neighborNode.Lock() 425 // delete all existing connections before re-assigning 426 neighborLevel = neighborNode.level 427 neighborNode.connections = make([][]uint64, neighborLevel+1) 428 neighborNode.Unlock() 429 430 if err := h.commitLog.ClearLinks(neighbor); err != nil { 431 return false, err 432 } 433 return true, nil 434 } 435 436 tmpDenyList := deleteList.DeepCopy() 437 tmpDenyList.Insert(entryPointID) 438 439 alternative, level := h.findNewLocalEntrypoint(tmpDenyList, currentMaximumLayer, 440 entryPointID) 441 if level > neighborLevel { 442 neighborNode.Lock() 443 // reset connections according to level 444 neighborNode.connections = make([][]uint64, level+1) 445 neighborNode.Unlock() 446 neighborLevel = level 447 } 448 entryPointID = alternative 449 } 450 451 neighborNode.markAsMaintenance() 452 if err := h.reconnectNeighboursOf(neighborNode, entryPointID, neighborVec, compressorDistancer, 453 neighborLevel, currentMaximumLayer, deleteList); err != nil { 454 return false, errors.Wrap(err, "find and connect neighbors") 455 } 456 neighborNode.unmarkAsMaintenance() 457 458 h.metrics.CleanedUp() 459 return true, nil 460 } 461 462 func connectionsPointTo(connections [][]uint64, needles helpers.AllowList) bool { 463 for _, atLevel := range connections { 464 for _, pointer := range atLevel { 465 if needles.Contains(pointer) { 466 return true 467 } 468 } 469 } 470 471 return false 472 } 473 474 // deleteEntrypoint deletes the current entrypoint and replaces it with a new 475 // one. It respects the attached denyList, so that it doesn't assign another 476 // node which also has a tombstone and is also in the process of being cleaned 477 // up 478 func (h *hnsw) deleteEntrypoint(node *vertex, denyList helpers.AllowList) error { 479 if h.isOnlyNode(node, denyList) { 480 // no point in finding another entrypoint if this is the only node 481 return nil 482 } 483 484 node.Lock() 485 level := node.level 486 id := node.id 487 node.Unlock() 488 489 newEntrypoint, level, ok := h.findNewGlobalEntrypoint(denyList, level, id) 490 if !ok { 491 return nil 492 } 493 494 h.Lock() 495 h.entryPointID = newEntrypoint 496 h.currentMaximumLayer = level 497 h.Unlock() 498 if err := h.commitLog.SetEntryPointWithMaxLayer(newEntrypoint, level); err != nil { 499 return err 500 } 501 502 return nil 503 } 504 505 // returns entryPointID, level and whether a change occurred 506 func (h *hnsw) findNewGlobalEntrypoint(denyList helpers.AllowList, targetLevel int, 507 oldEntrypoint uint64, 508 ) (uint64, int, bool) { 509 if h.getEntrypoint() != oldEntrypoint { 510 // entrypoint has already been changed (this could be due to a new import 511 // for example, nothing to do for us 512 return 0, 0, false 513 } 514 515 for l := targetLevel; l >= 0; l-- { 516 // ideally we can find a new entrypoint at the same level of the 517 // to-be-deleted node. However, there is a chance it was the only node on 518 // that level, in that case we need to look at the next lower level for a 519 // better candidate 520 521 h.RLock() 522 maxNodes := len(h.nodes) 523 h.RUnlock() 524 525 for i := 0; i < maxNodes; i++ { 526 if h.getEntrypoint() != oldEntrypoint { 527 // entrypoint has already been changed (this could be due to a new import 528 // for example, nothing to do for us 529 return 0, 0, false 530 } 531 532 if denyList.Contains(uint64(i)) { 533 continue 534 } 535 536 h.shardedNodeLocks.RLock(uint64(i)) 537 candidate := h.nodes[i] 538 h.shardedNodeLocks.RUnlock(uint64(i)) 539 540 if candidate == nil { 541 continue 542 } 543 544 candidate.Lock() 545 candidateLevel := candidate.level 546 candidate.Unlock() 547 548 if candidateLevel != l { 549 // not reaching up to the current level, skip in hope of finding another candidate 550 continue 551 } 552 553 // we have a node that matches 554 return uint64(i), l, true 555 } 556 } 557 558 // we made it through the entire graph and didn't find a new entrypoint all 559 // the way down to level 0. This can only mean the graph is empty, which is 560 // unexpected. This situation should have been prevented by the deleteLock. 561 panic(fmt.Sprintf( 562 "class %s: shard %s: findNewEntrypoint called on an empty hnsw graph", 563 h.className, h.shardName)) 564 } 565 566 // returns entryPointID, level and whether a change occurred 567 func (h *hnsw) findNewLocalEntrypoint(denyList helpers.AllowList, targetLevel int, 568 oldEntrypoint uint64, 569 ) (uint64, int) { 570 if h.getEntrypoint() != oldEntrypoint { 571 // the current global entrypoint is different from our local entrypoint, so 572 // we can just use the global one, as the global one is guaranteed to be 573 // present on every level, i.e. it is always chosen from the highest 574 // currently available level 575 return h.getEntrypoint(), h.currentMaximumLayer 576 } 577 578 h.RLock() 579 maxNodes := len(h.nodes) 580 h.RUnlock() 581 582 for l := targetLevel; l >= 0; l-- { 583 // ideally we can find a new entrypoint at the same level of the 584 // to-be-deleted node. However, there is a chance it was the only node on 585 // that level, in that case we need to look at the next lower level for a 586 // better candidate 587 for i := 0; i < maxNodes; i++ { 588 if denyList.Contains(uint64(i)) { 589 continue 590 } 591 592 h.shardedNodeLocks.RLock(uint64(i)) 593 candidate := h.nodes[i] 594 h.shardedNodeLocks.RUnlock(uint64(i)) 595 596 if candidate == nil { 597 continue 598 } 599 600 candidate.Lock() 601 candidateLevel := candidate.level 602 candidate.Unlock() 603 604 if candidateLevel != l { 605 // not reaching up to the current level, skip in hope of finding another candidate 606 continue 607 } 608 609 // we have a node that matches 610 return uint64(i), l 611 } 612 } 613 614 panic(fmt.Sprintf( 615 "class %s: shard %s: findNewLocalEntrypoint called on an empty hnsw graph", 616 h.className, h.shardName)) 617 } 618 619 func (h *hnsw) isOnlyNode(needle *vertex, denyList helpers.AllowList) bool { 620 h.RLock() 621 h.shardedNodeLocks.RLockAll() 622 defer h.RUnlock() 623 defer h.shardedNodeLocks.RUnlockAll() 624 625 return h.isOnlyNodeUnlocked(needle, denyList) 626 } 627 628 func (h *hnsw) isOnlyNodeUnlocked(needle *vertex, denyList helpers.AllowList) bool { 629 for _, node := range h.nodes { 630 if node == nil || node.id == needle.id || denyList.Contains(node.id) { 631 continue 632 } 633 return false 634 } 635 return true 636 } 637 638 func (h *hnsw) hasTombstone(id uint64) bool { 639 h.tombstoneLock.RLock() 640 defer h.tombstoneLock.RUnlock() 641 _, ok := h.tombstones[id] 642 return ok 643 } 644 645 func (h *hnsw) addTombstone(ids ...uint64) error { 646 h.tombstoneLock.Lock() 647 defer h.tombstoneLock.Unlock() 648 649 for _, id := range ids { 650 h.metrics.AddTombstone() 651 h.tombstones[id] = struct{}{} 652 if err := h.commitLog.AddTombstone(id); err != nil { 653 return err 654 } 655 } 656 return nil 657 } 658 659 func (h *hnsw) removeTombstonesAndNodes(deleteList helpers.AllowList, breakCleanUpTombstonedNodes breakCleanUpTombstonedNodesFunc) (ok bool, err error) { 660 it := deleteList.Iterator() 661 for id, ok := it.Next(); ok; id, ok = it.Next() { 662 h.metrics.RemoveTombstone() 663 h.tombstoneLock.Lock() 664 delete(h.tombstones, id) 665 h.tombstoneLock.Unlock() 666 667 h.resetLock.Lock() 668 if !breakCleanUpTombstonedNodes() { 669 h.shardedNodeLocks.Lock(id) 670 h.nodes[id] = nil 671 h.shardedNodeLocks.Unlock(id) 672 if h.compressed.Load() { 673 h.compressor.Delete(context.TODO(), id) 674 } else { 675 h.cache.Delete(context.TODO(), id) 676 } 677 if err := h.commitLog.DeleteNode(id); err != nil { 678 h.resetLock.Unlock() 679 return false, err 680 } 681 } 682 h.resetLock.Unlock() 683 684 if err := h.commitLog.RemoveTombstone(id); err != nil { 685 return false, err 686 } 687 } 688 689 return true, nil 690 }