github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/neighbor_connections.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package hnsw 13 14 import ( 15 "context" 16 "math" 17 "time" 18 19 "github.com/pkg/errors" 20 "github.com/weaviate/weaviate/adapters/repos/db/helpers" 21 "github.com/weaviate/weaviate/adapters/repos/db/priorityqueue" 22 "github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers" 23 "github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/visited" 24 ) 25 26 func (h *hnsw) findAndConnectNeighbors(node *vertex, 27 entryPointID uint64, nodeVec []float32, distancer compressionhelpers.CompressorDistancer, targetLevel, currentMaxLevel int, 28 denyList helpers.AllowList, 29 ) error { 30 nfc := newNeighborFinderConnector(h, node, entryPointID, nodeVec, distancer, targetLevel, 31 currentMaxLevel, denyList, false) 32 33 return nfc.Do() 34 } 35 36 func (h *hnsw) reconnectNeighboursOf(node *vertex, 37 entryPointID uint64, nodeVec []float32, distancer compressionhelpers.CompressorDistancer, targetLevel, currentMaxLevel int, 38 denyList helpers.AllowList, 39 ) error { 40 nfc := newNeighborFinderConnector(h, node, entryPointID, nodeVec, distancer, targetLevel, 41 currentMaxLevel, denyList, true) 42 43 return nfc.Do() 44 } 45 46 type neighborFinderConnector struct { 47 ctx context.Context 48 graph *hnsw 49 node *vertex 50 entryPointID uint64 51 entryPointDist float32 52 nodeVec []float32 53 distancer compressionhelpers.CompressorDistancer 54 targetLevel int 55 currentMaxLevel int 56 denyList helpers.AllowList 57 // bufLinksLog BufferedLinksLogger 58 tombstoneCleanupNodes bool 59 } 60 61 func newNeighborFinderConnector(graph *hnsw, node *vertex, entryPointID uint64, 62 nodeVec []float32, distancer compressionhelpers.CompressorDistancer, targetLevel, currentMaxLevel int, 63 denyList helpers.AllowList, tombstoneCleanupNodes bool, 64 ) *neighborFinderConnector { 65 return &neighborFinderConnector{ 66 ctx: graph.shutdownCtx, 67 graph: graph, 68 node: node, 69 entryPointID: entryPointID, 70 nodeVec: nodeVec, 71 distancer: distancer, 72 targetLevel: targetLevel, 73 currentMaxLevel: currentMaxLevel, 74 denyList: denyList, 75 tombstoneCleanupNodes: tombstoneCleanupNodes, 76 } 77 } 78 79 func (n *neighborFinderConnector) Do() error { 80 for level := min(n.targetLevel, n.currentMaxLevel); level >= 0; level-- { 81 err := n.doAtLevel(level) 82 if err != nil { 83 return errors.Wrapf(err, "at level %d", level) 84 } 85 } 86 87 return nil 88 } 89 90 func (n *neighborFinderConnector) processNode(id uint64) (float32, error) { 91 var dist float32 92 var ok bool 93 var err error 94 95 if n.distancer == nil { 96 dist, ok, err = n.graph.distBetweenNodeAndVec(id, n.nodeVec) 97 } else { 98 dist, ok, err = n.distancer.DistanceToNode(id) 99 } 100 if err != nil { 101 // not an error we could recover from - fail! 102 return math.MaxFloat32, errors.Wrapf(err, 103 "calculate distance between insert node and entrypoint") 104 } 105 if !ok { 106 return math.MaxFloat32, nil 107 } 108 return dist, nil 109 } 110 111 func (n *neighborFinderConnector) processRecursively(from uint64, results *priorityqueue.Queue[any], visited visited.ListSet, level, top int) error { 112 if err := n.ctx.Err(); err != nil { 113 return err 114 } 115 116 var pending []uint64 117 if uint64(len(n.graph.nodes)) < from || n.graph.nodes[from] == nil { 118 n.graph.handleDeletedNode(from) 119 return nil 120 } 121 n.graph.nodes[from].Lock() 122 connections := make([]uint64, len(n.graph.nodes[from].connections[level])) 123 copy(connections, n.graph.nodes[from].connections[level]) 124 n.graph.nodes[from].Unlock() 125 for _, id := range connections { 126 if visited.Visited(id) { 127 continue 128 } 129 visited.Visit(id) 130 if n.denyList.Contains(id) { 131 pending = append(pending, id) 132 continue 133 } 134 135 dist, err := n.processNode(id) 136 if err != nil { 137 return err 138 } 139 if results.Len() >= top && dist < results.Top().Dist { 140 results.Pop() 141 results.Insert(id, dist) 142 } else if results.Len() < top { 143 results.Insert(id, dist) 144 } 145 } 146 for _, id := range pending { 147 if results.Len() >= top { 148 dist, err := n.processNode(id) 149 if err != nil { 150 return err 151 } 152 if dist > results.Top().Dist { 153 continue 154 } 155 } 156 err := n.processRecursively(id, results, visited, level, top) 157 if err != nil { 158 return err 159 } 160 } 161 return nil 162 } 163 164 func (n *neighborFinderConnector) doAtLevel(level int) error { 165 before := time.Now() 166 167 var results *priorityqueue.Queue[any] 168 var extraIDs []uint64 = nil 169 var total int = 0 170 var maxConnections int = n.graph.maximumConnections 171 172 if n.tombstoneCleanupNodes { 173 results = n.graph.pools.pqResults.GetMax(n.graph.efConstruction) 174 175 n.graph.pools.visitedListsLock.RLock() 176 visited := n.graph.pools.visitedLists.Borrow() 177 n.graph.pools.visitedListsLock.RUnlock() 178 n.node.Lock() 179 connections := make([]uint64, len(n.node.connections[level])) 180 copy(connections, n.node.connections[level]) 181 n.node.Unlock() 182 visited.Visit(n.node.id) 183 top := n.graph.efConstruction 184 var pending []uint64 = nil 185 186 for _, id := range connections { 187 visited.Visit(id) 188 if n.denyList.Contains(id) { 189 pending = append(pending, id) 190 continue 191 } 192 extraIDs = append(extraIDs, id) 193 top-- 194 total++ 195 } 196 for _, id := range pending { 197 visited.Visit(id) 198 err := n.processRecursively(id, results, visited, level, top) 199 if err != nil { 200 return err 201 } 202 } 203 n.graph.pools.visitedListsLock.RLock() 204 n.graph.pools.visitedLists.Return(visited) 205 n.graph.pools.visitedListsLock.RUnlock() 206 if err := n.pickEntrypoint(); err != nil { 207 return errors.Wrap(err, "pick entrypoint at level beginning") 208 } 209 // use dynamic max connections only during tombstone cleanup 210 maxConnections = n.maximumConnections(level) 211 } else { 212 if err := n.pickEntrypoint(); err != nil { 213 return errors.Wrap(err, "pick entrypoint at level beginning") 214 } 215 eps := priorityqueue.NewMin[any](1) 216 eps.Insert(n.entryPointID, n.entryPointDist) 217 var err error 218 219 results, err = n.graph.searchLayerByVectorWithDistancer(n.nodeVec, eps, n.graph.efConstruction, 220 level, nil, n.distancer) 221 if err != nil { 222 return errors.Wrapf(err, "search layer at level %d", level) 223 } 224 225 n.graph.insertMetrics.findAndConnectSearch(before) 226 before = time.Now() 227 } 228 229 if err := n.graph.selectNeighborsHeuristic(results, maxConnections-total, n.denyList); err != nil { 230 return errors.Wrap(err, "heuristic") 231 } 232 233 n.graph.insertMetrics.findAndConnectHeuristic(before) 234 before = time.Now() 235 236 // // for distributed spike 237 // neighborsAtLevel[level] = neighbors 238 239 neighbors := make([]uint64, total, total+results.Len()) 240 copy(neighbors, extraIDs) 241 for results.Len() > 0 { 242 id := results.Pop().ID 243 neighbors = append(neighbors, id) 244 } 245 246 n.graph.pools.pqResults.Put(results) 247 248 // set all outgoing in one go 249 n.node.setConnectionsAtLevel(level, neighbors) 250 n.graph.commitLog.ReplaceLinksAtLevel(n.node.id, level, neighbors) 251 252 for _, neighborID := range neighbors { 253 if err := n.connectNeighborAtLevel(neighborID, level); err != nil { 254 return errors.Wrapf(err, "connect neighbor %d", neighborID) 255 } 256 } 257 258 if len(neighbors) > 0 { 259 // there could be no neighbors left, if all are marked deleted, in this 260 // case, don't change the entrypoint 261 nextEntryPointID := neighbors[len(neighbors)-1] 262 if nextEntryPointID == n.node.id { 263 return nil 264 } 265 266 n.entryPointID = nextEntryPointID 267 } 268 269 n.graph.insertMetrics.findAndConnectUpdateConnections(before) 270 return nil 271 } 272 273 func (n *neighborFinderConnector) connectNeighborAtLevel(neighborID uint64, 274 level int, 275 ) error { 276 neighbor := n.graph.nodeByID(neighborID) 277 if skip := n.skipNeighbor(neighbor); skip { 278 return nil 279 } 280 281 neighbor.Lock() 282 defer neighbor.Unlock() 283 if level > neighbor.level { 284 // upgrade neighbor level if the level is out of sync due to a delete re-assign 285 neighbor.upgradeToLevelNoLock(level) 286 } 287 currentConnections := neighbor.connectionsAtLevelNoLock(level) 288 289 maximumConnections := n.maximumConnections(level) 290 if len(currentConnections) < maximumConnections { 291 // we can simply append 292 // updatedConnections = append(currentConnections, n.node.id) 293 neighbor.appendConnectionAtLevelNoLock(level, n.node.id, maximumConnections) 294 if err := n.graph.commitLog.AddLinkAtLevel(neighbor.id, level, n.node.id); err != nil { 295 return err 296 } 297 } else { 298 // we need to run the heuristic 299 300 dist, ok, err := n.graph.distBetweenNodes(n.node.id, neighborID) 301 if err != nil { 302 return errors.Wrapf(err, "dist between %d and %d", n.node.id, neighborID) 303 } 304 305 if !ok { 306 // it seems either the node or the neighbor were deleted in the meantime, 307 // there is nothing we can do now 308 return nil 309 } 310 311 candidates := priorityqueue.NewMax[any](len(currentConnections) + 1) 312 candidates.Insert(n.node.id, dist) 313 314 for _, existingConnection := range currentConnections { 315 dist, ok, err := n.graph.distBetweenNodes(existingConnection, neighborID) 316 if err != nil { 317 return errors.Wrapf(err, "dist between %d and %d", existingConnection, neighborID) 318 } 319 320 if !ok { 321 // was deleted in the meantime 322 continue 323 } 324 325 candidates.Insert(existingConnection, dist) 326 } 327 328 err = n.graph.selectNeighborsHeuristic(candidates, maximumConnections, n.denyList) 329 if err != nil { 330 return errors.Wrap(err, "connect neighbors") 331 } 332 333 neighbor.resetConnectionsAtLevelNoLock(level) 334 if err := n.graph.commitLog.ClearLinksAtLevel(neighbor.id, uint16(level)); err != nil { 335 return err 336 } 337 338 for candidates.Len() > 0 { 339 id := candidates.Pop().ID 340 neighbor.appendConnectionAtLevelNoLock(level, id, maximumConnections) 341 if err := n.graph.commitLog.AddLinkAtLevel(neighbor.id, level, id); err != nil { 342 return err 343 } 344 } 345 } 346 347 return nil 348 } 349 350 func (n *neighborFinderConnector) skipNeighbor(neighbor *vertex) bool { 351 if neighbor == n.node { 352 // don't connect to self 353 return true 354 } 355 356 if neighbor == nil || n.graph.hasTombstone(neighbor.id) { 357 // don't connect to tombstoned nodes. This would only increase the 358 // cleanup that needs to be done. Even worse: A tombstoned node can be 359 // cleaned up at any time, also while we are connecting to it. So, 360 // while the node still exists right now, it might already be nil in 361 // the next line, which would lead to a nil-pointer panic. 362 return true 363 } 364 365 return false 366 } 367 368 func (n *neighborFinderConnector) maximumConnections(level int) int { 369 if level == 0 { 370 return n.graph.maximumConnectionsLayerZero 371 } 372 373 return n.graph.maximumConnections 374 } 375 376 func (n *neighborFinderConnector) pickEntrypoint() error { 377 // the neighborFinderConnector always has a suggestion for an entrypoint that 378 // it got from the outside, most of the times we can use this, but in some 379 // cases we can't. To see if we can use it, three conditions need to be met: 380 // 381 // 1. it needs to exist in the graph, i.e. be not nil 382 // 383 // 2. it can't be under maintenance 384 // 385 // 3. we need to be able to obtain a vector for it 386 387 localDeny := n.denyList.DeepCopy() 388 candidate := n.entryPointID 389 390 // make sure the loop cannot block forever. In most cases, results should be 391 // found within micro to milliseconds, this is just a last resort to handle 392 // the unknown somewhat gracefully, for example if there is a bug in the 393 // underlying object store and we cannot retrieve the vector in time, etc. 394 ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) 395 defer cancel() 396 397 for { 398 if err := ctx.Err(); err != nil { 399 return err 400 } 401 402 success, err := n.tryEpCandidate(candidate) 403 if err != nil { 404 return err 405 } 406 407 if success { 408 return nil 409 } 410 411 // no success so far, we need to keep going and find a better candidate 412 // make sure we never visit this candidate again 413 localDeny.Insert(candidate) 414 // now find a new one 415 416 alternative, _ := n.graph.findNewLocalEntrypoint(localDeny, 417 n.graph.currentMaximumLayer, candidate) 418 candidate = alternative 419 } 420 } 421 422 func (n *neighborFinderConnector) tryEpCandidate(candidate uint64) (bool, error) { 423 node := n.graph.nodeByID(candidate) 424 if node == nil { 425 return false, nil 426 } 427 428 if node.isUnderMaintenance() { 429 return false, nil 430 } 431 432 var dist float32 433 var ok bool 434 var err error 435 if n.distancer == nil { 436 dist, ok, err = n.graph.distBetweenNodeAndVec(candidate, n.nodeVec) 437 } else { 438 dist, ok, err = n.distancer.DistanceToNode(candidate) 439 } 440 if err != nil { 441 // not an error we could recover from - fail! 442 return false, errors.Wrapf(err, 443 "calculate distance between insert node and entrypoint") 444 } 445 if !ok { 446 return false, nil 447 } 448 449 // we were able to calculate a distance, we're good 450 n.entryPointDist = dist 451 n.entryPointID = candidate 452 return true, nil 453 }