github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/neighbor_connections.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package hnsw
    13  
    14  import (
    15  	"context"
    16  	"math"
    17  	"time"
    18  
    19  	"github.com/pkg/errors"
    20  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    21  	"github.com/weaviate/weaviate/adapters/repos/db/priorityqueue"
    22  	"github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers"
    23  	"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/visited"
    24  )
    25  
    26  func (h *hnsw) findAndConnectNeighbors(node *vertex,
    27  	entryPointID uint64, nodeVec []float32, distancer compressionhelpers.CompressorDistancer, targetLevel, currentMaxLevel int,
    28  	denyList helpers.AllowList,
    29  ) error {
    30  	nfc := newNeighborFinderConnector(h, node, entryPointID, nodeVec, distancer, targetLevel,
    31  		currentMaxLevel, denyList, false)
    32  
    33  	return nfc.Do()
    34  }
    35  
    36  func (h *hnsw) reconnectNeighboursOf(node *vertex,
    37  	entryPointID uint64, nodeVec []float32, distancer compressionhelpers.CompressorDistancer, targetLevel, currentMaxLevel int,
    38  	denyList helpers.AllowList,
    39  ) error {
    40  	nfc := newNeighborFinderConnector(h, node, entryPointID, nodeVec, distancer, targetLevel,
    41  		currentMaxLevel, denyList, true)
    42  
    43  	return nfc.Do()
    44  }
    45  
    46  type neighborFinderConnector struct {
    47  	ctx             context.Context
    48  	graph           *hnsw
    49  	node            *vertex
    50  	entryPointID    uint64
    51  	entryPointDist  float32
    52  	nodeVec         []float32
    53  	distancer       compressionhelpers.CompressorDistancer
    54  	targetLevel     int
    55  	currentMaxLevel int
    56  	denyList        helpers.AllowList
    57  	// bufLinksLog     BufferedLinksLogger
    58  	tombstoneCleanupNodes bool
    59  }
    60  
    61  func newNeighborFinderConnector(graph *hnsw, node *vertex, entryPointID uint64,
    62  	nodeVec []float32, distancer compressionhelpers.CompressorDistancer, targetLevel, currentMaxLevel int,
    63  	denyList helpers.AllowList, tombstoneCleanupNodes bool,
    64  ) *neighborFinderConnector {
    65  	return &neighborFinderConnector{
    66  		ctx:                   graph.shutdownCtx,
    67  		graph:                 graph,
    68  		node:                  node,
    69  		entryPointID:          entryPointID,
    70  		nodeVec:               nodeVec,
    71  		distancer:             distancer,
    72  		targetLevel:           targetLevel,
    73  		currentMaxLevel:       currentMaxLevel,
    74  		denyList:              denyList,
    75  		tombstoneCleanupNodes: tombstoneCleanupNodes,
    76  	}
    77  }
    78  
    79  func (n *neighborFinderConnector) Do() error {
    80  	for level := min(n.targetLevel, n.currentMaxLevel); level >= 0; level-- {
    81  		err := n.doAtLevel(level)
    82  		if err != nil {
    83  			return errors.Wrapf(err, "at level %d", level)
    84  		}
    85  	}
    86  
    87  	return nil
    88  }
    89  
    90  func (n *neighborFinderConnector) processNode(id uint64) (float32, error) {
    91  	var dist float32
    92  	var ok bool
    93  	var err error
    94  
    95  	if n.distancer == nil {
    96  		dist, ok, err = n.graph.distBetweenNodeAndVec(id, n.nodeVec)
    97  	} else {
    98  		dist, ok, err = n.distancer.DistanceToNode(id)
    99  	}
   100  	if err != nil {
   101  		// not an error we could recover from - fail!
   102  		return math.MaxFloat32, errors.Wrapf(err,
   103  			"calculate distance between insert node and entrypoint")
   104  	}
   105  	if !ok {
   106  		return math.MaxFloat32, nil
   107  	}
   108  	return dist, nil
   109  }
   110  
   111  func (n *neighborFinderConnector) processRecursively(from uint64, results *priorityqueue.Queue[any], visited visited.ListSet, level, top int) error {
   112  	if err := n.ctx.Err(); err != nil {
   113  		return err
   114  	}
   115  
   116  	var pending []uint64
   117  	if uint64(len(n.graph.nodes)) < from || n.graph.nodes[from] == nil {
   118  		n.graph.handleDeletedNode(from)
   119  		return nil
   120  	}
   121  	n.graph.nodes[from].Lock()
   122  	connections := make([]uint64, len(n.graph.nodes[from].connections[level]))
   123  	copy(connections, n.graph.nodes[from].connections[level])
   124  	n.graph.nodes[from].Unlock()
   125  	for _, id := range connections {
   126  		if visited.Visited(id) {
   127  			continue
   128  		}
   129  		visited.Visit(id)
   130  		if n.denyList.Contains(id) {
   131  			pending = append(pending, id)
   132  			continue
   133  		}
   134  
   135  		dist, err := n.processNode(id)
   136  		if err != nil {
   137  			return err
   138  		}
   139  		if results.Len() >= top && dist < results.Top().Dist {
   140  			results.Pop()
   141  			results.Insert(id, dist)
   142  		} else if results.Len() < top {
   143  			results.Insert(id, dist)
   144  		}
   145  	}
   146  	for _, id := range pending {
   147  		if results.Len() >= top {
   148  			dist, err := n.processNode(id)
   149  			if err != nil {
   150  				return err
   151  			}
   152  			if dist > results.Top().Dist {
   153  				continue
   154  			}
   155  		}
   156  		err := n.processRecursively(id, results, visited, level, top)
   157  		if err != nil {
   158  			return err
   159  		}
   160  	}
   161  	return nil
   162  }
   163  
   164  func (n *neighborFinderConnector) doAtLevel(level int) error {
   165  	before := time.Now()
   166  
   167  	var results *priorityqueue.Queue[any]
   168  	var extraIDs []uint64 = nil
   169  	var total int = 0
   170  	var maxConnections int = n.graph.maximumConnections
   171  
   172  	if n.tombstoneCleanupNodes {
   173  		results = n.graph.pools.pqResults.GetMax(n.graph.efConstruction)
   174  
   175  		n.graph.pools.visitedListsLock.RLock()
   176  		visited := n.graph.pools.visitedLists.Borrow()
   177  		n.graph.pools.visitedListsLock.RUnlock()
   178  		n.node.Lock()
   179  		connections := make([]uint64, len(n.node.connections[level]))
   180  		copy(connections, n.node.connections[level])
   181  		n.node.Unlock()
   182  		visited.Visit(n.node.id)
   183  		top := n.graph.efConstruction
   184  		var pending []uint64 = nil
   185  
   186  		for _, id := range connections {
   187  			visited.Visit(id)
   188  			if n.denyList.Contains(id) {
   189  				pending = append(pending, id)
   190  				continue
   191  			}
   192  			extraIDs = append(extraIDs, id)
   193  			top--
   194  			total++
   195  		}
   196  		for _, id := range pending {
   197  			visited.Visit(id)
   198  			err := n.processRecursively(id, results, visited, level, top)
   199  			if err != nil {
   200  				return err
   201  			}
   202  		}
   203  		n.graph.pools.visitedListsLock.RLock()
   204  		n.graph.pools.visitedLists.Return(visited)
   205  		n.graph.pools.visitedListsLock.RUnlock()
   206  		if err := n.pickEntrypoint(); err != nil {
   207  			return errors.Wrap(err, "pick entrypoint at level beginning")
   208  		}
   209  		// use dynamic max connections only during tombstone cleanup
   210  		maxConnections = n.maximumConnections(level)
   211  	} else {
   212  		if err := n.pickEntrypoint(); err != nil {
   213  			return errors.Wrap(err, "pick entrypoint at level beginning")
   214  		}
   215  		eps := priorityqueue.NewMin[any](1)
   216  		eps.Insert(n.entryPointID, n.entryPointDist)
   217  		var err error
   218  
   219  		results, err = n.graph.searchLayerByVectorWithDistancer(n.nodeVec, eps, n.graph.efConstruction,
   220  			level, nil, n.distancer)
   221  		if err != nil {
   222  			return errors.Wrapf(err, "search layer at level %d", level)
   223  		}
   224  
   225  		n.graph.insertMetrics.findAndConnectSearch(before)
   226  		before = time.Now()
   227  	}
   228  
   229  	if err := n.graph.selectNeighborsHeuristic(results, maxConnections-total, n.denyList); err != nil {
   230  		return errors.Wrap(err, "heuristic")
   231  	}
   232  
   233  	n.graph.insertMetrics.findAndConnectHeuristic(before)
   234  	before = time.Now()
   235  
   236  	// // for distributed spike
   237  	// neighborsAtLevel[level] = neighbors
   238  
   239  	neighbors := make([]uint64, total, total+results.Len())
   240  	copy(neighbors, extraIDs)
   241  	for results.Len() > 0 {
   242  		id := results.Pop().ID
   243  		neighbors = append(neighbors, id)
   244  	}
   245  
   246  	n.graph.pools.pqResults.Put(results)
   247  
   248  	// set all outgoing in one go
   249  	n.node.setConnectionsAtLevel(level, neighbors)
   250  	n.graph.commitLog.ReplaceLinksAtLevel(n.node.id, level, neighbors)
   251  
   252  	for _, neighborID := range neighbors {
   253  		if err := n.connectNeighborAtLevel(neighborID, level); err != nil {
   254  			return errors.Wrapf(err, "connect neighbor %d", neighborID)
   255  		}
   256  	}
   257  
   258  	if len(neighbors) > 0 {
   259  		// there could be no neighbors left, if all are marked deleted, in this
   260  		// case, don't change the entrypoint
   261  		nextEntryPointID := neighbors[len(neighbors)-1]
   262  		if nextEntryPointID == n.node.id {
   263  			return nil
   264  		}
   265  
   266  		n.entryPointID = nextEntryPointID
   267  	}
   268  
   269  	n.graph.insertMetrics.findAndConnectUpdateConnections(before)
   270  	return nil
   271  }
   272  
   273  func (n *neighborFinderConnector) connectNeighborAtLevel(neighborID uint64,
   274  	level int,
   275  ) error {
   276  	neighbor := n.graph.nodeByID(neighborID)
   277  	if skip := n.skipNeighbor(neighbor); skip {
   278  		return nil
   279  	}
   280  
   281  	neighbor.Lock()
   282  	defer neighbor.Unlock()
   283  	if level > neighbor.level {
   284  		// upgrade neighbor level if the level is out of sync due to a delete re-assign
   285  		neighbor.upgradeToLevelNoLock(level)
   286  	}
   287  	currentConnections := neighbor.connectionsAtLevelNoLock(level)
   288  
   289  	maximumConnections := n.maximumConnections(level)
   290  	if len(currentConnections) < maximumConnections {
   291  		// we can simply append
   292  		// updatedConnections = append(currentConnections, n.node.id)
   293  		neighbor.appendConnectionAtLevelNoLock(level, n.node.id, maximumConnections)
   294  		if err := n.graph.commitLog.AddLinkAtLevel(neighbor.id, level, n.node.id); err != nil {
   295  			return err
   296  		}
   297  	} else {
   298  		// we need to run the heuristic
   299  
   300  		dist, ok, err := n.graph.distBetweenNodes(n.node.id, neighborID)
   301  		if err != nil {
   302  			return errors.Wrapf(err, "dist between %d and %d", n.node.id, neighborID)
   303  		}
   304  
   305  		if !ok {
   306  			// it seems either the node or the neighbor were deleted in the meantime,
   307  			// there is nothing we can do now
   308  			return nil
   309  		}
   310  
   311  		candidates := priorityqueue.NewMax[any](len(currentConnections) + 1)
   312  		candidates.Insert(n.node.id, dist)
   313  
   314  		for _, existingConnection := range currentConnections {
   315  			dist, ok, err := n.graph.distBetweenNodes(existingConnection, neighborID)
   316  			if err != nil {
   317  				return errors.Wrapf(err, "dist between %d and %d", existingConnection, neighborID)
   318  			}
   319  
   320  			if !ok {
   321  				// was deleted in the meantime
   322  				continue
   323  			}
   324  
   325  			candidates.Insert(existingConnection, dist)
   326  		}
   327  
   328  		err = n.graph.selectNeighborsHeuristic(candidates, maximumConnections, n.denyList)
   329  		if err != nil {
   330  			return errors.Wrap(err, "connect neighbors")
   331  		}
   332  
   333  		neighbor.resetConnectionsAtLevelNoLock(level)
   334  		if err := n.graph.commitLog.ClearLinksAtLevel(neighbor.id, uint16(level)); err != nil {
   335  			return err
   336  		}
   337  
   338  		for candidates.Len() > 0 {
   339  			id := candidates.Pop().ID
   340  			neighbor.appendConnectionAtLevelNoLock(level, id, maximumConnections)
   341  			if err := n.graph.commitLog.AddLinkAtLevel(neighbor.id, level, id); err != nil {
   342  				return err
   343  			}
   344  		}
   345  	}
   346  
   347  	return nil
   348  }
   349  
   350  func (n *neighborFinderConnector) skipNeighbor(neighbor *vertex) bool {
   351  	if neighbor == n.node {
   352  		// don't connect to self
   353  		return true
   354  	}
   355  
   356  	if neighbor == nil || n.graph.hasTombstone(neighbor.id) {
   357  		// don't connect to tombstoned nodes. This would only increase the
   358  		// cleanup that needs to be done. Even worse: A tombstoned node can be
   359  		// cleaned up at any time, also while we are connecting to it. So,
   360  		// while the node still exists right now, it might already be nil in
   361  		// the next line, which would lead to a nil-pointer panic.
   362  		return true
   363  	}
   364  
   365  	return false
   366  }
   367  
   368  func (n *neighborFinderConnector) maximumConnections(level int) int {
   369  	if level == 0 {
   370  		return n.graph.maximumConnectionsLayerZero
   371  	}
   372  
   373  	return n.graph.maximumConnections
   374  }
   375  
   376  func (n *neighborFinderConnector) pickEntrypoint() error {
   377  	// the neighborFinderConnector always has a suggestion for an entrypoint that
   378  	// it got from the outside, most of the times we can use this, but in some
   379  	// cases we can't. To see if we can use it, three conditions need to be met:
   380  	//
   381  	// 1. it needs to exist in the graph, i.e. be not nil
   382  	//
   383  	// 2. it can't be under maintenance
   384  	//
   385  	// 3. we need to be able to obtain a vector for it
   386  
   387  	localDeny := n.denyList.DeepCopy()
   388  	candidate := n.entryPointID
   389  
   390  	// make sure the loop cannot block forever. In most cases, results should be
   391  	// found within micro to milliseconds, this is just a last resort to handle
   392  	// the unknown somewhat gracefully, for example if there is a bug in the
   393  	// underlying object store and we cannot retrieve the vector in time, etc.
   394  	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
   395  	defer cancel()
   396  
   397  	for {
   398  		if err := ctx.Err(); err != nil {
   399  			return err
   400  		}
   401  
   402  		success, err := n.tryEpCandidate(candidate)
   403  		if err != nil {
   404  			return err
   405  		}
   406  
   407  		if success {
   408  			return nil
   409  		}
   410  
   411  		// no success so far, we need to keep going and find a better candidate
   412  		// make sure we never visit this candidate again
   413  		localDeny.Insert(candidate)
   414  		// now find a new one
   415  
   416  		alternative, _ := n.graph.findNewLocalEntrypoint(localDeny,
   417  			n.graph.currentMaximumLayer, candidate)
   418  		candidate = alternative
   419  	}
   420  }
   421  
   422  func (n *neighborFinderConnector) tryEpCandidate(candidate uint64) (bool, error) {
   423  	node := n.graph.nodeByID(candidate)
   424  	if node == nil {
   425  		return false, nil
   426  	}
   427  
   428  	if node.isUnderMaintenance() {
   429  		return false, nil
   430  	}
   431  
   432  	var dist float32
   433  	var ok bool
   434  	var err error
   435  	if n.distancer == nil {
   436  		dist, ok, err = n.graph.distBetweenNodeAndVec(candidate, n.nodeVec)
   437  	} else {
   438  		dist, ok, err = n.distancer.DistanceToNode(candidate)
   439  	}
   440  	if err != nil {
   441  		// not an error we could recover from - fail!
   442  		return false, errors.Wrapf(err,
   443  			"calculate distance between insert node and entrypoint")
   444  	}
   445  	if !ok {
   446  		return false, nil
   447  	}
   448  
   449  	// we were able to calculate a distance, we're good
   450  	n.entryPointDist = dist
   451  	n.entryPointID = candidate
   452  	return true, nil
   453  }