github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/visited/list_set.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package visited
    13  
    14  // ListSet is a reusable list with very efficient resets. Inspired by the C++
    15  // implementation in hnswlib it can be reset with zero memory writes in the
    16  // array by moving the match target instead of altering the list. Only after a
    17  // version overflow do we need to actually reset
    18  //
    19  // The new implementation uses a slice where the first element is reserved for the marker.
    20  // This allow us to use ListSet as a value (i.e. no pointer is required)
    21  // The marker (i.e. set[0]) allows for reusing the same list without having to zero all elements on each list reset.
    22  // Resetting the list takes place once the marker (i.e. set[0]) overflows
    23  type ListSet struct {
    24  	set []uint8 // set[0] is reserved for the marker (version)
    25  }
    26  
    27  // Len returns the number of elements in the list.
    28  func (l ListSet) Len() int { return len(l.set) - 1 }
    29  
    30  // free allocated slice. This list should not be reusable after this call.
    31  func (l *ListSet) free() { l.set = nil }
    32  
    33  // NewList creates a new list. It allocates memory for elements and marker
    34  func NewList(size int) ListSet {
    35  	set := make([]uint8, size+1)
    36  	set[0] = 1 // the marker starts always by 1 since on reset all element are set to 0
    37  	return ListSet{set: set}
    38  }
    39  
    40  // Visit sets element at node to the marker value
    41  func (l *ListSet) Visit(node uint64) {
    42  	if int(node) >= l.Len() { // resize
    43  		newset := make([]uint8, growth(len(l.set), int(node)+1024))
    44  		copy(newset, l.set)
    45  		l.set = newset
    46  	}
    47  	l.set[node+1] = l.set[0]
    48  }
    49  
    50  // Visited checks if l contains the specified node
    51  func (l *ListSet) Visited(node uint64) bool {
    52  	return int(node) < l.Len() && l.set[node+1] == l.set[0]
    53  }
    54  
    55  // Reset list only in case of an overflow.
    56  func (l *ListSet) Reset() {
    57  	l.set[0]++
    58  	if l.set[0] == 0 { // if overflowed
    59  		for i := range l.set {
    60  			l.set[i] = 0
    61  		}
    62  		l.set[0] = 1 // restart counting
    63  	}
    64  }
    65  
    66  // threshold let us double the size if the old size is below it
    67  const threshold = 2048
    68  
    69  // growth calculates the amount a list should grow in a smooth way.
    70  //
    71  // Inspired by the go standard implementation
    72  func growth(oldsize, size int) int {
    73  	doublesize := oldsize << 1
    74  	if size > doublesize {
    75  		return size
    76  	}
    77  	if oldsize < threshold {
    78  		return doublesize // grow by 2x for small slices
    79  	}
    80  	// detect overflow newsize > 0
    81  	// and prevent an infinite loop.
    82  	newsize := oldsize
    83  	for newsize > 0 && newsize < size {
    84  		// grow by 1.25x for large slices
    85  		// This formula allows for smothly growing
    86  		newsize += (newsize + threshold) / 4
    87  	}
    88  	// return requested size in case of overflow
    89  	if newsize <= 0 {
    90  		newsize = size
    91  	}
    92  	return newsize
    93  }