// ListSet is a reusable visited-list with very cheap resets. Inspired by the
// C++ implementation in hnswlib, it can be reset without writing to the
// element slots: instead of clearing the list, the value that marks an
// element as visited (the marker) is changed. Only when the marker overflows
// do the slots actually have to be zeroed.
//
// The backing slice reserves its first element for the marker, which allows
// ListSet to be used as a value (no pointer is required):
//
//	set[0]      marker (version) of the current round
//	set[node+1] equals set[0] if node was visited in the current round
//
// A zero-value or freed ListSet is empty; it reports nothing as visited and
// allocates its storage on the first call to Visit.
type ListSet struct {
	set []uint8 // set[0] is reserved for the marker (version)
}

// Len returns the number of element slots in the list, not counting the
// marker. It returns 0 for a zero-value or freed list.
func (l ListSet) Len() int {
	if len(l.set) == 0 {
		return 0
	}
	return len(l.set) - 1
}

// free drops the backing slice so it can be garbage collected. The list is
// empty afterwards.
func (l *ListSet) free() { l.set = nil }

// NewList creates a new list with room for size elements. It allocates
// size+1 bytes: one slot per element plus one for the marker.
func NewList(size int) ListSet {
	set := make([]uint8, size+1)
	// The marker always starts at 1: slots are zero after allocation and
	// after an overflow reset, so 0 must never be a valid marker value.
	set[0] = 1
	return ListSet{set: set}
}

// Visit marks node as visited in the current round, growing the list if node
// does not fit yet.
//
// Visit panics if node is so large that its slot index cannot be represented
// as an int.
func (l *ListSet) Visit(node uint64) {
	// The slot index is checked in the unsigned domain: converting node to
	// int first would turn ids >= 2^63 into negative numbers that slip past
	// the bounds check. idx == 0 means node+1 wrapped around.
	idx := node + 1
	if idx == 0 || idx >= uint64(len(l.set)) { // resize
		const headroom = 1024
		const maxNode = uint64(int(^uint(0)>>1) - headroom)
		if node > maxNode {
			panic("visited: node id out of range")
		}

		oldLen := len(l.set)
		grown := make([]uint8, growth(oldLen, int(node)+headroom))
		copy(grown, l.set)
		if oldLen == 0 {
			// Growing from an empty list copies no marker; seed it so that
			// the zeroed slots are not mistaken for visited ones.
			grown[0] = 1
		}
		l.set = grown
	}
	l.set[idx] = l.set[0]
}

// Visited reports whether node has been visited since the last Reset. Nodes
// that lie outside the list are reported as not visited.
func (l *ListSet) Visited(node uint64) bool {
	// idx == 0 means node+1 wrapped around; without this check the marker
	// would be compared against itself.
	idx := node + 1
	return idx != 0 && idx < uint64(len(l.set)) && l.set[idx] == l.set[0]
}

// Reset starts a new round by advancing the marker, which invalidates all
// previous visits without touching the element slots. The slots are only
// zeroed when the marker overflows. Reset is a no-op on an empty list.
func (l *ListSet) Reset() {
	if len(l.set) == 0 {
		return
	}
	l.set[0]++
	if l.set[0] != 0 {
		return
	}
	// The marker wrapped around: stale values from earlier rounds could now
	// collide with new markers, so wipe everything and restart counting.
	for i := range l.set {
		l.set[i] = 0
	}
	l.set[0] = 1
}

// threshold is the size below which a list is grown by doubling.
const threshold = 2048

// growth calculates the new length for a list of length oldsize that has to
// hold at least size elements, growing in a smooth way.
//
// Inspired by the Go standard slice growth implementation.
func growth(oldsize, size int) int {
	doublesize := oldsize << 1
	if size > doublesize {
		return size
	}
	if oldsize < threshold {
		return doublesize // grow by 2x for small slices
	}
	// The newsize > 0 condition detects integer overflow and prevents an
	// infinite loop.
	newsize := oldsize
	for newsize > 0 && newsize < size {
		// Grow by roughly 1.25x for large slices; this formula allows for
		// smoothly growing sizes.
		newsize += (newsize + threshold) / 4
	}
	// Fall back to the requested size in case of overflow.
	if newsize <= 0 {
		newsize = size
	}
	return newsize
}