github.com/weaviate/sroar@v0.0.0-20230210105426-26108af5465d/keys.go (about) 1 package sroar 2 3 import ( 4 "fmt" 5 "strings" 6 ) 7 8 var ( 9 indexNodeSize = 0 10 indexNumKeys = 1 11 indexNodeStart = 2 12 ) 13 14 // node stores uint64 keys and the corresponding container offset in the buffer. 15 // 0th index (indexNodeSize) is used for storing the size of node in bytes. 16 // 1st index (indexNumKeys) is used for storing the number of keys. 17 // 2nd index is where we start writing the key-value pairs. 18 type node []uint64 19 20 func keyOffset(i int) int { return indexNodeStart + 2*i } 21 func valOffset(i int) int { return indexNodeStart + 2*i + 1 } 22 23 func (n node) numKeys() int { return int(n[indexNumKeys]) } 24 func (n node) size() int { return int(n[indexNodeSize]) } 25 func (n node) maxKeys() int { return (len(n) - indexNodeStart) / 2 } 26 func (n node) key(i int) uint64 { return n[keyOffset(i)] } 27 func (n node) val(i int) uint64 { return n[valOffset(i)] } 28 func (n node) data(i int) []uint64 { return n[keyOffset(i):keyOffset(i+1)] } 29 30 func (n node) uint64(idx int) uint64 { return n[idx] } 31 func (n node) setAt(idx int, k uint64) { n[idx] = k } 32 33 func (n node) setNumKeys(num int) { n[indexNumKeys] = uint64(num) } 34 func (n node) setNodeSize(sz int) { n[indexNodeSize] = uint64(sz) } 35 36 func (n node) maxKey() uint64 { 37 idx := n.numKeys() 38 // numKeys == index of the max key, because 0th index is being used for meta information. 39 if idx == 0 { 40 return 0 41 } 42 return n.key(idx) 43 } 44 45 func (n node) moveRight(lo int) { 46 hi := n.numKeys() 47 assert(!n.isFull()) 48 // copy works despite of overlap in src and dst. 49 // See https://golang.org/pkg/builtin/#copy 50 copy(n[keyOffset(lo+1):keyOffset(hi+1)], n[keyOffset(lo):keyOffset(hi)]) 51 } 52 53 // isFull checks that the node is already full. 54 func (n node) isFull() bool { 55 return n.numKeys() == n.maxKeys() 56 } 57 58 // Search returns the index of a smallest key >= k in a node. 59 func (n node) search(k uint64) int { 60 N := n.numKeys() 61 lo, hi := 0, N-1 62 for lo+16 <= hi { 63 mid := lo + (hi-lo)/2 64 ki := n.key(mid) 65 // fmt.Printf("lo: %d mid: %d hi: %d. ki: %#x k: %#x\n", lo, mid, hi, ki, k) 66 67 if ki < k { 68 lo = mid + 1 69 } else if ki > k { 70 hi = mid 71 // We should keep it equal, and not -1, because we'll take the first greater entry. 72 } else { 73 // fmt.Printf("returning mid: %d\n", mid) 74 return mid 75 } 76 } 77 for ; lo <= hi; lo++ { 78 ki := n.key(lo) 79 // fmt.Printf("itr. lo: %d hi: %d. ki: %#x k: %#x\n", lo, hi, ki, k) 80 if ki >= k { 81 return lo 82 } 83 } 84 return N 85 // if N < 4 { 86 // simd.Search has a bug which causes this to return index 11 when it should be returning index 87 // 9. 88 // } 89 // return int(simd.Search(n[keyOffset(0):keyOffset(N)], k)) 90 } 91 92 func zeroOut(data []uint64) { 93 for i := 0; i < len(data); i++ { 94 data[i] = 0 95 } 96 } 97 98 // compacts the node i.e., remove all the kvs with value < lo. It returns the remaining number of 99 // keys. 100 func (n node) compact(lo uint64) int { 101 N := n.numKeys() 102 mk := n.maxKey() 103 var left, right int 104 for right = 0; right < N; right++ { 105 if n.val(right) < lo && n.key(right) < mk { 106 // Skip over this key. Don't copy it. 107 continue 108 } 109 // Valid data. Copy it from right to left. Advance left. 110 if left != right { 111 copy(n.data(left), n.data(right)) 112 } 113 left++ 114 } 115 // zero out rest of the kv pairs. 116 zeroOut(n[keyOffset(left):keyOffset(right)]) 117 n.setNumKeys(left) 118 119 // If the only key we have is the max key, and its value is less than lo, then we can indicate 120 // to the caller by returning a zero that it's OK to drop the node. 121 if left == 1 && n.key(0) == mk && n.val(0) < lo { 122 return 0 123 } 124 return left 125 } 126 127 // getValue returns the value corresponding to the key if found. 128 func (n node) getValue(k uint64) (uint64, bool) { 129 k &= mask // Ensure k has its lowest bits unset. 130 idx := n.search(k) 131 // key is not found 132 if idx >= n.numKeys() { 133 return 0, false 134 } 135 if ki := n.key(idx); ki == k { 136 return n.val(idx), true 137 } 138 return 0, false 139 } 140 141 // set returns true if it added a new key. 142 func (n node) set(k, v uint64) bool { 143 N := n.numKeys() 144 idx := n.search(k) 145 if idx == N { 146 n.setNumKeys(N + 1) 147 n.setAt(keyOffset(idx), k) 148 n.setAt(valOffset(idx), v) 149 return true 150 } 151 152 ki := n.key(idx) 153 if N == n.maxKeys() { 154 // This happens during split of non-root node, when we are updating the child pointer of 155 // right node. Hence, the key should already exist. 156 assert(ki == k) 157 } 158 if ki == k { 159 n.setAt(valOffset(idx), v) 160 return false 161 } 162 assert(ki > k) 163 // Found the first entry which is greater than k. So, we need to fit k 164 // just before it. For that, we should move the rest of the data in the 165 // node to the right to make space for k. 166 n.moveRight(idx) 167 n.setNumKeys(N + 1) 168 n.setAt(keyOffset(idx), k) 169 n.setAt(valOffset(idx), v) 170 return true 171 // panic("shouldn't reach here") 172 } 173 174 func (n node) updateOffsets(beyond, by uint64, add bool) { 175 for i := 0; i < n.numKeys(); i++ { 176 if offset := n.val(i); offset > beyond { 177 if add { 178 n.setAt(valOffset(i), offset+by) 179 } else { 180 assert(offset >= by) 181 n.setAt(valOffset(i), offset-by) 182 } 183 } 184 } 185 } 186 187 func (n node) iterate(fn func(node, int)) { 188 for i := 0; i < n.maxKeys(); i++ { 189 if k := n.key(i); k > 0 { 190 fn(n, i) 191 } else { 192 break 193 } 194 } 195 } 196 197 func (n node) print(parentID uint64) { 198 var keys []string 199 n.iterate(func(n node, i int) { 200 keys = append(keys, fmt.Sprintf("%d", n.key(i))) 201 }) 202 if len(keys) > 8 { 203 copy(keys[4:], keys[len(keys)-4:]) 204 keys[3] = "..." 205 keys = keys[:8] 206 } 207 fmt.Printf("num keys: %d keys: %s\n", n.numKeys(), strings.Join(keys, " ")) 208 }