github.com/weaviate/sroar@v0.0.0-20230210105426-26108af5465d/keys.go (about)

     1  package sroar
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  )
     7  
// Fixed slot positions inside a node's backing []uint64.
var (
	indexNodeSize  = 0 // slot read by size() and written by setNodeSize()
	indexNumKeys   = 1 // slot read by numKeys() and written by setNumKeys()
	indexNodeStart = 2 // first slot of the key-value pairs; see keyOffset/valOffset
)
    13  
// node stores uint64 keys and the corresponding container offset in the buffer.
// It is a flat []uint64 made of a two-slot header followed by key-value pairs:
//
//   - n[indexNodeSize] (index 0) stores the size of the node in bytes.
//   - n[indexNumKeys] (index 1) stores the number of keys currently held.
//   - n[indexNodeStart:] (index 2 onward) stores the pairs; pair i keeps its key at
//     keyOffset(i) and its value at valOffset(i).
type node []uint64
    19  
    20  func keyOffset(i int) int { return indexNodeStart + 2*i }
    21  func valOffset(i int) int { return indexNodeStart + 2*i + 1 }
    22  
    23  func (n node) numKeys() int        { return int(n[indexNumKeys]) }
    24  func (n node) size() int           { return int(n[indexNodeSize]) }
    25  func (n node) maxKeys() int        { return (len(n) - indexNodeStart) / 2 }
    26  func (n node) key(i int) uint64    { return n[keyOffset(i)] }
    27  func (n node) val(i int) uint64    { return n[valOffset(i)] }
    28  func (n node) data(i int) []uint64 { return n[keyOffset(i):keyOffset(i+1)] }
    29  
    30  func (n node) uint64(idx int) uint64   { return n[idx] }
    31  func (n node) setAt(idx int, k uint64) { n[idx] = k }
    32  
    33  func (n node) setNumKeys(num int) { n[indexNumKeys] = uint64(num) }
    34  func (n node) setNodeSize(sz int) { n[indexNodeSize] = uint64(sz) }
    35  
    36  func (n node) maxKey() uint64 {
    37  	idx := n.numKeys()
    38  	// numKeys == index of the max key, because 0th index is being used for meta information.
    39  	if idx == 0 {
    40  		return 0
    41  	}
    42  	return n.key(idx)
    43  }
    44  
    45  func (n node) moveRight(lo int) {
    46  	hi := n.numKeys()
    47  	assert(!n.isFull())
    48  	// copy works despite of overlap in src and dst.
    49  	// See https://golang.org/pkg/builtin/#copy
    50  	copy(n[keyOffset(lo+1):keyOffset(hi+1)], n[keyOffset(lo):keyOffset(hi)])
    51  }
    52  
    53  // isFull checks that the node is already full.
    54  func (n node) isFull() bool {
    55  	return n.numKeys() == n.maxKeys()
    56  }
    57  
    58  // Search returns the index of a smallest key >= k in a node.
    59  func (n node) search(k uint64) int {
    60  	N := n.numKeys()
    61  	lo, hi := 0, N-1
    62  	for lo+16 <= hi {
    63  		mid := lo + (hi-lo)/2
    64  		ki := n.key(mid)
    65  		// fmt.Printf("lo: %d mid: %d hi: %d. ki: %#x k: %#x\n", lo, mid, hi, ki, k)
    66  
    67  		if ki < k {
    68  			lo = mid + 1
    69  		} else if ki > k {
    70  			hi = mid
    71  			// We should keep it equal, and not -1, because we'll take the first greater entry.
    72  		} else {
    73  			// fmt.Printf("returning mid: %d\n", mid)
    74  			return mid
    75  		}
    76  	}
    77  	for ; lo <= hi; lo++ {
    78  		ki := n.key(lo)
    79  		// fmt.Printf("itr. lo: %d hi: %d. ki: %#x k: %#x\n", lo, hi, ki, k)
    80  		if ki >= k {
    81  			return lo
    82  		}
    83  	}
    84  	return N
    85  	// if N < 4 {
    86  	// simd.Search has a bug which causes this to return index 11 when it should be returning index
    87  	// 9.
    88  	// }
    89  	// return int(simd.Search(n[keyOffset(0):keyOffset(N)], k))
    90  }
    91  
    92  func zeroOut(data []uint64) {
    93  	for i := 0; i < len(data); i++ {
    94  		data[i] = 0
    95  	}
    96  }
    97  
    98  // compacts the node i.e., remove all the kvs with value < lo. It returns the remaining number of
    99  // keys.
   100  func (n node) compact(lo uint64) int {
   101  	N := n.numKeys()
   102  	mk := n.maxKey()
   103  	var left, right int
   104  	for right = 0; right < N; right++ {
   105  		if n.val(right) < lo && n.key(right) < mk {
   106  			// Skip over this key. Don't copy it.
   107  			continue
   108  		}
   109  		// Valid data. Copy it from right to left. Advance left.
   110  		if left != right {
   111  			copy(n.data(left), n.data(right))
   112  		}
   113  		left++
   114  	}
   115  	// zero out rest of the kv pairs.
   116  	zeroOut(n[keyOffset(left):keyOffset(right)])
   117  	n.setNumKeys(left)
   118  
   119  	// If the only key we have is the max key, and its value is less than lo, then we can indicate
   120  	// to the caller by returning a zero that it's OK to drop the node.
   121  	if left == 1 && n.key(0) == mk && n.val(0) < lo {
   122  		return 0
   123  	}
   124  	return left
   125  }
   126  
   127  // getValue returns the value corresponding to the key if found.
   128  func (n node) getValue(k uint64) (uint64, bool) {
   129  	k &= mask // Ensure k has its lowest bits unset.
   130  	idx := n.search(k)
   131  	// key is not found
   132  	if idx >= n.numKeys() {
   133  		return 0, false
   134  	}
   135  	if ki := n.key(idx); ki == k {
   136  		return n.val(idx), true
   137  	}
   138  	return 0, false
   139  }
   140  
   141  // set returns true if it added a new key.
   142  func (n node) set(k, v uint64) bool {
   143  	N := n.numKeys()
   144  	idx := n.search(k)
   145  	if idx == N {
   146  		n.setNumKeys(N + 1)
   147  		n.setAt(keyOffset(idx), k)
   148  		n.setAt(valOffset(idx), v)
   149  		return true
   150  	}
   151  
   152  	ki := n.key(idx)
   153  	if N == n.maxKeys() {
   154  		// This happens during split of non-root node, when we are updating the child pointer of
   155  		// right node. Hence, the key should already exist.
   156  		assert(ki == k)
   157  	}
   158  	if ki == k {
   159  		n.setAt(valOffset(idx), v)
   160  		return false
   161  	}
   162  	assert(ki > k)
   163  	// Found the first entry which is greater than k. So, we need to fit k
   164  	// just before it. For that, we should move the rest of the data in the
   165  	// node to the right to make space for k.
   166  	n.moveRight(idx)
   167  	n.setNumKeys(N + 1)
   168  	n.setAt(keyOffset(idx), k)
   169  	n.setAt(valOffset(idx), v)
   170  	return true
   171  	// panic("shouldn't reach here")
   172  }
   173  
   174  func (n node) updateOffsets(beyond, by uint64, add bool) {
   175  	for i := 0; i < n.numKeys(); i++ {
   176  		if offset := n.val(i); offset > beyond {
   177  			if add {
   178  				n.setAt(valOffset(i), offset+by)
   179  			} else {
   180  				assert(offset >= by)
   181  				n.setAt(valOffset(i), offset-by)
   182  			}
   183  		}
   184  	}
   185  }
   186  
   187  func (n node) iterate(fn func(node, int)) {
   188  	for i := 0; i < n.maxKeys(); i++ {
   189  		if k := n.key(i); k > 0 {
   190  			fn(n, i)
   191  		} else {
   192  			break
   193  		}
   194  	}
   195  }
   196  
   197  func (n node) print(parentID uint64) {
   198  	var keys []string
   199  	n.iterate(func(n node, i int) {
   200  		keys = append(keys, fmt.Sprintf("%d", n.key(i)))
   201  	})
   202  	if len(keys) > 8 {
   203  		copy(keys[4:], keys[len(keys)-4:])
   204  		keys[3] = "..."
   205  		keys = keys[:8]
   206  	}
   207  	fmt.Printf("num keys: %d keys: %s\n", n.numKeys(), strings.Join(keys, " "))
   208  }