github.com/cockroachdb/tools@v0.0.0-20230222021103-a6d27438930d/go/callgraph/vta/internal/trie/bits.go (about)

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package trie
     6  
     7  import (
     8  	"math/bits"
     9  )
    10  
    11  // This file contains bit twiddling functions for Patricia tries.
    12  // Consult this paper for details.
    13  //   C. Okasaki and A. Gill, “Fast mergeable integer maps,” in ACM SIGPLAN
    14  //   Workshop on ML, September 1998, pp. 77–86.
    15  
// key is a key in a Map. It is represented as an unsigned 64-bit integer.
type key uint64
    18  
// bitpos is the position of a bit. A position is represented by a value
// that has a single 1 bit in that position.
// Examples:
//   - 0b0010 is the position of the `1` bit in 2.
//     It is the 3rd most specific bit position in big endian encoding
//     (0b0 and 0b1 are more specific).
//   - 0b0100 is the position of the bit that 1 and 5 disagree on.
//   - 0b0 is a special value indicating that all bits agree.
type bitpos uint64
    28  
// A prefix represents a set of keys that all agree with the
// prefix up to a bitpos m.
//
// The value for a prefix is determined by the mask(k, m) function.
// (See mask for details on the values.)
// A prefix `p` for position `m` matches a key `k` iff mask(k, m) == p.
// A prefix `p` for position `m` always satisfies mask(p, m) == p.
//
// A key is its own prefix for the bit position 64,
// e.g. seeing a `prefix(key)` is not a problem.
//
// Prefixes should never be turned into keys.
type prefix uint64
    42  
    43  // branchingBit returns the position of the first bit in `x` and `y`
    44  // that are not equal.
    45  func branchingBit(x, y prefix) bitpos {
    46  	p := x ^ y
    47  	if p == 0 {
    48  		return 0
    49  	}
    50  	return bitpos(1) << uint(bits.Len64(uint64(p))-1) // uint conversion needed for go1.12
    51  }
    52  
    53  // zeroBit returns true if k has a 0 bit at position `b`.
    54  func zeroBit(k prefix, b bitpos) bool {
    55  	return (uint64(k) & uint64(b)) == 0
    56  }
    57  
    58  // matchPrefix returns true if a prefix k matches a prefix p up to position `b`.
    59  func matchPrefix(k prefix, p prefix, b bitpos) bool {
    60  	return mask(k, b) == p
    61  }
    62  
    63  // mask returns a prefix of `k` with all bits after and including `b` zeroed out.
    64  //
    65  // In big endian encoding, this value is the [64-(m-1)] most significant bits of k
    66  // followed by a `0` bit at bitpos m, followed m-1 `1` bits.
    67  // Examples:
    68  //
    69  //	prefix(0b1011) for a bitpos 0b0100 represents the keys:
    70  //	  0b1000, 0b1001, 0b1010, 0b1011, 0b1100, 0b1101, 0b1110, 0b1111
    71  //
    72  // This mask function has the property that if matchPrefix(k, p, b), then
    73  // k <= p if and only if zeroBit(k, m). This induces binary search tree tries.
    74  // See Okasaki & Gill for more details about this choice of mask function.
    75  //
    76  // mask is idempotent for a given `b`, i.e. mask(mask(p, b), b) == mask(p,b).
    77  func mask(k prefix, b bitpos) prefix {
    78  	return prefix((uint64(k) | (uint64(b) - 1)) & (^uint64(b)))
    79  }
    80  
    81  // ord returns true if m comes before n in the bit ordering.
    82  func ord(m, n bitpos) bool {
    83  	return m > n // big endian encoding
    84  }
    85  
    86  // prefixesOverlap returns true if there is some key a prefix `p` for bitpos `m`
    87  // can hold that can also be held by a prefix `q` for some bitpos `n`.
    88  //
    89  // This is equivalent to:
    90  //
    91  //	m ==n && p == q,
    92  //	higher(m, n) && matchPrefix(q, p, m), or
    93  //	higher(n, m) && matchPrefix(p, q, n)
    94  func prefixesOverlap(p prefix, m bitpos, q prefix, n bitpos) bool {
    95  	fbb := n
    96  	if ord(m, n) {
    97  		fbb = m
    98  	}
    99  	return mask(p, fbb) == mask(q, fbb)
   100  	// Lemma:
   101  	//   mask(p, fbb) == mask(q, fbb)
   102  	// iff
   103  	//   m > n && matchPrefix(q, p, m) or  (note: big endian encoding)
   104  	//   m < n && matchPrefix(p, q, n) or  (note: big endian encoding)
   105  	//   m ==n && p == q
   106  	// Quick-n-dirty proof:
   107  	// p == mask(p0, m) for some p0 by precondition.
   108  	// q == mask(q0, n) for some q0 by precondition.
   109  	// So mask(p, m) == p and mask(q, n) == q as mask(*, n') is idempotent.
   110  	//
   111  	// [=> proof]
   112  	// Suppose mask(p, fbb) == mask(q, fbb).
   113  	// if m ==n, p == mask(p, m) == mask(p, fbb) == mask(q, fbb) == mask(q, n) == q
   114  	// if m > n, fbb = firstBranchBit(m, n) = m (big endian).
   115  	//   p == mask(p, m) == mask(p, fbb) == mask(q, fbb) == mask(q, m)
   116  	//   so mask(q, m) == p or matchPrefix(q, p, m)
   117  	// if m < n, is symmetric to the above.
   118  	//
   119  	// [<= proof]
   120  	// case m ==n && p == q. Then mask(p, fbb) == mask(q, fbb)
   121  	//
   122  	// case m > n && matchPrefix(q, p, m).
   123  	// fbb == firstBranchBit(m, n) == m (by m>n).
   124  	// mask(q, fbb) == mask(q, m) == p == mask(p, m) == mask(p, fbb)
   125  	//
   126  	// case m < n && matchPrefix(p, q, n) is symmetric.
   127  }