github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/testkeys/testkeys.go

github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/testkeys/testkeys.go (about)

     1  // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  // Package testkeys provides facilities for generating and comparing
     6  // human-readable test keys for use in tests and benchmarks. This package
     7  // provides a single Comparer implementation that compares all keys generated
     8  // by this package.
     9  //
    10  // Keys generated by this package may optionally have a 'suffix' encoding an
    11  // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
    12  // suffix are performed using integer value, not the byte representation.
    13  package testkeys
    14  
    15  import (
    16  	"bytes"
    17  	"fmt"
    18  	"math"
    19  	"strconv"
    20  	"strings"
    21  
    22  	"github.com/zuoyebang/bitalostable/internal/base"
    23  )
    24  
    25  const alpha = "abcdefghijklmnopqrstuvwxyz"
    26  
    27  const suffixDelim = '@'
    28  
    29  // MaxSuffixLen is the maximum length of a suffix generated by this package.
    30  var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", math.MaxInt64))
    31  
    32  // Comparer is the comparer for test keys generated by this package.
    33  var Comparer *base.Comparer = &base.Comparer{
    34  	Compare: compare,
    35  	Equal:   func(a, b []byte) bool { return compare(a, b) == 0 },
    36  	AbbreviatedKey: func(k []byte) uint64 {
    37  		return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
    38  	},
    39  	FormatKey: base.DefaultFormatter,
    40  	Separator: func(dst, a, b []byte) []byte {
    41  		ai := split(a)
    42  		if ai == len(a) {
    43  			return append(dst, a...)
    44  		}
    45  		bi := split(b)
    46  		if bi == len(b) {
    47  			return append(dst, a...)
    48  		}
    49  
    50  		// If the keys are the same just return a.
    51  		if bytes.Equal(a[:ai], b[:bi]) {
    52  			return append(dst, a...)
    53  		}
    54  		n := len(dst)
    55  		dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
    56  		// Did it pick a separator different than a[:ai] -- if not we can't do better than a.
    57  		buf := dst[n:]
    58  		if bytes.Equal(a[:ai], buf) {
    59  			return append(dst[:n], a...)
    60  		}
    61  		// The separator is > a[:ai], so we only need to add the sentinel.
    62  		return append(dst, 0)
    63  	},
    64  	Successor: func(dst, a []byte) []byte {
    65  		ai := split(a)
    66  		if ai == len(a) {
    67  			return append(dst, a...)
    68  		}
    69  		n := len(dst)
    70  		dst = base.DefaultComparer.Successor(dst, a[:ai])
    71  		// Did it pick a successor different than a[:ai] -- if not we can't do better than a.
    72  		buf := dst[n:]
    73  		if bytes.Equal(a[:ai], buf) {
    74  			return append(dst[:n], a...)
    75  		}
    76  		// The successor is > a[:ai], so we only need to add the sentinel.
    77  		return append(dst, 0)
    78  	},
    79  	ImmediateSuccessor: func(dst, a []byte) []byte {
    80  		// TODO(jackson): Consider changing this Comparer to only support
    81  		// representable prefix keys containing characters a-z.
    82  		ai := split(a)
    83  		if ai != len(a) {
    84  			panic("bitalostable: ImmediateSuccessor invoked with a non-prefix key")
    85  		}
    86  		return append(append(dst, a...), 0x00)
    87  	},
    88  	Split: split,
    89  	Name:  "bitalostable.internal.testkeys",
    90  }
    91  
    92  func compare(a, b []byte) int {
    93  	ai, bi := split(a), split(b)
    94  	if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
    95  		return v
    96  	}
    97  
    98  	if len(a[ai:]) == 0 {
    99  		if len(b[bi:]) == 0 {
   100  			return 0
   101  		}
   102  		return -1
   103  	} else if len(b[bi:]) == 0 {
   104  		return +1
   105  	}
   106  	return compareTimestamps(a[ai:], b[bi:])
   107  }
   108  
   109  func split(a []byte) int {
   110  	i := bytes.LastIndexByte(a, suffixDelim)
   111  	if i >= 0 {
   112  		return i
   113  	}
   114  	return len(a)
   115  }
   116  
   117  func compareTimestamps(a, b []byte) int {
   118  	ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte{suffixDelim}), 10, 64)
   119  	if err != nil {
   120  		panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
   121  	}
   122  	bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte{suffixDelim}), 10, 64)
   123  	if err != nil {
   124  		panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
   125  	}
   126  	switch {
   127  	case ai < bi:
   128  		return +1
   129  	case ai > bi:
   130  		return -1
   131  	default:
   132  		return 0
   133  	}
   134  }
   135  
   136  // Keyspace describes a finite keyspace of unsuffixed test keys.
   137  type Keyspace interface {
   138  	// Count returns the number of keys that exist within this keyspace.
   139  	Count() int
   140  
   141  	// MaxLen returns the maximum length, in bytes, of a key within this
   142  	// keyspace. This is only guaranteed to return an upper bound.
   143  	MaxLen() int
   144  
   145  	// Slice returns the sub-keyspace from index i, inclusive, to index j,
   146  	// exclusive. The receiver is unmodified.
   147  	Slice(i, j int) Keyspace
   148  
   149  	// EveryN returns a key space that includes 1 key for every N keys in the
   150  	// original keyspace. The receiver is unmodified.
   151  	EveryN(n int) Keyspace
   152  
   153  	key(buf []byte, i int) int
   154  }
   155  
   156  // Divvy divides the provided keyspace into N equal portions, containing
   157  // disjoint keys evenly distributed across the keyspace.
   158  func Divvy(ks Keyspace, n int) []Keyspace {
   159  	ret := make([]Keyspace, n)
   160  	for i := 0; i < n; i++ {
   161  		ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
   162  	}
   163  	return ret
   164  }
   165  
   166  // Alpha constructs a keyspace consisting of all keys containing characters a-z,
   167  // with at most `maxLength` characters.
   168  func Alpha(maxLength int) Keyspace {
   169  	return alphabet{
   170  		alphabet:  []byte(alpha),
   171  		maxLength: maxLength,
   172  		increment: 1,
   173  	}
   174  }
   175  
   176  // KeyAt returns the i-th key within the keyspace with a suffix encoding the
   177  // timestamp t.
   178  func KeyAt(k Keyspace, i int, t int) []byte {
   179  	b := make([]byte, k.MaxLen()+MaxSuffixLen)
   180  	return b[:WriteKeyAt(b, k, i, t)]
   181  }
   182  
   183  // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
   184  // suffix encoding the timestamp t suffix. It returns the number of bytes
   185  // written.
   186  func WriteKeyAt(dst []byte, k Keyspace, i int, t int) int {
   187  	n := WriteKey(dst, k, i)
   188  	n += WriteSuffix(dst[n:], t)
   189  	return n
   190  }
   191  
   192  // Suffix returns the test keys suffix representation of timestamp t.
   193  func Suffix(t int) []byte {
   194  	b := make([]byte, MaxSuffixLen)
   195  	return b[:WriteSuffix(b, t)]
   196  }
   197  
   198  // SuffixLen returns the exact length of the given suffix when encoded.
   199  func SuffixLen(t int) int {
   200  	// Begin at 1 for the '@' delimiter, 1 for a single digit.
   201  	n := 2
   202  	t /= 10
   203  	for t > 0 {
   204  		t /= 10
   205  		n++
   206  	}
   207  	return n
   208  }
   209  
   210  // ParseSuffix returns the integer representation of the encoded suffix.
   211  func ParseSuffix(s []byte) (int, error) {
   212  	return strconv.Atoi(strings.TrimPrefix(string(s), string(suffixDelim)))
   213  }
   214  
   215  // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
   216  // returning the number of bytes written.
   217  func WriteSuffix(dst []byte, t int) int {
   218  	dst[0] = suffixDelim
   219  	n := 1
   220  	n += len(strconv.AppendInt(dst[n:n], int64(t), 10))
   221  	return n
   222  }
   223  
   224  // Key returns the i-th unsuffixed key within the keyspace.
   225  func Key(k Keyspace, i int) []byte {
   226  	b := make([]byte, k.MaxLen())
   227  	return b[:k.key(b, i)]
   228  }
   229  
   230  // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
   231  // returns the number of bytes written.
   232  func WriteKey(dst []byte, k Keyspace, i int) int {
   233  	return k.key(dst, i)
   234  }
   235  
   236  type alphabet struct {
   237  	alphabet  []byte
   238  	maxLength int
   239  	headSkip  int
   240  	tailSkip  int
   241  	increment int
   242  }
   243  
   244  func (a alphabet) Count() int {
   245  	// Calculate the total number of keys, ignoring the increment.
   246  	total := (keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip)
   247  
   248  	// The increment dictates that we take every N keys, where N = a.increment.
   249  	// Consider a total containing the 5 keys:
   250  	//   a  b  c  d  e
   251  	//   ^     ^     ^
   252  	// If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
   253  	// dividing by the increment, there may be remainder. If there is, there's
   254  	// one additional key in the alphabet.
   255  	count := total / a.increment
   256  	if total%a.increment > 0 {
   257  		count++
   258  	}
   259  	return count
   260  }
   261  
   262  func (a alphabet) MaxLen() int {
   263  	return a.maxLength
   264  }
   265  
   266  func (a alphabet) Slice(i, j int) Keyspace {
   267  	s := a
   268  	s.headSkip += i
   269  	s.tailSkip += a.Count() - j
   270  	return s
   271  }
   272  
   273  func (a alphabet) EveryN(n int) Keyspace {
   274  	s := a
   275  	s.increment *= n
   276  	return s
   277  }
   278  
   279  func keyCount(n, l int) int {
   280  	// The number of representable keys in the keyspace is a function of the
   281  	// length of the alphabet n and the max key length l. Consider how the
   282  	// number of representable keys grows as l increases:
   283  	//
   284  	// l = 1: n
   285  	// l = 2: n + n^2
   286  	// l = 3: n + n^2 + n^3
   287  	// ...
   288  	// Σ i=(1...l) n^i = n*(n^l - 1)/(n-1)
   289  	return (n * (int(math.Pow(float64(n), float64(l))) - 1)) / (n - 1)
   290  }
   291  
   292  func (a alphabet) key(buf []byte, idx int) int {
   293  	// This function generates keys of length 1..maxKeyLength, pulling
   294  	// characters from the alphabet. The idx determines which key to generate,
   295  	// generating the i-th lexicographically next key.
   296  	//
   297  	// The index to use is advanced by `headSkip`, allowing a keyspace to encode
   298  	// a subregion of the keyspace.
   299  	//
   300  	// Eg, alphabet = `ab`, maxKeyLength = 3:
   301  	//
   302  	//           aaa aab     aba abb         baa bab     bba bbb
   303  	//       aa          ab              ba          bb
   304  	//   a                           b
   305  	//   0   1   2   3   4   5   6   7   8   9   10  11  12  13
   306  	//
   307  	return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
   308  		keyCount(len(a.alphabet), a.maxLength))
   309  }
   310  
   311  func generateAlphabetKey(buf, alphabet []byte, i, keyCount int) int {
   312  	if keyCount == 0 || i > keyCount || i < 0 {
   313  		return 0
   314  	}
   315  
   316  	// Of the keyCount keys in the generative keyspace, how many are there
   317  	// starting with a particular character?
   318  	keysPerCharacter := keyCount / len(alphabet)
   319  
   320  	// Find the character that the key at index i starts with and set it.
   321  	characterIdx := i / keysPerCharacter
   322  	buf[0] = alphabet[characterIdx]
   323  
   324  	// Consider characterIdx = 0, pointing to 'a'.
   325  	//
   326  	//           aaa aab     aba abb         baa bab     bba bbb
   327  	//       aa          ab              ba          bb
   328  	//   a                           b
   329  	//   0   1   2   3   4   5   6   7   8   9   10  11  12  13
   330  	//  \_________________________/
   331  	//    |keysPerCharacter| keys
   332  	//
   333  	// In our recursive call, we reduce the problem to:
   334  	//
   335  	//           aaa aab     aba abb
   336  	//       aa          ab
   337  	//       0   1   2   3   4   5
   338  	//     \________________________/
   339  	//    |keysPerCharacter-1| keys
   340  	//
   341  	// In the subproblem, there are keysPerCharacter-1 keys (eliminating the
   342  	// just 'a' key, plus any keys beginning with any other character).
   343  	//
   344  	// The index i is also offset, reduced by the count of keys beginning with
   345  	// characters earlier in the alphabet (keysPerCharacter*characterIdx) and
   346  	// the key consisting of just the 'a' (-1).
   347  	i = i - keysPerCharacter*characterIdx - 1
   348  	return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
   349  }