github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/internal/testkeys/testkeys.go

github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/internal/testkeys/testkeys.go (about)

     1  // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  // Package testkeys provides facilities for generating and comparing
     6  // human-readable test keys for use in tests and benchmarks. This package
     7  // provides a single Comparer implementation that compares all keys generated
     8  // by this package.
     9  //
    10  // Keys generated by this package may optionally have a 'suffix' encoding an
    11  // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
    12  // suffix are performed using integer value, not the byte representation.
    13  package testkeys
    14  
    15  import (
    16  	"bytes"
    17  	"cmp"
    18  	"fmt"
    19  	"math"
    20  	"strconv"
    21  	"strings"
    22  
    23  	"github.com/cockroachdb/pebble/internal/base"
    24  	"golang.org/x/exp/rand"
    25  )
    26  
    27  const alpha = "abcdefghijklmnopqrstuvwxyz"
    28  
    29  const suffixDelim = '@'
    30  
    31  var inverseAlphabet = make(map[byte]int64, len(alpha))
    32  
    33  func init() {
    34  	for i := range alpha {
    35  		inverseAlphabet[alpha[i]] = int64(i)
    36  	}
    37  }
    38  
    39  // MaxSuffixLen is the maximum length of a suffix generated by this package.
    40  var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
    41  
// Comparer is the comparer for test keys generated by this package.
//
// A key is divided by split into a prefix and an optional suffix beginning at
// the last '@' byte. Prefixes are ordered bytewise; within a prefix, ordering
// of suffixes is delegated to compare.
var Comparer = &base.Comparer{
	Compare: compare,
	// Equal reports whether compare considers the two keys equal.
	Equal:   func(a, b []byte) bool { return compare(a, b) == 0 },
	// AbbreviatedKey abbreviates only the prefix portion of the key; the
	// suffix is dropped before delegating to the default comparer.
	AbbreviatedKey: func(k []byte) uint64 {
		return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
	},
	FormatKey: base.DefaultFormatter,
	// Separator appends to dst either a itself or a key derived from the
	// prefixes of a and b by the default comparer's Separator.
	Separator: func(dst, a, b []byte) []byte {
		// If either key has no suffix, fall back to returning a unchanged.
		ai := split(a)
		if ai == len(a) {
			return append(dst, a...)
		}
		bi := split(b)
		if bi == len(b) {
			return append(dst, a...)
		}

		// If the keys' prefixes are the same just return a.
		if bytes.Equal(a[:ai], b[:bi]) {
			return append(dst, a...)
		}
		// Remember where the separator starts within dst so that it can be
		// inspected, and overwritten with a if necessary.
		n := len(dst)
		dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
		// Did it pick a separator different than a[:ai] -- if not we can't do better than a.
		buf := dst[n:]
		if bytes.Equal(a[:ai], buf) {
			return append(dst[:n], a...)
		}
		// The separator is > a[:ai], so return it
		return dst
	},
	// Successor appends to dst either a itself or a key derived from the
	// prefix of a by the default comparer's Successor.
	Successor: func(dst, a []byte) []byte {
		// A key without a suffix is returned unchanged.
		ai := split(a)
		if ai == len(a) {
			return append(dst, a...)
		}
		// Remember where the successor starts within dst so that it can be
		// inspected, and overwritten with a if necessary.
		n := len(dst)
		dst = base.DefaultComparer.Successor(dst, a[:ai])
		// Did it pick a successor different than a[:ai] -- if not we can't do better than a.
		buf := dst[n:]
		if bytes.Equal(a[:ai], buf) {
			return append(dst[:n], a...)
		}
		// The successor is > a[:ai], so return it.
		return dst
	},
	// ImmediateSuccessor appends a followed by a single 0x00 byte to dst. It
	// panics if a contains a suffix delimiter.
	ImmediateSuccessor: func(dst, a []byte) []byte {
		// TODO(jackson): Consider changing this Comparer to only support
		// representable prefix keys containing characters a-z.
		ai := split(a)
		if ai != len(a) {
			panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
		}
		return append(append(dst, a...), 0x00)
	},
	Split: split,
	Name:  "pebble.internal.testkeys",
}
   101  
   102  func compare(a, b []byte) int {
   103  	ai, bi := split(a), split(b)
   104  	if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
   105  		return v
   106  	}
   107  
   108  	if len(a[ai:]) == 0 {
   109  		if len(b[bi:]) == 0 {
   110  			return 0
   111  		}
   112  		return -1
   113  	} else if len(b[bi:]) == 0 {
   114  		return +1
   115  	}
   116  	return compareTimestamps(a[ai:], b[bi:])
   117  }
   118  
   119  func split(a []byte) int {
   120  	i := bytes.LastIndexByte(a, suffixDelim)
   121  	if i >= 0 {
   122  		return i
   123  	}
   124  	return len(a)
   125  }
   126  
   127  func compareTimestamps(a, b []byte) int {
   128  	ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte{suffixDelim}), 10, 64)
   129  	if err != nil {
   130  		panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
   131  	}
   132  	bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte{suffixDelim}), 10, 64)
   133  	if err != nil {
   134  		panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
   135  	}
   136  	return cmp.Compare(bi, ai)
   137  }
   138  
// Keyspace describes a finite keyspace of unsuffixed test keys.
//
// The interface includes an unexported method, so it can only be implemented
// by types declared within this package.
type Keyspace interface {
	// Count returns the number of keys that exist within this keyspace.
	Count() int64

	// MaxLen returns the maximum length, in bytes, of a key within this
	// keyspace. This is only guaranteed to return an upper bound.
	MaxLen() int

	// Slice returns the sub-keyspace from index i, inclusive, to index j,
	// exclusive. The receiver is unmodified.
	Slice(i, j int64) Keyspace

	// EveryN returns a key space that includes 1 key for every N keys in the
	// original keyspace. The receiver is unmodified.
	EveryN(n int64) Keyspace

	// key writes the i-th key to the buffer and returns the length, in
	// bytes, of the key that was written.
	key(buf []byte, i int64) int
}
   159  
   160  // Divvy divides the provided keyspace into N equal portions, containing
   161  // disjoint keys evenly distributed across the keyspace.
   162  func Divvy(ks Keyspace, n int64) []Keyspace {
   163  	ret := make([]Keyspace, n)
   164  	for i := int64(0); i < n; i++ {
   165  		ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
   166  	}
   167  	return ret
   168  }
   169  
   170  // Alpha constructs a keyspace consisting of all keys containing characters a-z,
   171  // with at most `maxLength` characters.
   172  func Alpha(maxLength int) Keyspace {
   173  	return alphabet{
   174  		alphabet:  []byte(alpha),
   175  		maxLength: maxLength,
   176  		increment: 1,
   177  	}
   178  }
   179  
   180  // KeyAt returns the i-th key within the keyspace with a suffix encoding the
   181  // timestamp t.
   182  func KeyAt(k Keyspace, i int64, t int64) []byte {
   183  	b := make([]byte, k.MaxLen()+MaxSuffixLen)
   184  	return b[:WriteKeyAt(b, k, i, t)]
   185  }
   186  
   187  // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
   188  // suffix encoding the timestamp t suffix. It returns the number of bytes
   189  // written.
   190  func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
   191  	n := WriteKey(dst, k, i)
   192  	n += WriteSuffix(dst[n:], t)
   193  	return n
   194  }
   195  
   196  // Suffix returns the test keys suffix representation of timestamp t.
   197  func Suffix(t int64) []byte {
   198  	b := make([]byte, MaxSuffixLen)
   199  	return b[:WriteSuffix(b, t)]
   200  }
   201  
   202  // SuffixLen returns the exact length of the given suffix when encoded.
   203  func SuffixLen(t int64) int {
   204  	// Begin at 1 for the '@' delimiter, 1 for a single digit.
   205  	n := 2
   206  	t /= 10
   207  	for t > 0 {
   208  		t /= 10
   209  		n++
   210  	}
   211  	return n
   212  }
   213  
   214  // ParseSuffix returns the integer representation of the encoded suffix.
   215  func ParseSuffix(s []byte) (int64, error) {
   216  	return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
   217  }
   218  
   219  // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
   220  // returning the number of bytes written.
   221  func WriteSuffix(dst []byte, t int64) int {
   222  	dst[0] = suffixDelim
   223  	n := 1
   224  	n += len(strconv.AppendInt(dst[n:n], t, 10))
   225  	return n
   226  }
   227  
   228  // Key returns the i-th unsuffixed key within the keyspace.
   229  func Key(k Keyspace, i int64) []byte {
   230  	b := make([]byte, k.MaxLen())
   231  	return b[:k.key(b, i)]
   232  }
   233  
   234  // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
   235  // returns the number of bytes written.
   236  func WriteKey(dst []byte, k Keyspace, i int64) int {
   237  	return k.key(dst, i)
   238  }
   239  
// alphabet is a Keyspace whose keys are built from the characters of a fixed
// alphabet, up to a maximum length.
type alphabet struct {
	// alphabet holds the characters from which keys are generated.
	alphabet  []byte
	// maxLength is the maximum key length; it is the value returned by MaxLen.
	maxLength int
	// headSkip is added to every requested index when generating a key, and
	// is subtracted from the total when counting keys.
	headSkip  int64
	// tailSkip is subtracted from the total when counting keys.
	tailSkip  int64
	// increment is the stride between consecutive keys: a requested index is
	// multiplied by it when generating a key.
	increment int64
}
   247  
   248  func (a alphabet) Count() int64 {
   249  	// Calculate the total number of keys, ignoring the increment.
   250  	total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
   251  
   252  	// The increment dictates that we take every N keys, where N = a.increment.
   253  	// Consider a total containing the 5 keys:
   254  	//   a  b  c  d  e
   255  	//   ^     ^     ^
   256  	// If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
   257  	// dividing by the increment, there may be remainder. If there is, there's
   258  	// one additional key in the alphabet.
   259  	count := total / a.increment
   260  	if total%a.increment > 0 {
   261  		count++
   262  	}
   263  	return count
   264  }
   265  
   266  func (a alphabet) MaxLen() int {
   267  	return a.maxLength
   268  }
   269  
   270  func (a alphabet) Slice(i, j int64) Keyspace {
   271  	s := a
   272  	s.headSkip += i
   273  	s.tailSkip += a.Count() - j
   274  	return s
   275  }
   276  
   277  func (a alphabet) EveryN(n int64) Keyspace {
   278  	s := a
   279  	s.increment *= n
   280  	return s
   281  }
   282  
   283  func keyCount(n, l int) int64 {
   284  	if n == 0 {
   285  		return 0
   286  	} else if n == 1 {
   287  		return int64(l)
   288  	}
   289  	// The number of representable keys in the keyspace is a function of the
   290  	// length of the alphabet n and the max key length l. Consider how the
   291  	// number of representable keys grows as l increases:
   292  	//
   293  	// l = 1: n
   294  	// l = 2: n + n^2
   295  	// l = 3: n + n^2 + n^3
   296  	// ...
   297  	// Σ i=(1...l) n^i = n*(n^l - 1)/(n-1)
   298  	return (int64(n) * (int64(math.Pow(float64(n), float64(l))) - 1)) / int64(n-1)
   299  }
   300  
   301  func (a alphabet) key(buf []byte, idx int64) int {
   302  	// This function generates keys of length 1..maxKeyLength, pulling
   303  	// characters from the alphabet. The idx determines which key to generate,
   304  	// generating the i-th lexicographically next key.
   305  	//
   306  	// The index to use is advanced by `headSkip`, allowing a keyspace to encode
   307  	// a subregion of the keyspace.
   308  	//
   309  	// Eg, alphabet = `ab`, maxKeyLength = 3:
   310  	//
   311  	//           aaa aab     aba abb         baa bab     bba bbb
   312  	//       aa          ab              ba          bb
   313  	//   a                           b
   314  	//   0   1   2   3   4   5   6   7   8   9   10  11  12  13
   315  	//
   316  	return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
   317  		keyCount(len(a.alphabet), a.maxLength))
   318  }
   319  
   320  func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
   321  	if keyCount == 0 || i > keyCount || i < 0 {
   322  		return 0
   323  	}
   324  
   325  	// Of the keyCount keys in the generative keyspace, how many are there
   326  	// starting with a particular character?
   327  	keysPerCharacter := keyCount / int64(len(alphabet))
   328  
   329  	// Find the character that the key at index i starts with and set it.
   330  	characterIdx := i / keysPerCharacter
   331  	buf[0] = alphabet[characterIdx]
   332  
   333  	// Consider characterIdx = 0, pointing to 'a'.
   334  	//
   335  	//           aaa aab     aba abb         baa bab     bba bbb
   336  	//       aa          ab              ba          bb
   337  	//   a                           b
   338  	//   0   1   2   3   4   5   6   7   8   9   10  11  12  13
   339  	//  \_________________________/
   340  	//    |keysPerCharacter| keys
   341  	//
   342  	// In our recursive call, we reduce the problem to:
   343  	//
   344  	//           aaa aab     aba abb
   345  	//       aa          ab
   346  	//       0   1   2   3   4   5
   347  	//     \________________________/
   348  	//    |keysPerCharacter-1| keys
   349  	//
   350  	// In the subproblem, there are keysPerCharacter-1 keys (eliminating the
   351  	// just 'a' key, plus any keys beginning with any other character).
   352  	//
   353  	// The index i is also offset, reduced by the count of keys beginning with
   354  	// characters earlier in the alphabet (keysPerCharacter*characterIdx) and
   355  	// the key consisting of just the 'a' (-1).
   356  	i = i - keysPerCharacter*characterIdx - 1
   357  	return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
   358  }
   359  
   360  // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
   361  // returning the index of a particular key, given the provided alphabet and max
   362  // length of a key.
   363  //
   364  // len(key) must be ≥ 1.
   365  func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
   366  	i, ok := alphabet[key[0]]
   367  	if !ok {
   368  		panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
   369  	}
   370  	// How many keys exist that start with the preceding i characters? Each of
   371  	// the i characters themselves are a key, plus the count of all the keys
   372  	// with one less character for each.
   373  	ret := i + i*keyCount(len(alphabet), n-1)
   374  	if len(key) > 1 {
   375  		ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
   376  	}
   377  	return ret
   378  }
   379  
   380  func abs(a int64) int64 {
   381  	if a < 0 {
   382  		return -a
   383  	}
   384  	return a
   385  }
   386  
   387  // RandomSeparator returns a random alphabetic key k such that a < k < b,
   388  // pulling randomness from the provided random number generator. If dst is
   389  // provided and the generated key fits within dst's capacity, the returned slice
   390  // will use dst's memory.
   391  //
   392  // If a prefix P exists such that Prefix(a) < P < Prefix(b), the generated key
   393  // will consist of the prefix P appended with the provided suffix. A zero suffix
   394  // generates an unsuffixed key. If no such prefix P exists, RandomSeparator will
   395  // try to find a key k with either Prefix(a) or Prefix(b) such that a < k < b,
   396  // but the generated key will not use the provided suffix. Note that it's
   397  // possible that no separator key exists (eg, a='a@2', b='a@1'), in which case
   398  // RandomSeparator returns nil.
   399  //
   400  // If RandomSeparator generates a new prefix, the generated prefix will have
   401  // length at most MAX(maxLength, len(Prefix(a)), len(Prefix(b))).
   402  //
   403  // RandomSeparator panics if a or b fails to decode.
   404  func RandomSeparator(dst, a, b []byte, suffix int64, maxLength int, rng *rand.Rand) []byte {
   405  	if Comparer.Compare(a, b) >= 0 {
   406  		return nil
   407  	}
   408  
   409  	// Determine both keys' logical prefixes and suffixes.
   410  	ai := Comparer.Split(a)
   411  	bi := Comparer.Split(b)
   412  	ap := a[:ai]
   413  	bp := b[:bi]
   414  	maxLength = max(maxLength, len(ap), len(bp))
   415  	var as, bs int64
   416  	var err error
   417  	if ai != len(a) {
   418  		as, err = ParseSuffix(a[ai:])
   419  		if err != nil {
   420  			panic(fmt.Sprintf("failed to parse suffix of %q", a))
   421  		}
   422  	}
   423  	if bi != len(b) {
   424  		bs, err = ParseSuffix(b[bi:])
   425  		if err != nil {
   426  			panic(fmt.Sprintf("failed to parse suffix of %q", b))
   427  		}
   428  	}
   429  
   430  	apIdx := computeAlphabetKeyIndex(ap, inverseAlphabet, maxLength)
   431  	bpIdx := computeAlphabetKeyIndex(bp, inverseAlphabet, maxLength)
   432  	diff := bpIdx - apIdx
   433  	generatedIdx := bpIdx
   434  	if diff > 0 {
   435  		var add int64 = diff + 1
   436  		var start int64 = apIdx
   437  		if as == 1 {
   438  			// There's no expressible key with prefix a greater than a@1. So,
   439  			// exclude ap.
   440  			start = apIdx + 1
   441  			add = diff
   442  		}
   443  		if bs == 0 {
   444  			// No key with prefix b can sort before b@0. We don't want to pick b.
   445  			add--
   446  		}
   447  		// We're allowing generated id to be in the range [start, start + add - 1].
   448  		if start > start+add-1 {
   449  			return nil
   450  		}
   451  		// If we can generate a key which is actually in the middle of apIdx
   452  		// and bpIdx use it so that we don't have to bother about timestamps.
   453  		generatedIdx = rng.Int63n(add) + start
   454  		for diff > 1 && generatedIdx == apIdx || generatedIdx == bpIdx {
   455  			generatedIdx = rng.Int63n(add) + start
   456  		}
   457  	}
   458  
   459  	switch {
   460  	case generatedIdx == apIdx && generatedIdx == bpIdx:
   461  		if abs(bs-as) <= 1 {
   462  			// There's no expressible suffix between the two, and there's no
   463  			// possible separator key.
   464  			return nil
   465  		}
   466  		// The key b is >= key a, but has the same prefix, so b must have the
   467  		// smaller timestamp, unless a has timestamp of 0.
   468  		//
   469  		// NB: The zero suffix (suffix-less) sorts before all other suffixes, so
   470  		// any suffix we generate will be greater than it.
   471  		if as == 0 {
   472  			// bs > as
   473  			suffix = bs + rng.Int63n(10) + 1
   474  		} else {
   475  			// bs < as.
   476  			// Generate suffix in range [bs + 1, as - 1]
   477  			suffix = bs + 1 + rng.Int63n(as-bs-1)
   478  		}
   479  	case generatedIdx == apIdx:
   480  		// NB: The zero suffix (suffix-less) sorts before all other suffixes, so
   481  		// any suffix we generate will be greater than it.
   482  		if as == 0 && suffix == 0 {
   483  			suffix++
   484  		} else if as != 0 && suffix >= as {
   485  			suffix = rng.Int63n(as)
   486  		}
   487  	case generatedIdx == bpIdx:
   488  		if suffix <= bs {
   489  			suffix = bs + rng.Int63n(10) + 1
   490  		}
   491  	}
   492  	if sz := maxLength + SuffixLen(suffix); cap(dst) < sz {
   493  		dst = make([]byte, sz)
   494  	} else {
   495  		dst = dst[:cap(dst)]
   496  	}
   497  	var w int
   498  	if suffix == 0 {
   499  		w = WriteKey(dst, Alpha(maxLength), generatedIdx)
   500  	} else {
   501  		w = WriteKeyAt(dst, Alpha(maxLength), generatedIdx, suffix)
   502  	}
   503  	return dst[:w]
   504  }