github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/testkeys/testkeys.go (about)

     1  // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  // Package testkeys provides facilities for generating and comparing
     6  // human-readable test keys for use in tests and benchmarks. This package
     7  // provides a single Comparer implementation that compares all keys generated
     8  // by this package.
     9  //
    10  // Keys generated by this package may optionally have a 'suffix' encoding an
    11  // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the
    12  // suffix are performed using integer value, not the byte representation.
    13  package testkeys
    14  
    15  import (
    16  	"bytes"
    17  	"fmt"
    18  	"math"
    19  	"strconv"
    20  	"strings"
    21  
    22  	"github.com/cockroachdb/pebble/internal/base"
    23  	"golang.org/x/exp/constraints"
    24  	"golang.org/x/exp/rand"
    25  )
    26  
    27  const alpha = "abcdefghijklmnopqrstuvwxyz"
    28  
    29  const suffixDelim = '@'
    30  
    31  var inverseAlphabet = make(map[byte]int64, len(alpha))
    32  
    33  func init() {
    34  	for i := range alpha {
    35  		inverseAlphabet[alpha[i]] = int64(i)
    36  	}
    37  }
    38  
    39  // MaxSuffixLen is the maximum length of a suffix generated by this package.
    40  var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64)))
    41  
    42  // Comparer is the comparer for test keys generated by this package.
    43  var Comparer = &base.Comparer{
    44  	Compare: compare,
    45  	Equal:   func(a, b []byte) bool { return compare(a, b) == 0 },
    46  	AbbreviatedKey: func(k []byte) uint64 {
    47  		return base.DefaultComparer.AbbreviatedKey(k[:split(k)])
    48  	},
    49  	FormatKey: base.DefaultFormatter,
    50  	Separator: func(dst, a, b []byte) []byte {
    51  		ai := split(a)
    52  		if ai == len(a) {
    53  			return append(dst, a...)
    54  		}
    55  		bi := split(b)
    56  		if bi == len(b) {
    57  			return append(dst, a...)
    58  		}
    59  
    60  		// If the keys are the same just return a.
    61  		if bytes.Equal(a[:ai], b[:bi]) {
    62  			return append(dst, a...)
    63  		}
    64  		n := len(dst)
    65  		dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi])
    66  		// Did it pick a separator different than a[:ai] -- if not we can't do better than a.
    67  		buf := dst[n:]
    68  		if bytes.Equal(a[:ai], buf) {
    69  			return append(dst[:n], a...)
    70  		}
    71  		// The separator is > a[:ai], so return it
    72  		return dst
    73  	},
    74  	Successor: func(dst, a []byte) []byte {
    75  		ai := split(a)
    76  		if ai == len(a) {
    77  			return append(dst, a...)
    78  		}
    79  		n := len(dst)
    80  		dst = base.DefaultComparer.Successor(dst, a[:ai])
    81  		// Did it pick a successor different than a[:ai] -- if not we can't do better than a.
    82  		buf := dst[n:]
    83  		if bytes.Equal(a[:ai], buf) {
    84  			return append(dst[:n], a...)
    85  		}
    86  		// The successor is > a[:ai], so return it.
    87  		return dst
    88  	},
    89  	ImmediateSuccessor: func(dst, a []byte) []byte {
    90  		// TODO(jackson): Consider changing this Comparer to only support
    91  		// representable prefix keys containing characters a-z.
    92  		ai := split(a)
    93  		if ai != len(a) {
    94  			panic("pebble: ImmediateSuccessor invoked with a non-prefix key")
    95  		}
    96  		return append(append(dst, a...), 0x00)
    97  	},
    98  	Split: split,
    99  	Name:  "pebble.internal.testkeys",
   100  }
   101  
   102  func compare(a, b []byte) int {
   103  	ai, bi := split(a), split(b)
   104  	if v := bytes.Compare(a[:ai], b[:bi]); v != 0 {
   105  		return v
   106  	}
   107  
   108  	if len(a[ai:]) == 0 {
   109  		if len(b[bi:]) == 0 {
   110  			return 0
   111  		}
   112  		return -1
   113  	} else if len(b[bi:]) == 0 {
   114  		return +1
   115  	}
   116  	return compareTimestamps(a[ai:], b[bi:])
   117  }
   118  
   119  func split(a []byte) int {
   120  	i := bytes.LastIndexByte(a, suffixDelim)
   121  	if i >= 0 {
   122  		return i
   123  	}
   124  	return len(a)
   125  }
   126  
   127  func compareTimestamps(a, b []byte) int {
   128  	ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte{suffixDelim}), 10, 64)
   129  	if err != nil {
   130  		panic(fmt.Sprintf("invalid test mvcc timestamp %q", a))
   131  	}
   132  	bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte{suffixDelim}), 10, 64)
   133  	if err != nil {
   134  		panic(fmt.Sprintf("invalid test mvcc timestamp %q", b))
   135  	}
   136  	switch {
   137  	case ai < bi:
   138  		return +1
   139  	case ai > bi:
   140  		return -1
   141  	default:
   142  		return 0
   143  	}
   144  }
   145  
// Keyspace describes a finite keyspace of unsuffixed test keys.
//
// The interface includes the unexported method key, so it can only be
// implemented by types declared within this package.
type Keyspace interface {
	// Count returns the number of keys that exist within this keyspace.
	Count() int64

	// MaxLen returns the maximum length, in bytes, of a key within this
	// keyspace. This is only guaranteed to return an upper bound.
	MaxLen() int

	// Slice returns the sub-keyspace from index i, inclusive, to index j,
	// exclusive. The receiver is unmodified.
	Slice(i, j int64) Keyspace

	// EveryN returns a key space that includes 1 key for every N keys in the
	// original keyspace. The receiver is unmodified.
	EveryN(n int64) Keyspace

	// key writes the i-th key to the buffer and returns the length, in
	// bytes, of the key written.
	key(buf []byte, i int64) int
}
   166  
   167  // Divvy divides the provided keyspace into N equal portions, containing
   168  // disjoint keys evenly distributed across the keyspace.
   169  func Divvy(ks Keyspace, n int64) []Keyspace {
   170  	ret := make([]Keyspace, n)
   171  	for i := int64(0); i < n; i++ {
   172  		ret[i] = ks.Slice(i, ks.Count()).EveryN(n)
   173  	}
   174  	return ret
   175  }
   176  
   177  // Alpha constructs a keyspace consisting of all keys containing characters a-z,
   178  // with at most `maxLength` characters.
   179  func Alpha(maxLength int) Keyspace {
   180  	return alphabet{
   181  		alphabet:  []byte(alpha),
   182  		maxLength: maxLength,
   183  		increment: 1,
   184  	}
   185  }
   186  
   187  // KeyAt returns the i-th key within the keyspace with a suffix encoding the
   188  // timestamp t.
   189  func KeyAt(k Keyspace, i int64, t int64) []byte {
   190  	b := make([]byte, k.MaxLen()+MaxSuffixLen)
   191  	return b[:WriteKeyAt(b, k, i, t)]
   192  }
   193  
   194  // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a
   195  // suffix encoding the timestamp t suffix. It returns the number of bytes
   196  // written.
   197  func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int {
   198  	n := WriteKey(dst, k, i)
   199  	n += WriteSuffix(dst[n:], t)
   200  	return n
   201  }
   202  
   203  // Suffix returns the test keys suffix representation of timestamp t.
   204  func Suffix(t int64) []byte {
   205  	b := make([]byte, MaxSuffixLen)
   206  	return b[:WriteSuffix(b, t)]
   207  }
   208  
   209  // SuffixLen returns the exact length of the given suffix when encoded.
   210  func SuffixLen(t int64) int {
   211  	// Begin at 1 for the '@' delimiter, 1 for a single digit.
   212  	n := 2
   213  	t /= 10
   214  	for t > 0 {
   215  		t /= 10
   216  		n++
   217  	}
   218  	return n
   219  }
   220  
   221  // ParseSuffix returns the integer representation of the encoded suffix.
   222  func ParseSuffix(s []byte) (int64, error) {
   223  	return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64)
   224  }
   225  
   226  // WriteSuffix writes the test keys suffix representation of timestamp t to dst,
   227  // returning the number of bytes written.
   228  func WriteSuffix(dst []byte, t int64) int {
   229  	dst[0] = suffixDelim
   230  	n := 1
   231  	n += len(strconv.AppendInt(dst[n:n], t, 10))
   232  	return n
   233  }
   234  
   235  // Key returns the i-th unsuffixed key within the keyspace.
   236  func Key(k Keyspace, i int64) []byte {
   237  	b := make([]byte, k.MaxLen())
   238  	return b[:k.key(b, i)]
   239  }
   240  
   241  // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It
   242  // returns the number of bytes written.
   243  func WriteKey(dst []byte, k Keyspace, i int64) int {
   244  	return k.key(dst, i)
   245  }
   246  
// alphabet is a Keyspace whose keys are built from a fixed set of characters.
type alphabet struct {
	// alphabet is the ordered set of characters that keys are drawn from.
	alphabet  []byte
	// maxLength is the value reported by MaxLen and the key length used when
	// counting the keys of the underlying keyspace.
	maxLength int
	// headSkip is added to every index before a key is generated, and is
	// subtracted from the total when counting keys.
	headSkip  int64
	// tailSkip is subtracted from the total when counting keys.
	tailSkip  int64
	// increment is the stride between consecutive keys: index i of this
	// keyspace maps to index i*increment+headSkip of the underlying keyspace.
	increment int64
}
   254  
   255  func (a alphabet) Count() int64 {
   256  	// Calculate the total number of keys, ignoring the increment.
   257  	total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip
   258  
   259  	// The increment dictates that we take every N keys, where N = a.increment.
   260  	// Consider a total containing the 5 keys:
   261  	//   a  b  c  d  e
   262  	//   ^     ^     ^
   263  	// If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After
   264  	// dividing by the increment, there may be remainder. If there is, there's
   265  	// one additional key in the alphabet.
   266  	count := total / a.increment
   267  	if total%a.increment > 0 {
   268  		count++
   269  	}
   270  	return count
   271  }
   272  
   273  func (a alphabet) MaxLen() int {
   274  	return a.maxLength
   275  }
   276  
   277  func (a alphabet) Slice(i, j int64) Keyspace {
   278  	s := a
   279  	s.headSkip += i
   280  	s.tailSkip += a.Count() - j
   281  	return s
   282  }
   283  
   284  func (a alphabet) EveryN(n int64) Keyspace {
   285  	s := a
   286  	s.increment *= n
   287  	return s
   288  }
   289  
   290  func keyCount(n, l int) int64 {
   291  	if n == 0 {
   292  		return 0
   293  	} else if n == 1 {
   294  		return int64(l)
   295  	}
   296  	// The number of representable keys in the keyspace is a function of the
   297  	// length of the alphabet n and the max key length l. Consider how the
   298  	// number of representable keys grows as l increases:
   299  	//
   300  	// l = 1: n
   301  	// l = 2: n + n^2
   302  	// l = 3: n + n^2 + n^3
   303  	// ...
   304  	// Σ i=(1...l) n^i = n*(n^l - 1)/(n-1)
   305  	return (int64(n) * (int64(math.Pow(float64(n), float64(l))) - 1)) / int64(n-1)
   306  }
   307  
   308  func (a alphabet) key(buf []byte, idx int64) int {
   309  	// This function generates keys of length 1..maxKeyLength, pulling
   310  	// characters from the alphabet. The idx determines which key to generate,
   311  	// generating the i-th lexicographically next key.
   312  	//
   313  	// The index to use is advanced by `headSkip`, allowing a keyspace to encode
   314  	// a subregion of the keyspace.
   315  	//
   316  	// Eg, alphabet = `ab`, maxKeyLength = 3:
   317  	//
   318  	//           aaa aab     aba abb         baa bab     bba bbb
   319  	//       aa          ab              ba          bb
   320  	//   a                           b
   321  	//   0   1   2   3   4   5   6   7   8   9   10  11  12  13
   322  	//
   323  	return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip,
   324  		keyCount(len(a.alphabet), a.maxLength))
   325  }
   326  
   327  func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int {
   328  	if keyCount == 0 || i > keyCount || i < 0 {
   329  		return 0
   330  	}
   331  
   332  	// Of the keyCount keys in the generative keyspace, how many are there
   333  	// starting with a particular character?
   334  	keysPerCharacter := keyCount / int64(len(alphabet))
   335  
   336  	// Find the character that the key at index i starts with and set it.
   337  	characterIdx := i / keysPerCharacter
   338  	buf[0] = alphabet[characterIdx]
   339  
   340  	// Consider characterIdx = 0, pointing to 'a'.
   341  	//
   342  	//           aaa aab     aba abb         baa bab     bba bbb
   343  	//       aa          ab              ba          bb
   344  	//   a                           b
   345  	//   0   1   2   3   4   5   6   7   8   9   10  11  12  13
   346  	//  \_________________________/
   347  	//    |keysPerCharacter| keys
   348  	//
   349  	// In our recursive call, we reduce the problem to:
   350  	//
   351  	//           aaa aab     aba abb
   352  	//       aa          ab
   353  	//       0   1   2   3   4   5
   354  	//     \________________________/
   355  	//    |keysPerCharacter-1| keys
   356  	//
   357  	// In the subproblem, there are keysPerCharacter-1 keys (eliminating the
   358  	// just 'a' key, plus any keys beginning with any other character).
   359  	//
   360  	// The index i is also offset, reduced by the count of keys beginning with
   361  	// characters earlier in the alphabet (keysPerCharacter*characterIdx) and
   362  	// the key consisting of just the 'a' (-1).
   363  	i = i - keysPerCharacter*characterIdx - 1
   364  	return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1)
   365  }
   366  
   367  // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey,
   368  // returning the index of a particular key, given the provided alphabet and max
   369  // length of a key.
   370  //
   371  // len(key) must be ≥ 1.
   372  func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 {
   373  	i, ok := alphabet[key[0]]
   374  	if !ok {
   375  		panic(fmt.Sprintf("unrecognized alphabet character %v", key[0]))
   376  	}
   377  	// How many keys exist that start with the preceding i characters? Each of
   378  	// the i characters themselves are a key, plus the count of all the keys
   379  	// with one less character for each.
   380  	ret := i + i*keyCount(len(alphabet), n-1)
   381  	if len(key) > 1 {
   382  		ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1)
   383  	}
   384  	return ret
   385  }
   386  
   387  func abs(a int64) int64 {
   388  	if a < 0 {
   389  		return -a
   390  	}
   391  	return a
   392  }
   393  
   394  // RandomSeparator returns a random alphabetic key k such that a < k < b,
   395  // pulling randomness from the provided random number generator. If dst is
   396  // provided and the generated key fits within dst's capacity, the returned slice
   397  // will use dst's memory.
   398  //
   399  // If a prefix P exists such that Prefix(a) < P < Prefix(b), the generated key
   400  // will consist of the prefix P appended with the provided suffix. A zero suffix
   401  // generates an unsuffixed key. If no such prefix P exists, RandomSeparator will
   402  // try to find a key k with either Prefix(a) or Prefix(b) such that a < k < b,
   403  // but the generated key will not use the provided suffix. Note that it's
   404  // possible that no separator key exists (eg, a='a@2', b='a@1'), in which case
   405  // RandomSeparator returns nil.
   406  //
   407  // If RandomSeparator generates a new prefix, the generated prefix will have
   408  // length at most MAX(maxLength, len(Prefix(a)), len(Prefix(b))).
   409  //
   410  // RandomSeparator panics if a or b fails to decode.
   411  func RandomSeparator(dst, a, b []byte, suffix int64, maxLength int, rng *rand.Rand) []byte {
   412  	if Comparer.Compare(a, b) >= 0 {
   413  		return nil
   414  	}
   415  
   416  	// Determine both keys' logical prefixes and suffixes.
   417  	ai := Comparer.Split(a)
   418  	bi := Comparer.Split(b)
   419  	ap := a[:ai]
   420  	bp := b[:bi]
   421  	maxLength = max[int](maxLength, max[int](len(ap), len(bp)))
   422  	var as, bs int64
   423  	var err error
   424  	if ai != len(a) {
   425  		as, err = ParseSuffix(a[ai:])
   426  		if err != nil {
   427  			panic(fmt.Sprintf("failed to parse suffix of %q", a))
   428  		}
   429  	}
   430  	if bi != len(b) {
   431  		bs, err = ParseSuffix(b[bi:])
   432  		if err != nil {
   433  			panic(fmt.Sprintf("failed to parse suffix of %q", b))
   434  		}
   435  	}
   436  
   437  	apIdx := computeAlphabetKeyIndex(ap, inverseAlphabet, maxLength)
   438  	bpIdx := computeAlphabetKeyIndex(bp, inverseAlphabet, maxLength)
   439  	diff := bpIdx - apIdx
   440  	generatedIdx := bpIdx
   441  	if diff > 0 {
   442  		var add int64 = diff + 1
   443  		var start int64 = apIdx
   444  		if as == 1 {
   445  			// There's no expressible key with prefix a greater than a@1. So,
   446  			// exclude ap.
   447  			start = apIdx + 1
   448  			add = diff
   449  		}
   450  		if bs == 0 {
   451  			// No key with prefix b can sort before b@0. We don't want to pick b.
   452  			add--
   453  		}
   454  		// We're allowing generated id to be in the range [start, start + add - 1].
   455  		if start > start+add-1 {
   456  			return nil
   457  		}
   458  		// If we can generate a key which is actually in the middle of apIdx
   459  		// and bpIdx use it so that we don't have to bother about timestamps.
   460  		generatedIdx = rng.Int63n(add) + start
   461  		for diff > 1 && generatedIdx == apIdx || generatedIdx == bpIdx {
   462  			generatedIdx = rng.Int63n(add) + start
   463  		}
   464  	}
   465  
   466  	switch {
   467  	case generatedIdx == apIdx && generatedIdx == bpIdx:
   468  		if abs(bs-as) <= 1 {
   469  			// There's no expressible suffix between the two, and there's no
   470  			// possible separator key.
   471  			return nil
   472  		}
   473  		// The key b is >= key a, but has the same prefix, so b must have the
   474  		// smaller timestamp, unless a has timestamp of 0.
   475  		//
   476  		// NB: The zero suffix (suffix-less) sorts before all other suffixes, so
   477  		// any suffix we generate will be greater than it.
   478  		if as == 0 {
   479  			// bs > as
   480  			suffix = bs + rng.Int63n(10) + 1
   481  		} else {
   482  			// bs < as.
   483  			// Generate suffix in range [bs + 1, as - 1]
   484  			suffix = bs + 1 + rng.Int63n(as-bs-1)
   485  		}
   486  	case generatedIdx == apIdx:
   487  		// NB: The zero suffix (suffix-less) sorts before all other suffixes, so
   488  		// any suffix we generate will be greater than it.
   489  		if as == 0 && suffix == 0 {
   490  			suffix++
   491  		} else if as != 0 && suffix >= as {
   492  			suffix = rng.Int63n(as)
   493  		}
   494  	case generatedIdx == bpIdx:
   495  		if suffix <= bs {
   496  			suffix = bs + rng.Int63n(10) + 1
   497  		}
   498  	}
   499  	if sz := maxLength + SuffixLen(suffix); cap(dst) < sz {
   500  		dst = make([]byte, sz)
   501  	} else {
   502  		dst = dst[:cap(dst)]
   503  	}
   504  	var w int
   505  	if suffix == 0 {
   506  		w = WriteKey(dst, Alpha(maxLength), generatedIdx)
   507  	} else {
   508  		w = WriteKeyAt(dst, Alpha(maxLength), generatedIdx, suffix)
   509  	}
   510  	return dst[:w]
   511  }
   512  
   513  func max[I constraints.Ordered](a, b I) I {
   514  	if b > a {
   515  		return b
   516  	}
   517  	return a
   518  }