github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/internal/testkeys/testkeys.go (about) 1 // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 // Package testkeys provides facilities for generating and comparing 6 // human-readable test keys for use in tests and benchmarks. This package 7 // provides a single Comparer implementation that compares all keys generated 8 // by this package. 9 // 10 // Keys generated by this package may optionally have a 'suffix' encoding an 11 // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the 12 // suffix are performed using integer value, not the byte representation. 13 package testkeys 14 15 import ( 16 "bytes" 17 "cmp" 18 "fmt" 19 "math" 20 "strconv" 21 "strings" 22 23 "github.com/cockroachdb/pebble/internal/base" 24 "golang.org/x/exp/rand" 25 ) 26 27 const alpha = "abcdefghijklmnopqrstuvwxyz" 28 29 const suffixDelim = '@' 30 31 var inverseAlphabet = make(map[byte]int64, len(alpha)) 32 33 func init() { 34 for i := range alpha { 35 inverseAlphabet[alpha[i]] = int64(i) 36 } 37 } 38 39 // MaxSuffixLen is the maximum length of a suffix generated by this package. 40 var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64))) 41 42 // Comparer is the comparer for test keys generated by this package. 43 var Comparer = &base.Comparer{ 44 Compare: compare, 45 Equal: func(a, b []byte) bool { return compare(a, b) == 0 }, 46 AbbreviatedKey: func(k []byte) uint64 { 47 return base.DefaultComparer.AbbreviatedKey(k[:split(k)]) 48 }, 49 FormatKey: base.DefaultFormatter, 50 Separator: func(dst, a, b []byte) []byte { 51 ai := split(a) 52 if ai == len(a) { 53 return append(dst, a...) 54 } 55 bi := split(b) 56 if bi == len(b) { 57 return append(dst, a...) 58 } 59 60 // If the keys are the same just return a. 61 if bytes.Equal(a[:ai], b[:bi]) { 62 return append(dst, a...) 63 } 64 n := len(dst) 65 dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi]) 66 // Did it pick a separator different than a[:ai] -- if not we can't do better than a. 67 buf := dst[n:] 68 if bytes.Equal(a[:ai], buf) { 69 return append(dst[:n], a...) 70 } 71 // The separator is > a[:ai], so return it 72 return dst 73 }, 74 Successor: func(dst, a []byte) []byte { 75 ai := split(a) 76 if ai == len(a) { 77 return append(dst, a...) 78 } 79 n := len(dst) 80 dst = base.DefaultComparer.Successor(dst, a[:ai]) 81 // Did it pick a successor different than a[:ai] -- if not we can't do better than a. 82 buf := dst[n:] 83 if bytes.Equal(a[:ai], buf) { 84 return append(dst[:n], a...) 85 } 86 // The successor is > a[:ai], so return it. 87 return dst 88 }, 89 ImmediateSuccessor: func(dst, a []byte) []byte { 90 // TODO(jackson): Consider changing this Comparer to only support 91 // representable prefix keys containing characters a-z. 92 ai := split(a) 93 if ai != len(a) { 94 panic("pebble: ImmediateSuccessor invoked with a non-prefix key") 95 } 96 return append(append(dst, a...), 0x00) 97 }, 98 Split: split, 99 Name: "pebble.internal.testkeys", 100 } 101 102 func compare(a, b []byte) int { 103 ai, bi := split(a), split(b) 104 if v := bytes.Compare(a[:ai], b[:bi]); v != 0 { 105 return v 106 } 107 108 if len(a[ai:]) == 0 { 109 if len(b[bi:]) == 0 { 110 return 0 111 } 112 return -1 113 } else if len(b[bi:]) == 0 { 114 return +1 115 } 116 return compareTimestamps(a[ai:], b[bi:]) 117 } 118 119 func split(a []byte) int { 120 i := bytes.LastIndexByte(a, suffixDelim) 121 if i >= 0 { 122 return i 123 } 124 return len(a) 125 } 126 127 func compareTimestamps(a, b []byte) int { 128 ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte{suffixDelim}), 10, 64) 129 if err != nil { 130 panic(fmt.Sprintf("invalid test mvcc timestamp %q", a)) 131 } 132 bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte{suffixDelim}), 10, 64) 133 if err != nil { 134 panic(fmt.Sprintf("invalid test mvcc timestamp %q", b)) 135 } 136 return cmp.Compare(bi, ai) 137 } 138 139 // Keyspace describes a finite keyspace of unsuffixed test keys. 140 type Keyspace interface { 141 // Count returns the number of keys that exist within this keyspace. 142 Count() int64 143 144 // MaxLen returns the maximum length, in bytes, of a key within this 145 // keyspace. This is only guaranteed to return an upper bound. 146 MaxLen() int 147 148 // Slice returns the sub-keyspace from index i, inclusive, to index j, 149 // exclusive. The receiver is unmodified. 150 Slice(i, j int64) Keyspace 151 152 // EveryN returns a key space that includes 1 key for every N keys in the 153 // original keyspace. The receiver is unmodified. 154 EveryN(n int64) Keyspace 155 156 // key writes the i-th key to the buffer and returns the length. 157 key(buf []byte, i int64) int 158 } 159 160 // Divvy divides the provided keyspace into N equal portions, containing 161 // disjoint keys evenly distributed across the keyspace. 162 func Divvy(ks Keyspace, n int64) []Keyspace { 163 ret := make([]Keyspace, n) 164 for i := int64(0); i < n; i++ { 165 ret[i] = ks.Slice(i, ks.Count()).EveryN(n) 166 } 167 return ret 168 } 169 170 // Alpha constructs a keyspace consisting of all keys containing characters a-z, 171 // with at most `maxLength` characters. 172 func Alpha(maxLength int) Keyspace { 173 return alphabet{ 174 alphabet: []byte(alpha), 175 maxLength: maxLength, 176 increment: 1, 177 } 178 } 179 180 // KeyAt returns the i-th key within the keyspace with a suffix encoding the 181 // timestamp t. 182 func KeyAt(k Keyspace, i int64, t int64) []byte { 183 b := make([]byte, k.MaxLen()+MaxSuffixLen) 184 return b[:WriteKeyAt(b, k, i, t)] 185 } 186 187 // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a 188 // suffix encoding the timestamp t suffix. It returns the number of bytes 189 // written. 190 func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int { 191 n := WriteKey(dst, k, i) 192 n += WriteSuffix(dst[n:], t) 193 return n 194 } 195 196 // Suffix returns the test keys suffix representation of timestamp t. 197 func Suffix(t int64) []byte { 198 b := make([]byte, MaxSuffixLen) 199 return b[:WriteSuffix(b, t)] 200 } 201 202 // SuffixLen returns the exact length of the given suffix when encoded. 203 func SuffixLen(t int64) int { 204 // Begin at 1 for the '@' delimiter, 1 for a single digit. 205 n := 2 206 t /= 10 207 for t > 0 { 208 t /= 10 209 n++ 210 } 211 return n 212 } 213 214 // ParseSuffix returns the integer representation of the encoded suffix. 215 func ParseSuffix(s []byte) (int64, error) { 216 return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64) 217 } 218 219 // WriteSuffix writes the test keys suffix representation of timestamp t to dst, 220 // returning the number of bytes written. 221 func WriteSuffix(dst []byte, t int64) int { 222 dst[0] = suffixDelim 223 n := 1 224 n += len(strconv.AppendInt(dst[n:n], t, 10)) 225 return n 226 } 227 228 // Key returns the i-th unsuffixed key within the keyspace. 229 func Key(k Keyspace, i int64) []byte { 230 b := make([]byte, k.MaxLen()) 231 return b[:k.key(b, i)] 232 } 233 234 // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It 235 // returns the number of bytes written. 236 func WriteKey(dst []byte, k Keyspace, i int64) int { 237 return k.key(dst, i) 238 } 239 240 type alphabet struct { 241 alphabet []byte 242 maxLength int 243 headSkip int64 244 tailSkip int64 245 increment int64 246 } 247 248 func (a alphabet) Count() int64 { 249 // Calculate the total number of keys, ignoring the increment. 250 total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip 251 252 // The increment dictates that we take every N keys, where N = a.increment. 253 // Consider a total containing the 5 keys: 254 // a b c d e 255 // ^ ^ ^ 256 // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After 257 // dividing by the increment, there may be remainder. If there is, there's 258 // one additional key in the alphabet. 259 count := total / a.increment 260 if total%a.increment > 0 { 261 count++ 262 } 263 return count 264 } 265 266 func (a alphabet) MaxLen() int { 267 return a.maxLength 268 } 269 270 func (a alphabet) Slice(i, j int64) Keyspace { 271 s := a 272 s.headSkip += i 273 s.tailSkip += a.Count() - j 274 return s 275 } 276 277 func (a alphabet) EveryN(n int64) Keyspace { 278 s := a 279 s.increment *= n 280 return s 281 } 282 283 func keyCount(n, l int) int64 { 284 if n == 0 { 285 return 0 286 } else if n == 1 { 287 return int64(l) 288 } 289 // The number of representable keys in the keyspace is a function of the 290 // length of the alphabet n and the max key length l. Consider how the 291 // number of representable keys grows as l increases: 292 // 293 // l = 1: n 294 // l = 2: n + n^2 295 // l = 3: n + n^2 + n^3 296 // ... 297 // Σ i=(1...l) n^i = n*(n^l - 1)/(n-1) 298 return (int64(n) * (int64(math.Pow(float64(n), float64(l))) - 1)) / int64(n-1) 299 } 300 301 func (a alphabet) key(buf []byte, idx int64) int { 302 // This function generates keys of length 1..maxKeyLength, pulling 303 // characters from the alphabet. The idx determines which key to generate, 304 // generating the i-th lexicographically next key. 305 // 306 // The index to use is advanced by `headSkip`, allowing a keyspace to encode 307 // a subregion of the keyspace. 308 // 309 // Eg, alphabet = `ab`, maxKeyLength = 3: 310 // 311 // aaa aab aba abb baa bab bba bbb 312 // aa ab ba bb 313 // a b 314 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 315 // 316 return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip, 317 keyCount(len(a.alphabet), a.maxLength)) 318 } 319 320 func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int { 321 if keyCount == 0 || i > keyCount || i < 0 { 322 return 0 323 } 324 325 // Of the keyCount keys in the generative keyspace, how many are there 326 // starting with a particular character? 327 keysPerCharacter := keyCount / int64(len(alphabet)) 328 329 // Find the character that the key at index i starts with and set it. 330 characterIdx := i / keysPerCharacter 331 buf[0] = alphabet[characterIdx] 332 333 // Consider characterIdx = 0, pointing to 'a'. 334 // 335 // aaa aab aba abb baa bab bba bbb 336 // aa ab ba bb 337 // a b 338 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 339 // \_________________________/ 340 // |keysPerCharacter| keys 341 // 342 // In our recursive call, we reduce the problem to: 343 // 344 // aaa aab aba abb 345 // aa ab 346 // 0 1 2 3 4 5 347 // \________________________/ 348 // |keysPerCharacter-1| keys 349 // 350 // In the subproblem, there are keysPerCharacter-1 keys (eliminating the 351 // just 'a' key, plus any keys beginning with any other character). 352 // 353 // The index i is also offset, reduced by the count of keys beginning with 354 // characters earlier in the alphabet (keysPerCharacter*characterIdx) and 355 // the key consisting of just the 'a' (-1). 356 i = i - keysPerCharacter*characterIdx - 1 357 return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1) 358 } 359 360 // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey, 361 // returning the index of a particular key, given the provided alphabet and max 362 // length of a key. 363 // 364 // len(key) must be ≥ 1. 365 func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 { 366 i, ok := alphabet[key[0]] 367 if !ok { 368 panic(fmt.Sprintf("unrecognized alphabet character %v", key[0])) 369 } 370 // How many keys exist that start with the preceding i characters? Each of 371 // the i characters themselves are a key, plus the count of all the keys 372 // with one less character for each. 373 ret := i + i*keyCount(len(alphabet), n-1) 374 if len(key) > 1 { 375 ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1) 376 } 377 return ret 378 } 379 380 func abs(a int64) int64 { 381 if a < 0 { 382 return -a 383 } 384 return a 385 } 386 387 // RandomSeparator returns a random alphabetic key k such that a < k < b, 388 // pulling randomness from the provided random number generator. If dst is 389 // provided and the generated key fits within dst's capacity, the returned slice 390 // will use dst's memory. 391 // 392 // If a prefix P exists such that Prefix(a) < P < Prefix(b), the generated key 393 // will consist of the prefix P appended with the provided suffix. A zero suffix 394 // generates an unsuffixed key. If no such prefix P exists, RandomSeparator will 395 // try to find a key k with either Prefix(a) or Prefix(b) such that a < k < b, 396 // but the generated key will not use the provided suffix. Note that it's 397 // possible that no separator key exists (eg, a='a@2', b='a@1'), in which case 398 // RandomSeparator returns nil. 399 // 400 // If RandomSeparator generates a new prefix, the generated prefix will have 401 // length at most MAX(maxLength, len(Prefix(a)), len(Prefix(b))). 402 // 403 // RandomSeparator panics if a or b fails to decode. 404 func RandomSeparator(dst, a, b []byte, suffix int64, maxLength int, rng *rand.Rand) []byte { 405 if Comparer.Compare(a, b) >= 0 { 406 return nil 407 } 408 409 // Determine both keys' logical prefixes and suffixes. 410 ai := Comparer.Split(a) 411 bi := Comparer.Split(b) 412 ap := a[:ai] 413 bp := b[:bi] 414 maxLength = max(maxLength, len(ap), len(bp)) 415 var as, bs int64 416 var err error 417 if ai != len(a) { 418 as, err = ParseSuffix(a[ai:]) 419 if err != nil { 420 panic(fmt.Sprintf("failed to parse suffix of %q", a)) 421 } 422 } 423 if bi != len(b) { 424 bs, err = ParseSuffix(b[bi:]) 425 if err != nil { 426 panic(fmt.Sprintf("failed to parse suffix of %q", b)) 427 } 428 } 429 430 apIdx := computeAlphabetKeyIndex(ap, inverseAlphabet, maxLength) 431 bpIdx := computeAlphabetKeyIndex(bp, inverseAlphabet, maxLength) 432 diff := bpIdx - apIdx 433 generatedIdx := bpIdx 434 if diff > 0 { 435 var add int64 = diff + 1 436 var start int64 = apIdx 437 if as == 1 { 438 // There's no expressible key with prefix a greater than a@1. So, 439 // exclude ap. 440 start = apIdx + 1 441 add = diff 442 } 443 if bs == 0 { 444 // No key with prefix b can sort before b@0. We don't want to pick b. 445 add-- 446 } 447 // We're allowing generated id to be in the range [start, start + add - 1]. 448 if start > start+add-1 { 449 return nil 450 } 451 // If we can generate a key which is actually in the middle of apIdx 452 // and bpIdx use it so that we don't have to bother about timestamps. 453 generatedIdx = rng.Int63n(add) + start 454 for diff > 1 && generatedIdx == apIdx || generatedIdx == bpIdx { 455 generatedIdx = rng.Int63n(add) + start 456 } 457 } 458 459 switch { 460 case generatedIdx == apIdx && generatedIdx == bpIdx: 461 if abs(bs-as) <= 1 { 462 // There's no expressible suffix between the two, and there's no 463 // possible separator key. 464 return nil 465 } 466 // The key b is >= key a, but has the same prefix, so b must have the 467 // smaller timestamp, unless a has timestamp of 0. 468 // 469 // NB: The zero suffix (suffix-less) sorts before all other suffixes, so 470 // any suffix we generate will be greater than it. 471 if as == 0 { 472 // bs > as 473 suffix = bs + rng.Int63n(10) + 1 474 } else { 475 // bs < as. 476 // Generate suffix in range [bs + 1, as - 1] 477 suffix = bs + 1 + rng.Int63n(as-bs-1) 478 } 479 case generatedIdx == apIdx: 480 // NB: The zero suffix (suffix-less) sorts before all other suffixes, so 481 // any suffix we generate will be greater than it. 482 if as == 0 && suffix == 0 { 483 suffix++ 484 } else if as != 0 && suffix >= as { 485 suffix = rng.Int63n(as) 486 } 487 case generatedIdx == bpIdx: 488 if suffix <= bs { 489 suffix = bs + rng.Int63n(10) + 1 490 } 491 } 492 if sz := maxLength + SuffixLen(suffix); cap(dst) < sz { 493 dst = make([]byte, sz) 494 } else { 495 dst = dst[:cap(dst)] 496 } 497 var w int 498 if suffix == 0 { 499 w = WriteKey(dst, Alpha(maxLength), generatedIdx) 500 } else { 501 w = WriteKeyAt(dst, Alpha(maxLength), generatedIdx, suffix) 502 } 503 return dst[:w] 504 }