github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/testkeys/testkeys.go (about) 1 // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 // Package testkeys provides facilities for generating and comparing 6 // human-readable test keys for use in tests and benchmarks. This package 7 // provides a single Comparer implementation that compares all keys generated 8 // by this package. 9 // 10 // Keys generated by this package may optionally have a 'suffix' encoding an 11 // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the 12 // suffix are performed using integer value, not the byte representation. 13 package testkeys 14 15 import ( 16 "bytes" 17 "fmt" 18 "math" 19 "strconv" 20 "strings" 21 22 "github.com/cockroachdb/pebble/internal/base" 23 "golang.org/x/exp/constraints" 24 "golang.org/x/exp/rand" 25 ) 26 27 const alpha = "abcdefghijklmnopqrstuvwxyz" 28 29 const suffixDelim = '@' 30 31 var inverseAlphabet = make(map[byte]int64, len(alpha)) 32 33 func init() { 34 for i := range alpha { 35 inverseAlphabet[alpha[i]] = int64(i) 36 } 37 } 38 39 // MaxSuffixLen is the maximum length of a suffix generated by this package. 40 var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64))) 41 42 // Comparer is the comparer for test keys generated by this package. 43 var Comparer = &base.Comparer{ 44 Compare: compare, 45 Equal: func(a, b []byte) bool { return compare(a, b) == 0 }, 46 AbbreviatedKey: func(k []byte) uint64 { 47 return base.DefaultComparer.AbbreviatedKey(k[:split(k)]) 48 }, 49 FormatKey: base.DefaultFormatter, 50 Separator: func(dst, a, b []byte) []byte { 51 ai := split(a) 52 if ai == len(a) { 53 return append(dst, a...) 54 } 55 bi := split(b) 56 if bi == len(b) { 57 return append(dst, a...) 58 } 59 60 // If the keys are the same just return a. 61 if bytes.Equal(a[:ai], b[:bi]) { 62 return append(dst, a...) 63 } 64 n := len(dst) 65 dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi]) 66 // Did it pick a separator different than a[:ai] -- if not we can't do better than a. 67 buf := dst[n:] 68 if bytes.Equal(a[:ai], buf) { 69 return append(dst[:n], a...) 70 } 71 // The separator is > a[:ai], so return it 72 return dst 73 }, 74 Successor: func(dst, a []byte) []byte { 75 ai := split(a) 76 if ai == len(a) { 77 return append(dst, a...) 78 } 79 n := len(dst) 80 dst = base.DefaultComparer.Successor(dst, a[:ai]) 81 // Did it pick a successor different than a[:ai] -- if not we can't do better than a. 82 buf := dst[n:] 83 if bytes.Equal(a[:ai], buf) { 84 return append(dst[:n], a...) 85 } 86 // The successor is > a[:ai], so return it. 87 return dst 88 }, 89 ImmediateSuccessor: func(dst, a []byte) []byte { 90 // TODO(jackson): Consider changing this Comparer to only support 91 // representable prefix keys containing characters a-z. 92 ai := split(a) 93 if ai != len(a) { 94 panic("pebble: ImmediateSuccessor invoked with a non-prefix key") 95 } 96 return append(append(dst, a...), 0x00) 97 }, 98 Split: split, 99 Name: "pebble.internal.testkeys", 100 } 101 102 func compare(a, b []byte) int { 103 ai, bi := split(a), split(b) 104 if v := bytes.Compare(a[:ai], b[:bi]); v != 0 { 105 return v 106 } 107 108 if len(a[ai:]) == 0 { 109 if len(b[bi:]) == 0 { 110 return 0 111 } 112 return -1 113 } else if len(b[bi:]) == 0 { 114 return +1 115 } 116 return compareTimestamps(a[ai:], b[bi:]) 117 } 118 119 func split(a []byte) int { 120 i := bytes.LastIndexByte(a, suffixDelim) 121 if i >= 0 { 122 return i 123 } 124 return len(a) 125 } 126 127 func compareTimestamps(a, b []byte) int { 128 ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte{suffixDelim}), 10, 64) 129 if err != nil { 130 panic(fmt.Sprintf("invalid test mvcc timestamp %q", a)) 131 } 132 bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte{suffixDelim}), 10, 64) 133 if err != nil { 134 panic(fmt.Sprintf("invalid test mvcc timestamp %q", b)) 135 } 136 switch { 137 case ai < bi: 138 return +1 139 case ai > bi: 140 return -1 141 default: 142 return 0 143 } 144 } 145 146 // Keyspace describes a finite keyspace of unsuffixed test keys. 147 type Keyspace interface { 148 // Count returns the number of keys that exist within this keyspace. 149 Count() int64 150 151 // MaxLen returns the maximum length, in bytes, of a key within this 152 // keyspace. This is only guaranteed to return an upper bound. 153 MaxLen() int 154 155 // Slice returns the sub-keyspace from index i, inclusive, to index j, 156 // exclusive. The receiver is unmodified. 157 Slice(i, j int64) Keyspace 158 159 // EveryN returns a key space that includes 1 key for every N keys in the 160 // original keyspace. The receiver is unmodified. 161 EveryN(n int64) Keyspace 162 163 // key writes the i-th key to the buffer and returns the length. 164 key(buf []byte, i int64) int 165 } 166 167 // Divvy divides the provided keyspace into N equal portions, containing 168 // disjoint keys evenly distributed across the keyspace. 169 func Divvy(ks Keyspace, n int64) []Keyspace { 170 ret := make([]Keyspace, n) 171 for i := int64(0); i < n; i++ { 172 ret[i] = ks.Slice(i, ks.Count()).EveryN(n) 173 } 174 return ret 175 } 176 177 // Alpha constructs a keyspace consisting of all keys containing characters a-z, 178 // with at most `maxLength` characters. 179 func Alpha(maxLength int) Keyspace { 180 return alphabet{ 181 alphabet: []byte(alpha), 182 maxLength: maxLength, 183 increment: 1, 184 } 185 } 186 187 // KeyAt returns the i-th key within the keyspace with a suffix encoding the 188 // timestamp t. 189 func KeyAt(k Keyspace, i int64, t int64) []byte { 190 b := make([]byte, k.MaxLen()+MaxSuffixLen) 191 return b[:WriteKeyAt(b, k, i, t)] 192 } 193 194 // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a 195 // suffix encoding the timestamp t suffix. It returns the number of bytes 196 // written. 197 func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int { 198 n := WriteKey(dst, k, i) 199 n += WriteSuffix(dst[n:], t) 200 return n 201 } 202 203 // Suffix returns the test keys suffix representation of timestamp t. 204 func Suffix(t int64) []byte { 205 b := make([]byte, MaxSuffixLen) 206 return b[:WriteSuffix(b, t)] 207 } 208 209 // SuffixLen returns the exact length of the given suffix when encoded. 210 func SuffixLen(t int64) int { 211 // Begin at 1 for the '@' delimiter, 1 for a single digit. 212 n := 2 213 t /= 10 214 for t > 0 { 215 t /= 10 216 n++ 217 } 218 return n 219 } 220 221 // ParseSuffix returns the integer representation of the encoded suffix. 222 func ParseSuffix(s []byte) (int64, error) { 223 return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64) 224 } 225 226 // WriteSuffix writes the test keys suffix representation of timestamp t to dst, 227 // returning the number of bytes written. 228 func WriteSuffix(dst []byte, t int64) int { 229 dst[0] = suffixDelim 230 n := 1 231 n += len(strconv.AppendInt(dst[n:n], t, 10)) 232 return n 233 } 234 235 // Key returns the i-th unsuffixed key within the keyspace. 236 func Key(k Keyspace, i int64) []byte { 237 b := make([]byte, k.MaxLen()) 238 return b[:k.key(b, i)] 239 } 240 241 // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It 242 // returns the number of bytes written. 243 func WriteKey(dst []byte, k Keyspace, i int64) int { 244 return k.key(dst, i) 245 } 246 247 type alphabet struct { 248 alphabet []byte 249 maxLength int 250 headSkip int64 251 tailSkip int64 252 increment int64 253 } 254 255 func (a alphabet) Count() int64 { 256 // Calculate the total number of keys, ignoring the increment. 257 total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip 258 259 // The increment dictates that we take every N keys, where N = a.increment. 260 // Consider a total containing the 5 keys: 261 // a b c d e 262 // ^ ^ ^ 263 // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After 264 // dividing by the increment, there may be remainder. If there is, there's 265 // one additional key in the alphabet. 266 count := total / a.increment 267 if total%a.increment > 0 { 268 count++ 269 } 270 return count 271 } 272 273 func (a alphabet) MaxLen() int { 274 return a.maxLength 275 } 276 277 func (a alphabet) Slice(i, j int64) Keyspace { 278 s := a 279 s.headSkip += i 280 s.tailSkip += a.Count() - j 281 return s 282 } 283 284 func (a alphabet) EveryN(n int64) Keyspace { 285 s := a 286 s.increment *= n 287 return s 288 } 289 290 func keyCount(n, l int) int64 { 291 if n == 0 { 292 return 0 293 } else if n == 1 { 294 return int64(l) 295 } 296 // The number of representable keys in the keyspace is a function of the 297 // length of the alphabet n and the max key length l. Consider how the 298 // number of representable keys grows as l increases: 299 // 300 // l = 1: n 301 // l = 2: n + n^2 302 // l = 3: n + n^2 + n^3 303 // ... 304 // Σ i=(1...l) n^i = n*(n^l - 1)/(n-1) 305 return (int64(n) * (int64(math.Pow(float64(n), float64(l))) - 1)) / int64(n-1) 306 } 307 308 func (a alphabet) key(buf []byte, idx int64) int { 309 // This function generates keys of length 1..maxKeyLength, pulling 310 // characters from the alphabet. The idx determines which key to generate, 311 // generating the i-th lexicographically next key. 312 // 313 // The index to use is advanced by `headSkip`, allowing a keyspace to encode 314 // a subregion of the keyspace. 315 // 316 // Eg, alphabet = `ab`, maxKeyLength = 3: 317 // 318 // aaa aab aba abb baa bab bba bbb 319 // aa ab ba bb 320 // a b 321 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 322 // 323 return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip, 324 keyCount(len(a.alphabet), a.maxLength)) 325 } 326 327 func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int { 328 if keyCount == 0 || i > keyCount || i < 0 { 329 return 0 330 } 331 332 // Of the keyCount keys in the generative keyspace, how many are there 333 // starting with a particular character? 334 keysPerCharacter := keyCount / int64(len(alphabet)) 335 336 // Find the character that the key at index i starts with and set it. 337 characterIdx := i / keysPerCharacter 338 buf[0] = alphabet[characterIdx] 339 340 // Consider characterIdx = 0, pointing to 'a'. 341 // 342 // aaa aab aba abb baa bab bba bbb 343 // aa ab ba bb 344 // a b 345 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 346 // \_________________________/ 347 // |keysPerCharacter| keys 348 // 349 // In our recursive call, we reduce the problem to: 350 // 351 // aaa aab aba abb 352 // aa ab 353 // 0 1 2 3 4 5 354 // \________________________/ 355 // |keysPerCharacter-1| keys 356 // 357 // In the subproblem, there are keysPerCharacter-1 keys (eliminating the 358 // just 'a' key, plus any keys beginning with any other character). 359 // 360 // The index i is also offset, reduced by the count of keys beginning with 361 // characters earlier in the alphabet (keysPerCharacter*characterIdx) and 362 // the key consisting of just the 'a' (-1). 363 i = i - keysPerCharacter*characterIdx - 1 364 return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1) 365 } 366 367 // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey, 368 // returning the index of a particular key, given the provided alphabet and max 369 // length of a key. 370 // 371 // len(key) must be ≥ 1. 372 func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 { 373 i, ok := alphabet[key[0]] 374 if !ok { 375 panic(fmt.Sprintf("unrecognized alphabet character %v", key[0])) 376 } 377 // How many keys exist that start with the preceding i characters? Each of 378 // the i characters themselves are a key, plus the count of all the keys 379 // with one less character for each. 380 ret := i + i*keyCount(len(alphabet), n-1) 381 if len(key) > 1 { 382 ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1) 383 } 384 return ret 385 } 386 387 func abs(a int64) int64 { 388 if a < 0 { 389 return -a 390 } 391 return a 392 } 393 394 // RandomSeparator returns a random alphabetic key k such that a < k < b, 395 // pulling randomness from the provided random number generator. If dst is 396 // provided and the generated key fits within dst's capacity, the returned slice 397 // will use dst's memory. 398 // 399 // If a prefix P exists such that Prefix(a) < P < Prefix(b), the generated key 400 // will consist of the prefix P appended with the provided suffix. A zero suffix 401 // generates an unsuffixed key. If no such prefix P exists, RandomSeparator will 402 // try to find a key k with either Prefix(a) or Prefix(b) such that a < k < b, 403 // but the generated key will not use the provided suffix. Note that it's 404 // possible that no separator key exists (eg, a='a@2', b='a@1'), in which case 405 // RandomSeparator returns nil. 406 // 407 // If RandomSeparator generates a new prefix, the generated prefix will have 408 // length at most MAX(maxLength, len(Prefix(a)), len(Prefix(b))). 409 // 410 // RandomSeparator panics if a or b fails to decode. 411 func RandomSeparator(dst, a, b []byte, suffix int64, maxLength int, rng *rand.Rand) []byte { 412 if Comparer.Compare(a, b) >= 0 { 413 return nil 414 } 415 416 // Determine both keys' logical prefixes and suffixes. 417 ai := Comparer.Split(a) 418 bi := Comparer.Split(b) 419 ap := a[:ai] 420 bp := b[:bi] 421 maxLength = max[int](maxLength, max[int](len(ap), len(bp))) 422 var as, bs int64 423 var err error 424 if ai != len(a) { 425 as, err = ParseSuffix(a[ai:]) 426 if err != nil { 427 panic(fmt.Sprintf("failed to parse suffix of %q", a)) 428 } 429 } 430 if bi != len(b) { 431 bs, err = ParseSuffix(b[bi:]) 432 if err != nil { 433 panic(fmt.Sprintf("failed to parse suffix of %q", b)) 434 } 435 } 436 437 apIdx := computeAlphabetKeyIndex(ap, inverseAlphabet, maxLength) 438 bpIdx := computeAlphabetKeyIndex(bp, inverseAlphabet, maxLength) 439 diff := bpIdx - apIdx 440 generatedIdx := bpIdx 441 if diff > 0 { 442 var add int64 = diff + 1 443 var start int64 = apIdx 444 if as == 1 { 445 // There's no expressible key with prefix a greater than a@1. So, 446 // exclude ap. 447 start = apIdx + 1 448 add = diff 449 } 450 if bs == 0 { 451 // No key with prefix b can sort before b@0. We don't want to pick b. 452 add-- 453 } 454 // We're allowing generated id to be in the range [start, start + add - 1]. 455 if start > start+add-1 { 456 return nil 457 } 458 // If we can generate a key which is actually in the middle of apIdx 459 // and bpIdx use it so that we don't have to bother about timestamps. 460 generatedIdx = rng.Int63n(add) + start 461 for diff > 1 && generatedIdx == apIdx || generatedIdx == bpIdx { 462 generatedIdx = rng.Int63n(add) + start 463 } 464 } 465 466 switch { 467 case generatedIdx == apIdx && generatedIdx == bpIdx: 468 if abs(bs-as) <= 1 { 469 // There's no expressible suffix between the two, and there's no 470 // possible separator key. 471 return nil 472 } 473 // The key b is >= key a, but has the same prefix, so b must have the 474 // smaller timestamp, unless a has timestamp of 0. 475 // 476 // NB: The zero suffix (suffix-less) sorts before all other suffixes, so 477 // any suffix we generate will be greater than it. 478 if as == 0 { 479 // bs > as 480 suffix = bs + rng.Int63n(10) + 1 481 } else { 482 // bs < as. 483 // Generate suffix in range [bs + 1, as - 1] 484 suffix = bs + 1 + rng.Int63n(as-bs-1) 485 } 486 case generatedIdx == apIdx: 487 // NB: The zero suffix (suffix-less) sorts before all other suffixes, so 488 // any suffix we generate will be greater than it. 489 if as == 0 && suffix == 0 { 490 suffix++ 491 } else if as != 0 && suffix >= as { 492 suffix = rng.Int63n(as) 493 } 494 case generatedIdx == bpIdx: 495 if suffix <= bs { 496 suffix = bs + rng.Int63n(10) + 1 497 } 498 } 499 if sz := maxLength + SuffixLen(suffix); cap(dst) < sz { 500 dst = make([]byte, sz) 501 } else { 502 dst = dst[:cap(dst)] 503 } 504 var w int 505 if suffix == 0 { 506 w = WriteKey(dst, Alpha(maxLength), generatedIdx) 507 } else { 508 w = WriteKeyAt(dst, Alpha(maxLength), generatedIdx, suffix) 509 } 510 return dst[:w] 511 } 512 513 func max[I constraints.Ordered](a, b I) I { 514 if b > a { 515 return b 516 } 517 return a 518 }