github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/testkeys/testkeys.go (about) 1 // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 // Package testkeys provides facilities for generating and comparing 6 // human-readable test keys for use in tests and benchmarks. This package 7 // provides a single Comparer implementation that compares all keys generated 8 // by this package. 9 // 10 // Keys generated by this package may optionally have a 'suffix' encoding an 11 // MVCC timestamp. This suffix is of the form "@<integer>". Comparisons on the 12 // suffix are performed using integer value, not the byte representation. 13 package testkeys 14 15 import ( 16 "bytes" 17 "fmt" 18 "math" 19 "strconv" 20 "strings" 21 22 "github.com/zuoyebang/bitalostable/internal/base" 23 ) 24 25 const alpha = "abcdefghijklmnopqrstuvwxyz" 26 27 const suffixDelim = '@' 28 29 // MaxSuffixLen is the maximum length of a suffix generated by this package. 30 var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", math.MaxInt64)) 31 32 // Comparer is the comparer for test keys generated by this package. 33 var Comparer *base.Comparer = &base.Comparer{ 34 Compare: compare, 35 Equal: func(a, b []byte) bool { return compare(a, b) == 0 }, 36 AbbreviatedKey: func(k []byte) uint64 { 37 return base.DefaultComparer.AbbreviatedKey(k[:split(k)]) 38 }, 39 FormatKey: base.DefaultFormatter, 40 Separator: func(dst, a, b []byte) []byte { 41 ai := split(a) 42 if ai == len(a) { 43 return append(dst, a...) 44 } 45 bi := split(b) 46 if bi == len(b) { 47 return append(dst, a...) 48 } 49 50 // If the keys are the same just return a. 51 if bytes.Equal(a[:ai], b[:bi]) { 52 return append(dst, a...) 53 } 54 n := len(dst) 55 dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi]) 56 // Did it pick a separator different than a[:ai] -- if not we can't do better than a. 57 buf := dst[n:] 58 if bytes.Equal(a[:ai], buf) { 59 return append(dst[:n], a...) 60 } 61 // The separator is > a[:ai], so we only need to add the sentinel. 62 return append(dst, 0) 63 }, 64 Successor: func(dst, a []byte) []byte { 65 ai := split(a) 66 if ai == len(a) { 67 return append(dst, a...) 68 } 69 n := len(dst) 70 dst = base.DefaultComparer.Successor(dst, a[:ai]) 71 // Did it pick a successor different than a[:ai] -- if not we can't do better than a. 72 buf := dst[n:] 73 if bytes.Equal(a[:ai], buf) { 74 return append(dst[:n], a...) 75 } 76 // The successor is > a[:ai], so we only need to add the sentinel. 77 return append(dst, 0) 78 }, 79 ImmediateSuccessor: func(dst, a []byte) []byte { 80 // TODO(jackson): Consider changing this Comparer to only support 81 // representable prefix keys containing characters a-z. 82 ai := split(a) 83 if ai != len(a) { 84 panic("bitalostable: ImmediateSuccessor invoked with a non-prefix key") 85 } 86 return append(append(dst, a...), 0x00) 87 }, 88 Split: split, 89 Name: "bitalostable.internal.testkeys", 90 } 91 92 func compare(a, b []byte) int { 93 ai, bi := split(a), split(b) 94 if v := bytes.Compare(a[:ai], b[:bi]); v != 0 { 95 return v 96 } 97 98 if len(a[ai:]) == 0 { 99 if len(b[bi:]) == 0 { 100 return 0 101 } 102 return -1 103 } else if len(b[bi:]) == 0 { 104 return +1 105 } 106 return compareTimestamps(a[ai:], b[bi:]) 107 } 108 109 func split(a []byte) int { 110 i := bytes.LastIndexByte(a, suffixDelim) 111 if i >= 0 { 112 return i 113 } 114 return len(a) 115 } 116 117 func compareTimestamps(a, b []byte) int { 118 ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte{suffixDelim}), 10, 64) 119 if err != nil { 120 panic(fmt.Sprintf("invalid test mvcc timestamp %q", a)) 121 } 122 bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte{suffixDelim}), 10, 64) 123 if err != nil { 124 panic(fmt.Sprintf("invalid test mvcc timestamp %q", b)) 125 } 126 switch { 127 case ai < bi: 128 return +1 129 case ai > bi: 130 return -1 131 default: 132 return 0 133 } 134 } 135 136 // Keyspace describes a finite keyspace of unsuffixed test keys. 137 type Keyspace interface { 138 // Count returns the number of keys that exist within this keyspace. 139 Count() int 140 141 // MaxLen returns the maximum length, in bytes, of a key within this 142 // keyspace. This is only guaranteed to return an upper bound. 143 MaxLen() int 144 145 // Slice returns the sub-keyspace from index i, inclusive, to index j, 146 // exclusive. The receiver is unmodified. 147 Slice(i, j int) Keyspace 148 149 // EveryN returns a key space that includes 1 key for every N keys in the 150 // original keyspace. The receiver is unmodified. 151 EveryN(n int) Keyspace 152 153 key(buf []byte, i int) int 154 } 155 156 // Divvy divides the provided keyspace into N equal portions, containing 157 // disjoint keys evenly distributed across the keyspace. 158 func Divvy(ks Keyspace, n int) []Keyspace { 159 ret := make([]Keyspace, n) 160 for i := 0; i < n; i++ { 161 ret[i] = ks.Slice(i, ks.Count()).EveryN(n) 162 } 163 return ret 164 } 165 166 // Alpha constructs a keyspace consisting of all keys containing characters a-z, 167 // with at most `maxLength` characters. 168 func Alpha(maxLength int) Keyspace { 169 return alphabet{ 170 alphabet: []byte(alpha), 171 maxLength: maxLength, 172 increment: 1, 173 } 174 } 175 176 // KeyAt returns the i-th key within the keyspace with a suffix encoding the 177 // timestamp t. 178 func KeyAt(k Keyspace, i int, t int) []byte { 179 b := make([]byte, k.MaxLen()+MaxSuffixLen) 180 return b[:WriteKeyAt(b, k, i, t)] 181 } 182 183 // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a 184 // suffix encoding the timestamp t suffix. It returns the number of bytes 185 // written. 186 func WriteKeyAt(dst []byte, k Keyspace, i int, t int) int { 187 n := WriteKey(dst, k, i) 188 n += WriteSuffix(dst[n:], t) 189 return n 190 } 191 192 // Suffix returns the test keys suffix representation of timestamp t. 193 func Suffix(t int) []byte { 194 b := make([]byte, MaxSuffixLen) 195 return b[:WriteSuffix(b, t)] 196 } 197 198 // SuffixLen returns the exact length of the given suffix when encoded. 199 func SuffixLen(t int) int { 200 // Begin at 1 for the '@' delimiter, 1 for a single digit. 201 n := 2 202 t /= 10 203 for t > 0 { 204 t /= 10 205 n++ 206 } 207 return n 208 } 209 210 // ParseSuffix returns the integer representation of the encoded suffix. 211 func ParseSuffix(s []byte) (int, error) { 212 return strconv.Atoi(strings.TrimPrefix(string(s), string(suffixDelim))) 213 } 214 215 // WriteSuffix writes the test keys suffix representation of timestamp t to dst, 216 // returning the number of bytes written. 217 func WriteSuffix(dst []byte, t int) int { 218 dst[0] = suffixDelim 219 n := 1 220 n += len(strconv.AppendInt(dst[n:n], int64(t), 10)) 221 return n 222 } 223 224 // Key returns the i-th unsuffixed key within the keyspace. 225 func Key(k Keyspace, i int) []byte { 226 b := make([]byte, k.MaxLen()) 227 return b[:k.key(b, i)] 228 } 229 230 // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It 231 // returns the number of bytes written. 232 func WriteKey(dst []byte, k Keyspace, i int) int { 233 return k.key(dst, i) 234 } 235 236 type alphabet struct { 237 alphabet []byte 238 maxLength int 239 headSkip int 240 tailSkip int 241 increment int 242 } 243 244 func (a alphabet) Count() int { 245 // Calculate the total number of keys, ignoring the increment. 246 total := (keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip) 247 248 // The increment dictates that we take every N keys, where N = a.increment. 249 // Consider a total containing the 5 keys: 250 // a b c d e 251 // ^ ^ ^ 252 // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After 253 // dividing by the increment, there may be remainder. If there is, there's 254 // one additional key in the alphabet. 255 count := total / a.increment 256 if total%a.increment > 0 { 257 count++ 258 } 259 return count 260 } 261 262 func (a alphabet) MaxLen() int { 263 return a.maxLength 264 } 265 266 func (a alphabet) Slice(i, j int) Keyspace { 267 s := a 268 s.headSkip += i 269 s.tailSkip += a.Count() - j 270 return s 271 } 272 273 func (a alphabet) EveryN(n int) Keyspace { 274 s := a 275 s.increment *= n 276 return s 277 } 278 279 func keyCount(n, l int) int { 280 // The number of representable keys in the keyspace is a function of the 281 // length of the alphabet n and the max key length l. Consider how the 282 // number of representable keys grows as l increases: 283 // 284 // l = 1: n 285 // l = 2: n + n^2 286 // l = 3: n + n^2 + n^3 287 // ... 288 // Σ i=(1...l) n^i = n*(n^l - 1)/(n-1) 289 return (n * (int(math.Pow(float64(n), float64(l))) - 1)) / (n - 1) 290 } 291 292 func (a alphabet) key(buf []byte, idx int) int { 293 // This function generates keys of length 1..maxKeyLength, pulling 294 // characters from the alphabet. The idx determines which key to generate, 295 // generating the i-th lexicographically next key. 296 // 297 // The index to use is advanced by `headSkip`, allowing a keyspace to encode 298 // a subregion of the keyspace. 299 // 300 // Eg, alphabet = `ab`, maxKeyLength = 3: 301 // 302 // aaa aab aba abb baa bab bba bbb 303 // aa ab ba bb 304 // a b 305 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 306 // 307 return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip, 308 keyCount(len(a.alphabet), a.maxLength)) 309 } 310 311 func generateAlphabetKey(buf, alphabet []byte, i, keyCount int) int { 312 if keyCount == 0 || i > keyCount || i < 0 { 313 return 0 314 } 315 316 // Of the keyCount keys in the generative keyspace, how many are there 317 // starting with a particular character? 318 keysPerCharacter := keyCount / len(alphabet) 319 320 // Find the character that the key at index i starts with and set it. 321 characterIdx := i / keysPerCharacter 322 buf[0] = alphabet[characterIdx] 323 324 // Consider characterIdx = 0, pointing to 'a'. 325 // 326 // aaa aab aba abb baa bab bba bbb 327 // aa ab ba bb 328 // a b 329 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 330 // \_________________________/ 331 // |keysPerCharacter| keys 332 // 333 // In our recursive call, we reduce the problem to: 334 // 335 // aaa aab aba abb 336 // aa ab 337 // 0 1 2 3 4 5 338 // \________________________/ 339 // |keysPerCharacter-1| keys 340 // 341 // In the subproblem, there are keysPerCharacter-1 keys (eliminating the 342 // just 'a' key, plus any keys beginning with any other character). 343 // 344 // The index i is also offset, reduced by the count of keys beginning with 345 // characters earlier in the alphabet (keysPerCharacter*characterIdx) and 346 // the key consisting of just the 'a' (-1). 347 i = i - keysPerCharacter*characterIdx - 1 348 return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1) 349 }