github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/sstable/block_test.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package sstable 6 7 import ( 8 "bytes" 9 "fmt" 10 "strconv" 11 "strings" 12 "testing" 13 "time" 14 "unsafe" 15 16 "github.com/cockroachdb/datadriven" 17 "github.com/cockroachdb/pebble/internal/base" 18 "github.com/cockroachdb/pebble/internal/itertest" 19 "github.com/stretchr/testify/require" 20 "golang.org/x/exp/rand" 21 ) 22 23 func ikey(s string) InternalKey { 24 return InternalKey{UserKey: []byte(s)} 25 } 26 27 func TestBlockWriter(t *testing.T) { 28 w := &rawBlockWriter{ 29 blockWriter: blockWriter{restartInterval: 16}, 30 } 31 w.add(ikey("apple"), nil) 32 w.add(ikey("apricot"), nil) 33 w.add(ikey("banana"), nil) 34 block := w.finish() 35 36 expected := []byte( 37 "\x00\x05\x00apple" + 38 "\x02\x05\x00ricot" + 39 "\x00\x06\x00banana" + 40 "\x00\x00\x00\x00\x01\x00\x00\x00") 41 if !bytes.Equal(expected, block) { 42 t.Fatalf("expected\n%q\nfound\n%q", expected, block) 43 } 44 } 45 46 func TestBlockWriterWithPrefix(t *testing.T) { 47 w := &rawBlockWriter{ 48 blockWriter: blockWriter{restartInterval: 2}, 49 } 50 curKey := func() string { 51 return string(base.DecodeInternalKey(w.curKey).UserKey) 52 } 53 addAdapter := func( 54 key InternalKey, 55 value []byte, 56 addValuePrefix bool, 57 valuePrefix valuePrefix, 58 setHasSameKeyPrefix bool) { 59 w.addWithOptionalValuePrefix( 60 key, false, value, len(key.UserKey), addValuePrefix, valuePrefix, setHasSameKeyPrefix) 61 } 62 addAdapter( 63 ikey("apple"), []byte("red"), false, 0, true) 64 require.Equal(t, "apple", curKey()) 65 require.Equal(t, "red", string(w.curValue)) 66 addAdapter( 67 ikey("apricot"), []byte("orange"), true, '\xff', false) 68 require.Equal(t, "apricot", curKey()) 69 require.Equal(t, "orange", string(w.curValue)) 70 // Even though this call has setHasSameKeyPrefix=true, the previous call, 71 // which was after the last restart set it to false. So the restart encoded 72 // with banana has this cumulative bit set to false. 73 addAdapter( 74 ikey("banana"), []byte("yellow"), true, '\x00', true) 75 require.Equal(t, "banana", curKey()) 76 require.Equal(t, "yellow", string(w.curValue)) 77 addAdapter( 78 ikey("cherry"), []byte("red"), false, 0, true) 79 require.Equal(t, "cherry", curKey()) 80 require.Equal(t, "red", string(w.curValue)) 81 // All intervening calls has setHasSameKeyPrefix=true, so the cumulative bit 82 // will be set to true in this restart. 83 addAdapter( 84 ikey("mango"), []byte("juicy"), false, 0, true) 85 require.Equal(t, "mango", curKey()) 86 require.Equal(t, "juicy", string(w.curValue)) 87 88 block := w.finish() 89 90 expected := []byte( 91 "\x00\x0d\x03apple\x00\x00\x00\x00\x00\x00\x00\x00red" + 92 "\x02\x0d\x07ricot\x00\x00\x00\x00\x00\x00\x00\x00\xfforange" + 93 "\x00\x0e\x07banana\x00\x00\x00\x00\x00\x00\x00\x00\x00yellow" + 94 "\x00\x0e\x03cherry\x00\x00\x00\x00\x00\x00\x00\x00red" + 95 "\x00\x0d\x05mango\x00\x00\x00\x00\x00\x00\x00\x00juicy" + 96 // Restarts are: 97 // 00000000 (restart at apple), 2a000000 (restart at banana), 56000080 (restart at mango) 98 // 03000000 (number of restart, i.e., 3). The restart at mango has 1 in the most significant 99 // bit of the uint32, so the last byte in the little endian encoding is \x80. 100 "\x00\x00\x00\x00\x2a\x00\x00\x00\x56\x00\x00\x80\x03\x00\x00\x00") 101 if !bytes.Equal(expected, block) { 102 t.Fatalf("expected\n%x\nfound\n%x", expected, block) 103 } 104 } 105 106 func testBlockCleared(t *testing.T, w, b *blockWriter) { 107 require.Equal(t, w.restartInterval, b.restartInterval) 108 require.Equal(t, w.nEntries, b.nEntries) 109 require.Equal(t, w.nextRestart, b.nextRestart) 110 require.Equal(t, len(w.buf), len(b.buf)) 111 require.Equal(t, len(w.restarts), len(b.restarts)) 112 require.Equal(t, len(w.curKey), len(b.curKey)) 113 require.Equal(t, len(w.prevKey), len(b.prevKey)) 114 require.Equal(t, len(w.curValue), len(b.curValue)) 115 require.Equal(t, w.tmp, b.tmp) 116 117 // Make sure that we didn't lose the allocated byte slices. 118 require.True(t, cap(w.buf) > 0 && cap(b.buf) == 0) 119 require.True(t, cap(w.restarts) > 0 && cap(b.restarts) == 0) 120 require.True(t, cap(w.curKey) > 0 && cap(b.curKey) == 0) 121 require.True(t, cap(w.prevKey) > 0 && cap(b.prevKey) == 0) 122 require.True(t, cap(w.curValue) > 0 && cap(b.curValue) == 0) 123 } 124 125 func TestBlockClear(t *testing.T) { 126 w := blockWriter{restartInterval: 16} 127 w.add(ikey("apple"), nil) 128 w.add(ikey("apricot"), nil) 129 w.add(ikey("banana"), nil) 130 131 w.clear() 132 133 // Once a block is cleared, we expect its fields to be cleared, but we expect 134 // it to keep its allocated byte slices. 135 b := blockWriter{} 136 testBlockCleared(t, &w, &b) 137 } 138 139 func TestInvalidInternalKeyDecoding(t *testing.T) { 140 // Invalid keys since they don't have an 8 byte trailer. 141 testCases := []string{ 142 "", 143 "\x01\x02\x03\x04\x05\x06\x07", 144 "foo", 145 } 146 for _, tc := range testCases { 147 i := blockIter{} 148 i.decodeInternalKey([]byte(tc)) 149 require.Nil(t, i.ikey.UserKey) 150 require.Equal(t, uint64(InternalKeyKindInvalid), i.ikey.Trailer) 151 } 152 } 153 154 func TestBlockIter(t *testing.T) { 155 // k is a block that maps three keys "apple", "apricot", "banana" to empty strings. 156 k := block([]byte( 157 "\x00\x05\x00apple" + 158 "\x02\x05\x00ricot" + 159 "\x00\x06\x00banana" + 160 "\x00\x00\x00\x00\x01\x00\x00\x00")) 161 var testcases = []struct { 162 index int 163 key string 164 }{ 165 {0, ""}, 166 {0, "a"}, 167 {0, "aaaaaaaaaaaaaaa"}, 168 {0, "app"}, 169 {0, "apple"}, 170 {1, "appliance"}, 171 {1, "apricos"}, 172 {1, "apricot"}, 173 {2, "azzzzzzzzzzzzzz"}, 174 {2, "b"}, 175 {2, "banan"}, 176 {2, "banana"}, 177 {3, "banana\x00"}, 178 {3, "c"}, 179 } 180 for _, tc := range testcases { 181 i, err := newRawBlockIter(bytes.Compare, k) 182 require.NoError(t, err) 183 i.SeekGE([]byte(tc.key)) 184 for j, keyWant := range []string{"apple", "apricot", "banana"}[tc.index:] { 185 if !i.Valid() { 186 t.Fatalf("key=%q, index=%d, j=%d: Valid got false, keyWant true", tc.key, tc.index, j) 187 } 188 if keyGot := string(i.Key().UserKey); keyGot != keyWant { 189 t.Fatalf("key=%q, index=%d, j=%d: got %q, keyWant %q", tc.key, tc.index, j, keyGot, keyWant) 190 } 191 i.Next() 192 } 193 if i.Valid() { 194 t.Fatalf("key=%q, index=%d: Valid got true, keyWant false", tc.key, tc.index) 195 } 196 if err := i.Close(); err != nil { 197 t.Fatalf("key=%q, index=%d: got err=%v", tc.key, tc.index, err) 198 } 199 } 200 201 { 202 i, err := newRawBlockIter(bytes.Compare, k) 203 require.NoError(t, err) 204 i.Last() 205 for j, keyWant := range []string{"banana", "apricot", "apple"} { 206 if !i.Valid() { 207 t.Fatalf("j=%d: Valid got false, want true", j) 208 } 209 if keyGot := string(i.Key().UserKey); keyGot != keyWant { 210 t.Fatalf("j=%d: got %q, want %q", j, keyGot, keyWant) 211 } 212 i.Prev() 213 } 214 if i.Valid() { 215 t.Fatalf("Valid got true, want false") 216 } 217 if err := i.Close(); err != nil { 218 t.Fatalf("got err=%v", err) 219 } 220 } 221 } 222 223 func TestBlockIter2(t *testing.T) { 224 makeIkey := func(s string) InternalKey { 225 j := strings.Index(s, ":") 226 seqNum, err := strconv.Atoi(s[j+1:]) 227 if err != nil { 228 panic(err) 229 } 230 return base.MakeInternalKey([]byte(s[:j]), uint64(seqNum), InternalKeyKindSet) 231 } 232 233 var block []byte 234 235 for _, r := range []int{1, 2, 3, 4} { 236 t.Run(fmt.Sprintf("restart=%d", r), func(t *testing.T) { 237 datadriven.RunTest(t, "testdata/block", func(t *testing.T, d *datadriven.TestData) string { 238 switch d.Cmd { 239 case "build": 240 w := &blockWriter{restartInterval: r} 241 for _, e := range strings.Split(strings.TrimSpace(d.Input), ",") { 242 w.add(makeIkey(e), nil) 243 } 244 block = w.finish() 245 return "" 246 247 case "iter": 248 iter, err := newBlockIter(bytes.Compare, block) 249 if err != nil { 250 return err.Error() 251 } 252 253 iter.globalSeqNum, err = scanGlobalSeqNum(d) 254 if err != nil { 255 return err.Error() 256 } 257 return itertest.RunInternalIterCmd(t, d, iter, itertest.Condensed) 258 259 default: 260 return fmt.Sprintf("unknown command: %s", d.Cmd) 261 } 262 }) 263 }) 264 } 265 } 266 267 func TestBlockIterKeyStability(t *testing.T) { 268 w := &blockWriter{restartInterval: 1} 269 expected := [][]byte{ 270 []byte("apple"), 271 []byte("apricot"), 272 []byte("banana"), 273 } 274 for i := range expected { 275 w.add(InternalKey{UserKey: expected[i]}, nil) 276 } 277 block := w.finish() 278 279 i, err := newBlockIter(bytes.Compare, block) 280 require.NoError(t, err) 281 282 // Check that the supplied slice resides within the bounds of the block. 283 check := func(v []byte) { 284 t.Helper() 285 begin := unsafe.Pointer(&v[0]) 286 end := unsafe.Pointer(uintptr(begin) + uintptr(len(v))) 287 blockBegin := unsafe.Pointer(&block[0]) 288 blockEnd := unsafe.Pointer(uintptr(blockBegin) + uintptr(len(block))) 289 if uintptr(begin) < uintptr(blockBegin) || uintptr(end) > uintptr(blockEnd) { 290 t.Fatalf("key %p-%p resides outside of block %p-%p", begin, end, blockBegin, blockEnd) 291 } 292 } 293 294 // Check that various means of iterating over the data match our expected 295 // values. Note that this is only guaranteed because of the usage of a 296 // restart-interval of 1 so that prefix compression was not performed. 297 for j := range expected { 298 keys := [][]byte{} 299 for key, _ := i.SeekGE(expected[j], base.SeekGEFlagsNone); key != nil; key, _ = i.Next() { 300 check(key.UserKey) 301 keys = append(keys, key.UserKey) 302 } 303 require.EqualValues(t, expected[j:], keys) 304 } 305 306 for j := range expected { 307 keys := [][]byte{} 308 for key, _ := i.SeekLT(expected[j], base.SeekLTFlagsNone); key != nil; key, _ = i.Prev() { 309 check(key.UserKey) 310 keys = append(keys, key.UserKey) 311 } 312 for i, j := 0, len(keys)-1; i < j; i, j = i+1, j-1 { 313 keys[i], keys[j] = keys[j], keys[i] 314 } 315 require.EqualValues(t, expected[:j], keys) 316 } 317 } 318 319 // Regression test for a bug in blockIter.Next where it was failing to handle 320 // the case where it is switching from reverse to forward iteration. When that 321 // switch occurs we need to populate blockIter.fullKey so that prefix 322 // decompression works properly. 323 func TestBlockIterReverseDirections(t *testing.T) { 324 w := &blockWriter{restartInterval: 4} 325 keys := [][]byte{ 326 []byte("apple0"), 327 []byte("apple1"), 328 []byte("apple2"), 329 []byte("banana"), 330 []byte("carrot"), 331 } 332 for i := range keys { 333 w.add(InternalKey{UserKey: keys[i]}, nil) 334 } 335 block := w.finish() 336 337 for targetPos := 0; targetPos < w.restartInterval; targetPos++ { 338 t.Run("", func(t *testing.T) { 339 i, err := newBlockIter(bytes.Compare, block) 340 require.NoError(t, err) 341 342 pos := 3 343 if key, _ := i.SeekLT([]byte("carrot"), base.SeekLTFlagsNone); !bytes.Equal(keys[pos], key.UserKey) { 344 t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey) 345 } 346 for pos > targetPos { 347 pos-- 348 if key, _ := i.Prev(); !bytes.Equal(keys[pos], key.UserKey) { 349 t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey) 350 } 351 } 352 pos++ 353 if key, _ := i.Next(); !bytes.Equal(keys[pos], key.UserKey) { 354 t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey) 355 } 356 }) 357 } 358 } 359 360 func BenchmarkBlockIterSeekGE(b *testing.B) { 361 const blockSize = 32 << 10 362 363 for _, restartInterval := range []int{16} { 364 b.Run(fmt.Sprintf("restart=%d", restartInterval), 365 func(b *testing.B) { 366 w := &blockWriter{ 367 restartInterval: restartInterval, 368 } 369 370 var ikey InternalKey 371 var keys [][]byte 372 for i := 0; w.estimatedSize() < blockSize; i++ { 373 key := []byte(fmt.Sprintf("%05d", i)) 374 keys = append(keys, key) 375 ikey.UserKey = key 376 w.add(ikey, nil) 377 } 378 379 it, err := newBlockIter(bytes.Compare, w.finish()) 380 if err != nil { 381 b.Fatal(err) 382 } 383 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 384 385 b.ResetTimer() 386 for i := 0; i < b.N; i++ { 387 k := keys[rng.Intn(len(keys))] 388 it.SeekGE(k, base.SeekGEFlagsNone) 389 if testing.Verbose() { 390 if !it.valid() { 391 b.Fatal("expected to find key") 392 } 393 if !bytes.Equal(k, it.Key().UserKey) { 394 b.Fatalf("expected %s, but found %s", k, it.Key().UserKey) 395 } 396 } 397 } 398 }) 399 } 400 } 401 402 func BenchmarkBlockIterSeekLT(b *testing.B) { 403 const blockSize = 32 << 10 404 405 for _, restartInterval := range []int{16} { 406 b.Run(fmt.Sprintf("restart=%d", restartInterval), 407 func(b *testing.B) { 408 w := &blockWriter{ 409 restartInterval: restartInterval, 410 } 411 412 var ikey InternalKey 413 var keys [][]byte 414 for i := 0; w.estimatedSize() < blockSize; i++ { 415 key := []byte(fmt.Sprintf("%05d", i)) 416 keys = append(keys, key) 417 ikey.UserKey = key 418 w.add(ikey, nil) 419 } 420 421 it, err := newBlockIter(bytes.Compare, w.finish()) 422 if err != nil { 423 b.Fatal(err) 424 } 425 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 426 427 b.ResetTimer() 428 for i := 0; i < b.N; i++ { 429 j := rng.Intn(len(keys)) 430 it.SeekLT(keys[j], base.SeekLTFlagsNone) 431 if testing.Verbose() { 432 if j == 0 { 433 if it.valid() { 434 b.Fatal("unexpected key") 435 } 436 } else { 437 if !it.valid() { 438 b.Fatal("expected to find key") 439 } 440 k := keys[j-1] 441 if !bytes.Equal(k, it.Key().UserKey) { 442 b.Fatalf("expected %s, but found %s", k, it.Key().UserKey) 443 } 444 } 445 } 446 } 447 }) 448 } 449 } 450 451 func BenchmarkBlockIterNext(b *testing.B) { 452 const blockSize = 32 << 10 453 454 for _, restartInterval := range []int{16} { 455 b.Run(fmt.Sprintf("restart=%d", restartInterval), 456 func(b *testing.B) { 457 w := &blockWriter{ 458 restartInterval: restartInterval, 459 } 460 461 var ikey InternalKey 462 for i := 0; w.estimatedSize() < blockSize; i++ { 463 ikey.UserKey = []byte(fmt.Sprintf("%05d", i)) 464 w.add(ikey, nil) 465 } 466 467 it, err := newBlockIter(bytes.Compare, w.finish()) 468 if err != nil { 469 b.Fatal(err) 470 } 471 472 b.ResetTimer() 473 for i := 0; i < b.N; i++ { 474 if !it.valid() { 475 it.First() 476 } 477 it.Next() 478 } 479 }) 480 } 481 } 482 483 func BenchmarkBlockIterPrev(b *testing.B) { 484 const blockSize = 32 << 10 485 486 for _, restartInterval := range []int{16} { 487 b.Run(fmt.Sprintf("restart=%d", restartInterval), 488 func(b *testing.B) { 489 w := &blockWriter{ 490 restartInterval: restartInterval, 491 } 492 493 var ikey InternalKey 494 for i := 0; w.estimatedSize() < blockSize; i++ { 495 ikey.UserKey = []byte(fmt.Sprintf("%05d", i)) 496 w.add(ikey, nil) 497 } 498 499 it, err := newBlockIter(bytes.Compare, w.finish()) 500 if err != nil { 501 b.Fatal(err) 502 } 503 504 b.ResetTimer() 505 for i := 0; i < b.N; i++ { 506 if !it.valid() { 507 it.Last() 508 } 509 it.Prev() 510 } 511 }) 512 } 513 }