github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/block_test.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package sstable 6 7 import ( 8 "bytes" 9 "fmt" 10 "strconv" 11 "strings" 12 "testing" 13 "time" 14 "unsafe" 15 16 "github.com/cockroachdb/datadriven" 17 "github.com/cockroachdb/pebble/internal/base" 18 "github.com/stretchr/testify/require" 19 "golang.org/x/exp/rand" 20 ) 21 22 func ikey(s string) InternalKey { 23 return InternalKey{UserKey: []byte(s)} 24 } 25 26 func TestBlockWriter(t *testing.T) { 27 w := &rawBlockWriter{ 28 blockWriter: blockWriter{restartInterval: 16}, 29 } 30 w.add(ikey("apple"), nil) 31 w.add(ikey("apricot"), nil) 32 w.add(ikey("banana"), nil) 33 block := w.finish() 34 35 expected := []byte( 36 "\x00\x05\x00apple" + 37 "\x02\x05\x00ricot" + 38 "\x00\x06\x00banana" + 39 "\x00\x00\x00\x00\x01\x00\x00\x00") 40 if !bytes.Equal(expected, block) { 41 t.Fatalf("expected\n%q\nfound\n%q", expected, block) 42 } 43 } 44 45 func TestBlockWriterWithPrefix(t *testing.T) { 46 w := &rawBlockWriter{ 47 blockWriter: blockWriter{restartInterval: 2}, 48 } 49 curKey := func() string { 50 return string(base.DecodeInternalKey(w.curKey).UserKey) 51 } 52 addAdapter := func( 53 key InternalKey, 54 value []byte, 55 addValuePrefix bool, 56 valuePrefix valuePrefix, 57 setHasSameKeyPrefix bool) { 58 w.addWithOptionalValuePrefix( 59 key, false, value, len(key.UserKey), addValuePrefix, valuePrefix, setHasSameKeyPrefix) 60 } 61 addAdapter( 62 ikey("apple"), []byte("red"), false, 0, true) 63 require.Equal(t, "apple", curKey()) 64 require.Equal(t, "red", string(w.curValue)) 65 addAdapter( 66 ikey("apricot"), []byte("orange"), true, '\xff', false) 67 require.Equal(t, "apricot", curKey()) 68 require.Equal(t, "orange", string(w.curValue)) 69 // Even though this call has setHasSameKeyPrefix=true, the previous call, 70 // which was after the last restart set it to false. So the restart encoded 71 // with banana has this cumulative bit set to false. 72 addAdapter( 73 ikey("banana"), []byte("yellow"), true, '\x00', true) 74 require.Equal(t, "banana", curKey()) 75 require.Equal(t, "yellow", string(w.curValue)) 76 addAdapter( 77 ikey("cherry"), []byte("red"), false, 0, true) 78 require.Equal(t, "cherry", curKey()) 79 require.Equal(t, "red", string(w.curValue)) 80 // All intervening calls has setHasSameKeyPrefix=true, so the cumulative bit 81 // will be set to true in this restart. 82 addAdapter( 83 ikey("mango"), []byte("juicy"), false, 0, true) 84 require.Equal(t, "mango", curKey()) 85 require.Equal(t, "juicy", string(w.curValue)) 86 87 block := w.finish() 88 89 expected := []byte( 90 "\x00\x0d\x03apple\x00\x00\x00\x00\x00\x00\x00\x00red" + 91 "\x02\x0d\x07ricot\x00\x00\x00\x00\x00\x00\x00\x00\xfforange" + 92 "\x00\x0e\x07banana\x00\x00\x00\x00\x00\x00\x00\x00\x00yellow" + 93 "\x00\x0e\x03cherry\x00\x00\x00\x00\x00\x00\x00\x00red" + 94 "\x00\x0d\x05mango\x00\x00\x00\x00\x00\x00\x00\x00juicy" + 95 // Restarts are: 96 // 00000000 (restart at apple), 2a000000 (restart at banana), 56000080 (restart at mango) 97 // 03000000 (number of restart, i.e., 3). The restart at mango has 1 in the most significant 98 // bit of the uint32, so the last byte in the little endian encoding is \x80. 99 "\x00\x00\x00\x00\x2a\x00\x00\x00\x56\x00\x00\x80\x03\x00\x00\x00") 100 if !bytes.Equal(expected, block) { 101 t.Fatalf("expected\n%x\nfound\n%x", expected, block) 102 } 103 } 104 105 func testBlockCleared(t *testing.T, w, b *blockWriter) { 106 require.Equal(t, w.restartInterval, b.restartInterval) 107 require.Equal(t, w.nEntries, b.nEntries) 108 require.Equal(t, w.nextRestart, b.nextRestart) 109 require.Equal(t, len(w.buf), len(b.buf)) 110 require.Equal(t, len(w.restarts), len(b.restarts)) 111 require.Equal(t, len(w.curKey), len(b.curKey)) 112 require.Equal(t, len(w.prevKey), len(b.prevKey)) 113 require.Equal(t, len(w.curValue), len(b.curValue)) 114 require.Equal(t, w.tmp, b.tmp) 115 116 // Make sure that we didn't lose the allocated byte slices. 117 require.True(t, cap(w.buf) > 0 && cap(b.buf) == 0) 118 require.True(t, cap(w.restarts) > 0 && cap(b.restarts) == 0) 119 require.True(t, cap(w.curKey) > 0 && cap(b.curKey) == 0) 120 require.True(t, cap(w.prevKey) > 0 && cap(b.prevKey) == 0) 121 require.True(t, cap(w.curValue) > 0 && cap(b.curValue) == 0) 122 } 123 124 func TestBlockClear(t *testing.T) { 125 w := blockWriter{restartInterval: 16} 126 w.add(ikey("apple"), nil) 127 w.add(ikey("apricot"), nil) 128 w.add(ikey("banana"), nil) 129 130 w.clear() 131 132 // Once a block is cleared, we expect its fields to be cleared, but we expect 133 // it to keep its allocated byte slices. 134 b := blockWriter{} 135 testBlockCleared(t, &w, &b) 136 } 137 138 func TestInvalidInternalKeyDecoding(t *testing.T) { 139 // Invalid keys since they don't have an 8 byte trailer. 140 testCases := []string{ 141 "", 142 "\x01\x02\x03\x04\x05\x06\x07", 143 "foo", 144 } 145 for _, tc := range testCases { 146 i := blockIter{} 147 i.decodeInternalKey([]byte(tc)) 148 require.Nil(t, i.ikey.UserKey) 149 require.Equal(t, uint64(InternalKeyKindInvalid), i.ikey.Trailer) 150 } 151 } 152 153 func TestBlockIter(t *testing.T) { 154 // k is a block that maps three keys "apple", "apricot", "banana" to empty strings. 155 k := block([]byte( 156 "\x00\x05\x00apple" + 157 "\x02\x05\x00ricot" + 158 "\x00\x06\x00banana" + 159 "\x00\x00\x00\x00\x01\x00\x00\x00")) 160 var testcases = []struct { 161 index int 162 key string 163 }{ 164 {0, ""}, 165 {0, "a"}, 166 {0, "aaaaaaaaaaaaaaa"}, 167 {0, "app"}, 168 {0, "apple"}, 169 {1, "appliance"}, 170 {1, "apricos"}, 171 {1, "apricot"}, 172 {2, "azzzzzzzzzzzzzz"}, 173 {2, "b"}, 174 {2, "banan"}, 175 {2, "banana"}, 176 {3, "banana\x00"}, 177 {3, "c"}, 178 } 179 for _, tc := range testcases { 180 i, err := newRawBlockIter(bytes.Compare, k) 181 require.NoError(t, err) 182 i.SeekGE([]byte(tc.key)) 183 for j, keyWant := range []string{"apple", "apricot", "banana"}[tc.index:] { 184 if !i.Valid() { 185 t.Fatalf("key=%q, index=%d, j=%d: Valid got false, keyWant true", tc.key, tc.index, j) 186 } 187 if keyGot := string(i.Key().UserKey); keyGot != keyWant { 188 t.Fatalf("key=%q, index=%d, j=%d: got %q, keyWant %q", tc.key, tc.index, j, keyGot, keyWant) 189 } 190 i.Next() 191 } 192 if i.Valid() { 193 t.Fatalf("key=%q, index=%d: Valid got true, keyWant false", tc.key, tc.index) 194 } 195 if err := i.Close(); err != nil { 196 t.Fatalf("key=%q, index=%d: got err=%v", tc.key, tc.index, err) 197 } 198 } 199 200 { 201 i, err := newRawBlockIter(bytes.Compare, k) 202 require.NoError(t, err) 203 i.Last() 204 for j, keyWant := range []string{"banana", "apricot", "apple"} { 205 if !i.Valid() { 206 t.Fatalf("j=%d: Valid got false, want true", j) 207 } 208 if keyGot := string(i.Key().UserKey); keyGot != keyWant { 209 t.Fatalf("j=%d: got %q, want %q", j, keyGot, keyWant) 210 } 211 i.Prev() 212 } 213 if i.Valid() { 214 t.Fatalf("Valid got true, want false") 215 } 216 if err := i.Close(); err != nil { 217 t.Fatalf("got err=%v", err) 218 } 219 } 220 } 221 222 func TestBlockIter2(t *testing.T) { 223 makeIkey := func(s string) InternalKey { 224 j := strings.Index(s, ":") 225 seqNum, err := strconv.Atoi(s[j+1:]) 226 if err != nil { 227 panic(err) 228 } 229 return base.MakeInternalKey([]byte(s[:j]), uint64(seqNum), InternalKeyKindSet) 230 } 231 232 var block []byte 233 234 for _, r := range []int{1, 2, 3, 4} { 235 t.Run(fmt.Sprintf("restart=%d", r), func(t *testing.T) { 236 datadriven.RunTest(t, "testdata/block", func(t *testing.T, d *datadriven.TestData) string { 237 switch d.Cmd { 238 case "build": 239 w := &blockWriter{restartInterval: r} 240 for _, e := range strings.Split(strings.TrimSpace(d.Input), ",") { 241 w.add(makeIkey(e), nil) 242 } 243 block = w.finish() 244 return "" 245 246 case "iter": 247 iter, err := newBlockIter(bytes.Compare, block) 248 if err != nil { 249 return err.Error() 250 } 251 252 iter.globalSeqNum, err = scanGlobalSeqNum(d) 253 if err != nil { 254 return err.Error() 255 } 256 257 var b bytes.Buffer 258 for _, line := range strings.Split(d.Input, "\n") { 259 parts := strings.Fields(line) 260 if len(parts) == 0 { 261 continue 262 } 263 switch parts[0] { 264 case "seek-ge": 265 if len(parts) != 2 { 266 return "seek-ge <key>\n" 267 } 268 iter.SeekGE([]byte(strings.TrimSpace(parts[1])), base.SeekGEFlagsNone) 269 case "seek-lt": 270 if len(parts) != 2 { 271 return "seek-lt <key>\n" 272 } 273 iter.SeekLT([]byte(strings.TrimSpace(parts[1])), base.SeekLTFlagsNone) 274 case "first": 275 iter.First() 276 case "last": 277 iter.Last() 278 case "next": 279 iter.Next() 280 case "prev": 281 iter.Prev() 282 } 283 if iter.valid() { 284 fmt.Fprintf(&b, "<%s:%d>", iter.Key().UserKey, iter.Key().SeqNum()) 285 } else if err := iter.Error(); err != nil { 286 fmt.Fprintf(&b, "<err=%v>", err) 287 } else { 288 fmt.Fprintf(&b, ".") 289 } 290 } 291 b.WriteString("\n") 292 return b.String() 293 294 default: 295 return fmt.Sprintf("unknown command: %s", d.Cmd) 296 } 297 }) 298 }) 299 } 300 } 301 302 func TestBlockIterKeyStability(t *testing.T) { 303 w := &blockWriter{restartInterval: 1} 304 expected := [][]byte{ 305 []byte("apple"), 306 []byte("apricot"), 307 []byte("banana"), 308 } 309 for i := range expected { 310 w.add(InternalKey{UserKey: expected[i]}, nil) 311 } 312 block := w.finish() 313 314 i, err := newBlockIter(bytes.Compare, block) 315 require.NoError(t, err) 316 317 // Check that the supplied slice resides within the bounds of the block. 318 check := func(v []byte) { 319 t.Helper() 320 begin := unsafe.Pointer(&v[0]) 321 end := unsafe.Pointer(uintptr(begin) + uintptr(len(v))) 322 blockBegin := unsafe.Pointer(&block[0]) 323 blockEnd := unsafe.Pointer(uintptr(blockBegin) + uintptr(len(block))) 324 if uintptr(begin) < uintptr(blockBegin) || uintptr(end) > uintptr(blockEnd) { 325 t.Fatalf("key %p-%p resides outside of block %p-%p", begin, end, blockBegin, blockEnd) 326 } 327 } 328 329 // Check that various means of iterating over the data match our expected 330 // values. Note that this is only guaranteed because of the usage of a 331 // restart-interval of 1 so that prefix compression was not performed. 332 for j := range expected { 333 keys := [][]byte{} 334 for key, _ := i.SeekGE(expected[j], base.SeekGEFlagsNone); key != nil; key, _ = i.Next() { 335 check(key.UserKey) 336 keys = append(keys, key.UserKey) 337 } 338 require.EqualValues(t, expected[j:], keys) 339 } 340 341 for j := range expected { 342 keys := [][]byte{} 343 for key, _ := i.SeekLT(expected[j], base.SeekLTFlagsNone); key != nil; key, _ = i.Prev() { 344 check(key.UserKey) 345 keys = append(keys, key.UserKey) 346 } 347 for i, j := 0, len(keys)-1; i < j; i, j = i+1, j-1 { 348 keys[i], keys[j] = keys[j], keys[i] 349 } 350 require.EqualValues(t, expected[:j], keys) 351 } 352 } 353 354 // Regression test for a bug in blockIter.Next where it was failing to handle 355 // the case where it is switching from reverse to forward iteration. When that 356 // switch occurs we need to populate blockIter.fullKey so that prefix 357 // decompression works properly. 358 func TestBlockIterReverseDirections(t *testing.T) { 359 w := &blockWriter{restartInterval: 4} 360 keys := [][]byte{ 361 []byte("apple0"), 362 []byte("apple1"), 363 []byte("apple2"), 364 []byte("banana"), 365 []byte("carrot"), 366 } 367 for i := range keys { 368 w.add(InternalKey{UserKey: keys[i]}, nil) 369 } 370 block := w.finish() 371 372 for targetPos := 0; targetPos < w.restartInterval; targetPos++ { 373 t.Run("", func(t *testing.T) { 374 i, err := newBlockIter(bytes.Compare, block) 375 require.NoError(t, err) 376 377 pos := 3 378 if key, _ := i.SeekLT([]byte("carrot"), base.SeekLTFlagsNone); !bytes.Equal(keys[pos], key.UserKey) { 379 t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey) 380 } 381 for pos > targetPos { 382 pos-- 383 if key, _ := i.Prev(); !bytes.Equal(keys[pos], key.UserKey) { 384 t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey) 385 } 386 } 387 pos++ 388 if key, _ := i.Next(); !bytes.Equal(keys[pos], key.UserKey) { 389 t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey) 390 } 391 }) 392 } 393 } 394 395 func BenchmarkBlockIterSeekGE(b *testing.B) { 396 const blockSize = 32 << 10 397 398 for _, restartInterval := range []int{16} { 399 b.Run(fmt.Sprintf("restart=%d", restartInterval), 400 func(b *testing.B) { 401 w := &blockWriter{ 402 restartInterval: restartInterval, 403 } 404 405 var ikey InternalKey 406 var keys [][]byte 407 for i := 0; w.estimatedSize() < blockSize; i++ { 408 key := []byte(fmt.Sprintf("%05d", i)) 409 keys = append(keys, key) 410 ikey.UserKey = key 411 w.add(ikey, nil) 412 } 413 414 it, err := newBlockIter(bytes.Compare, w.finish()) 415 if err != nil { 416 b.Fatal(err) 417 } 418 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 419 420 b.ResetTimer() 421 for i := 0; i < b.N; i++ { 422 k := keys[rng.Intn(len(keys))] 423 it.SeekGE(k, base.SeekGEFlagsNone) 424 if testing.Verbose() { 425 if !it.valid() { 426 b.Fatal("expected to find key") 427 } 428 if !bytes.Equal(k, it.Key().UserKey) { 429 b.Fatalf("expected %s, but found %s", k, it.Key().UserKey) 430 } 431 } 432 } 433 }) 434 } 435 } 436 437 func BenchmarkBlockIterSeekLT(b *testing.B) { 438 const blockSize = 32 << 10 439 440 for _, restartInterval := range []int{16} { 441 b.Run(fmt.Sprintf("restart=%d", restartInterval), 442 func(b *testing.B) { 443 w := &blockWriter{ 444 restartInterval: restartInterval, 445 } 446 447 var ikey InternalKey 448 var keys [][]byte 449 for i := 0; w.estimatedSize() < blockSize; i++ { 450 key := []byte(fmt.Sprintf("%05d", i)) 451 keys = append(keys, key) 452 ikey.UserKey = key 453 w.add(ikey, nil) 454 } 455 456 it, err := newBlockIter(bytes.Compare, w.finish()) 457 if err != nil { 458 b.Fatal(err) 459 } 460 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 461 462 b.ResetTimer() 463 for i := 0; i < b.N; i++ { 464 j := rng.Intn(len(keys)) 465 it.SeekLT(keys[j], base.SeekLTFlagsNone) 466 if testing.Verbose() { 467 if j == 0 { 468 if it.valid() { 469 b.Fatal("unexpected key") 470 } 471 } else { 472 if !it.valid() { 473 b.Fatal("expected to find key") 474 } 475 k := keys[j-1] 476 if !bytes.Equal(k, it.Key().UserKey) { 477 b.Fatalf("expected %s, but found %s", k, it.Key().UserKey) 478 } 479 } 480 } 481 } 482 }) 483 } 484 } 485 486 func BenchmarkBlockIterNext(b *testing.B) { 487 const blockSize = 32 << 10 488 489 for _, restartInterval := range []int{16} { 490 b.Run(fmt.Sprintf("restart=%d", restartInterval), 491 func(b *testing.B) { 492 w := &blockWriter{ 493 restartInterval: restartInterval, 494 } 495 496 var ikey InternalKey 497 for i := 0; w.estimatedSize() < blockSize; i++ { 498 ikey.UserKey = []byte(fmt.Sprintf("%05d", i)) 499 w.add(ikey, nil) 500 } 501 502 it, err := newBlockIter(bytes.Compare, w.finish()) 503 if err != nil { 504 b.Fatal(err) 505 } 506 507 b.ResetTimer() 508 for i := 0; i < b.N; i++ { 509 if !it.valid() { 510 it.First() 511 } 512 it.Next() 513 } 514 }) 515 } 516 } 517 518 func BenchmarkBlockIterPrev(b *testing.B) { 519 const blockSize = 32 << 10 520 521 for _, restartInterval := range []int{16} { 522 b.Run(fmt.Sprintf("restart=%d", restartInterval), 523 func(b *testing.B) { 524 w := &blockWriter{ 525 restartInterval: restartInterval, 526 } 527 528 var ikey InternalKey 529 for i := 0; w.estimatedSize() < blockSize; i++ { 530 ikey.UserKey = []byte(fmt.Sprintf("%05d", i)) 531 w.add(ikey, nil) 532 } 533 534 it, err := newBlockIter(bytes.Compare, w.finish()) 535 if err != nil { 536 b.Fatal(err) 537 } 538 539 b.ResetTimer() 540 for i := 0; i < b.N; i++ { 541 if !it.valid() { 542 it.Last() 543 } 544 it.Prev() 545 } 546 }) 547 } 548 }