github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/table_test.go

// Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
	"bufio"
	"bytes"
	"context"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble/bloom"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/kr/pretty"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/rand"
)

// nonsenseWords are words that aren't in testdata/h.txt.
var nonsenseWords = []string{
	// Edge cases.
	"",
	"\x00",
	"\xff",
	"`",
	"a\x00",
	"aaaaaa",
	"pol\x00nius",
	"youth\x00",
	"youti",
	"zzzzzz",
	// Capitalized versions of actual words in testdata/h.txt.
	"A",
	"Hamlet",
	"thEE",
	"YOUTH",
	// The following were generated by http://soybomb.com/tricks/words/
	"pectures",
	"exectly",
	"tricatrippian",
	"recens",
	"whiratroce",
	"troped",
	"balmous",
	"droppewry",
	"toilizing",
	"crocias",
	"eathrass",
	"cheakden",
	"speablett",
	"skirinies",
	"prefing",
	"bonufacision",
}

var (
	wordCount = map[string]string{}
	minWord   = ""
	maxWord   = ""
)

func init() {
	f, err := os.Open(filepath.FromSlash("testdata/h.txt"))
	if err != nil {
		panic(err)
	}
	defer f.Close()
	r := bufio.NewReader(f)

	for first := true; ; {
		s, err := r.ReadBytes('\n')
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		k := strings.TrimSpace(string(s[8:]))
		v := strings.TrimSpace(string(s[:8]))
		wordCount[k] = v

		if first {
			first = false
			minWord = k
			maxWord = k
			continue
		}
		if minWord > k {
			minWord = k
		}
		if maxWord < k {
			maxWord = k
		}
	}

	if len(wordCount) != 1710 {
		panic(fmt.Sprintf("h.txt entry count: got %d, want %d", len(wordCount), 1710))
	}

	for _, s := range nonsenseWords {
		if _, ok := wordCount[s]; ok {
			panic(fmt.Sprintf("nonsense word %q was in h.txt", s))
		}
	}
}

func check(f vfs.File, comparer *Comparer, fp FilterPolicy) error {
	opts := ReaderOptions{
		Comparer: comparer,
	}
	if fp != nil {
		opts.Filters = map[string]FilterPolicy{
			fp.Name(): fp,
		}
	}

	r, err := newReader(f, opts)
	if err != nil {
		return err
	}

	// Check that each key/value pair in wordCount is also in the table.
	words := make([]string, 0, len(wordCount))
	for k, v := range wordCount {
		words = append(words, k)
		// Check using Get.
		if v1, err := r.get([]byte(k)); string(v1) != string(v) || err != nil {
			return errors.Errorf("Get %q: got (%q, %v), want (%q, %v)", k, v1, err, v, error(nil))
		} else if len(v1) != cap(v1) {
			return errors.Errorf("Get %q: len(v1)=%d, cap(v1)=%d", k, len(v1), cap(v1))
		}

		// Check using SeekGE.
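		// SeekGE should land exactly on k, and the returned key and value
		// slices should be exactly sized (len == cap), mirroring the Get
		// check above.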
		iter, err := r.NewIter(nil /* lower */, nil /* upper */)
		if err != nil {
			return err
		}
		i := newIterAdapter(iter)
		if !i.SeekGE([]byte(k), base.SeekGEFlagsNone) || string(i.Key().UserKey) != k {
			return errors.Errorf("Find %q: key was not in the table", k)
		}
		if k1 := i.Key().UserKey; len(k1) != cap(k1) {
			return errors.Errorf("Find %q: len(k1)=%d, cap(k1)=%d", k, len(k1), cap(k1))
		}
		if string(i.Value()) != v {
			return errors.Errorf("Find %q: got value %q, want %q", k, i.Value(), v)
		}
		if v1 := i.Value(); len(v1) != cap(v1) {
			return errors.Errorf("Find %q: len(v1)=%d, cap(v1)=%d", k, len(v1), cap(v1))
		}

		// Check using SeekLT.
		if !i.SeekLT([]byte(k), base.SeekLTFlagsNone) {
			i.First()
		} else {
			i.Next()
		}
		if string(i.Key().UserKey) != k {
			return errors.Errorf("Find %q: key was not in the table", k)
		}
		if k1 := i.Key().UserKey; len(k1) != cap(k1) {
			return errors.Errorf("Find %q: len(k1)=%d, cap(k1)=%d", k, len(k1), cap(k1))
		}
		if string(i.Value()) != v {
			return errors.Errorf("Find %q: got value %q, want %q", k, i.Value(), v)
		}
		if v1 := i.Value(); len(v1) != cap(v1) {
			return errors.Errorf("Find %q: len(v1)=%d, cap(v1)=%d", k, len(v1), cap(v1))
		}

		if err := i.Close(); err != nil {
			return err
		}
	}

	// Check that nonsense words are not in the table.
	for _, s := range nonsenseWords {
		// Check using Get.
		if _, err := r.get([]byte(s)); err != base.ErrNotFound {
			return errors.Errorf("Get %q: got %v, want ErrNotFound", s, err)
		}

		// Check using Find.
		iter, err := r.NewIter(nil /* lower */, nil /* upper */)
		if err != nil {
			return err
		}
		i := newIterAdapter(iter)
		if i.SeekGE([]byte(s), base.SeekGEFlagsNone) && s == string(i.Key().UserKey) {
			return errors.Errorf("Find %q: unexpectedly found key in the table", s)
		}
		if err := i.Close(); err != nil {
			return err
		}
	}

	// Check that the number of keys >= a given start key matches the expected number.
	var countTests = []struct {
		count int
		start string
	}{
		// cat h.txt | cut -c 9- | wc -l gives 1710.
		{1710, ""},
		// cat h.txt | cut -c 9- | grep -v "^[a-b]" | wc -l gives 1522.
		{1522, "c"},
		// cat h.txt | cut -c 9- | grep -v "^[a-j]" | wc -l gives 940.
		{940, "k"},
		// cat h.txt | cut -c 9- | grep -v "^[a-x]" | wc -l gives 12.
		{12, "y"},
		// cat h.txt | cut -c 9- | grep -v "^[a-z]" | wc -l gives 0.
		{0, "~"},
	}
	for _, ct := range countTests {
		iter, err := r.NewIter(nil /* lower */, nil /* upper */)
		if err != nil {
			return err
		}
		n, i := 0, newIterAdapter(iter)
		for valid := i.SeekGE([]byte(ct.start), base.SeekGEFlagsNone); valid; valid = i.Next() {
			n++
		}
		if n != ct.count {
			return errors.Errorf("count %q: got %d, want %d", ct.start, n, ct.count)
		}
		n = 0
		for valid := i.Last(); valid; valid = i.Prev() {
			if bytes.Compare(i.Key().UserKey, []byte(ct.start)) < 0 {
				break
			}
			n++
		}
		if n != ct.count {
			return errors.Errorf("count %q: got %d, want %d", ct.start, n, ct.count)
		}
		if err := i.Close(); err != nil {
			return err
		}
	}

	// Check lower/upper bounds behavior. Randomly choose a lower and upper bound
	// and then guarantee that iteration finds the expected number of entries.
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
	sort.Strings(words)
	for i := 0; i < 10; i++ {
		lowerIdx := -1
		upperIdx := len(words)
		if rng.Intn(5) != 0 {
			lowerIdx = rng.Intn(len(words))
		}
		if rng.Intn(5) != 0 {
			upperIdx = rng.Intn(len(words))
		}
		if lowerIdx > upperIdx {
			lowerIdx, upperIdx = upperIdx, lowerIdx
		}

		var lower, upper []byte
		if lowerIdx >= 0 {
			lower = []byte(words[lowerIdx])
		} else {
			lowerIdx = 0
		}
		if upperIdx < len(words) {
			upper = []byte(words[upperIdx])
		}

		iter, err := r.NewIter(lower, upper)
		if err != nil {
			return err
		}
		i := newIterAdapter(iter)

		if lower == nil {
			n := 0
			for valid := i.First(); valid; valid = i.Next() {
				n++
			}
			if expected := upperIdx; expected != n {
				return errors.Errorf("expected %d, but found %d", expected, n)
			}
		}

		if upper == nil {
			n := 0
			for valid := i.Last(); valid; valid = i.Prev() {
				n++
			}
			if expected := len(words) - lowerIdx; expected != n {
				return errors.Errorf("expected %d, but found %d", expected, n)
			}
		}

		if lower != nil {
			n := 0
			for valid := i.SeekGE(lower, base.SeekGEFlagsNone); valid; valid = i.Next() {
				n++
			}
			if expected := upperIdx - lowerIdx; expected != n {
				return errors.Errorf("expected %d, but found %d", expected, n)
			}
		}

		if upper != nil {
			n := 0
			for valid := i.SeekLT(upper, base.SeekLTFlagsNone); valid; valid = i.Prev() {
				n++
			}
			if expected := upperIdx - lowerIdx; expected != n {
				return errors.Errorf("expected %d, but found %d", expected, n)
			}
		}

		if err := i.Close(); err != nil {
			return err
		}
	}

	return r.Close()
}

var (
	memFileSystem = vfs.NewMem()
	tmpFileCount  int
)

func build(
	compression Compression,
	fp FilterPolicy,
	ftype FilterType,
	comparer *Comparer,
	propCollector func() TablePropertyCollector,
	blockSize int,
	indexBlockSize int,
) (vfs.File, error) {
	// Create a sorted list of wordCount's keys.
	keys := make([]string, len(wordCount))
	i := 0
	for k := range wordCount {
		keys[i] = k
		i++
	}
	sort.Strings(keys)

	// Write the key/value pairs to a new table, in increasing key order.
	filename := fmt.Sprintf("/tmp%d", tmpFileCount)
	f0, err := memFileSystem.Create(filename)
	if err != nil {
		return nil, err
	}
	tmpFileCount++

	writerOpts := WriterOptions{
		BlockSize:      blockSize,
		Comparer:       comparer,
		Compression:    compression,
		FilterPolicy:   fp,
		FilterType:     ftype,
		IndexBlockSize: indexBlockSize,
		MergerName:     "nullptr",
	}
	if propCollector != nil {
		writerOpts.TablePropertyCollectors = append(writerOpts.TablePropertyCollectors, propCollector)
	}

	w := NewWriter(objstorageprovider.NewFileWritable(f0), writerOpts)
	// Use rangeDelV1Format for testing byte equality with RocksDB.
	w.rangeDelV1Format = true
	var rangeDelLength int
	var rangeDelCounter int
	var rangeDelStart InternalKey
	for i, k := range keys {
		v := wordCount[k]
		ikey := base.MakeInternalKey([]byte(k), 0, InternalKeyKindSet)
		if err := w.Add(ikey, []byte(v)); err != nil {
			return nil, err
		}
		// This mirrors the logic in `make-table.cc`. It adds range deletions of
		// increasing length for every 100 keys added.
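		// Every 100th key starts a new tombstone and bumps its target length;
		// the tombstone [rangeDelStart, ikey) is emitted once rangeDelCounter
		// catches up to rangeDelLength.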
		if i%100 == 0 {
			rangeDelStart = ikey.Clone()
			rangeDelCounter = 0
			rangeDelLength++
		}
		rangeDelCounter++

		if rangeDelCounter == rangeDelLength {
			if err := w.DeleteRange(rangeDelStart.UserKey, ikey.UserKey); err != nil {
				return nil, err
			}
		}
	}
	if err := w.Close(); err != nil {
		return nil, err
	}

	// Re-open that filename for reading.
	f1, err := memFileSystem.Open(filename)
	if err != nil {
		return nil, err
	}
	return f1, nil
}

func testReader(t *testing.T, filename string, comparer *Comparer, fp FilterPolicy) {
	// Check that we can read a pre-made table.
	f, err := vfs.Default.Open(filepath.FromSlash("testdata/" + filename))
	if err != nil {
		t.Error(err)
		return
	}
	err = check(f, comparer, fp)
	if err != nil {
		t.Error(err)
		return
	}
}

func TestReaderLevelDB(t *testing.T)            { testReader(t, "h.ldb", nil, nil) }
func TestReaderDefaultCompression(t *testing.T) { testReader(t, "h.sst", nil, nil) }
func TestReaderNoCompression(t *testing.T)      { testReader(t, "h.no-compression.sst", nil, nil) }
func TestReaderBlockBloomIgnored(t *testing.T) {
	testReader(t, "h.block-bloom.no-compression.sst", nil, nil)
}
func TestReaderTableBloomIgnored(t *testing.T) {
	testReader(t, "h.table-bloom.no-compression.sst", nil, nil)
}

func TestReaderBloomUsed(t *testing.T) {
	// wantActualNegatives is the minimum number of nonsense words (i.e. false
	// positives or true negatives) to run through our filter. Some nonsense
	// words might be rejected even before the filtering step, if they are out
	// of the [minWord, maxWord] range of keys in the table.
	wantActualNegatives := 0
	for _, s := range nonsenseWords {
		if minWord < s && s < maxWord {
			wantActualNegatives++
		}
	}

	files := []struct {
		path     string
		comparer *Comparer
	}{
		{"h.table-bloom.no-compression.sst", nil},
		{"h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst", fixtureComparer},
	}
	for _, tc := range files {
		t.Run(tc.path, func(t *testing.T) {
			for _, degenerate := range []bool{false, true} {
				t.Run(fmt.Sprintf("degenerate=%t", degenerate), func(t *testing.T) {
					c := &countingFilterPolicy{
						FilterPolicy: bloom.FilterPolicy(10),
						degenerate:   degenerate,
					}
					testReader(t, tc.path, tc.comparer, c)

					if c.truePositives != len(wordCount) {
						t.Errorf("degenerate=%t: true positives: got %d, want %d", degenerate, c.truePositives, len(wordCount))
					}
					if c.falseNegatives != 0 {
						t.Errorf("degenerate=%t: false negatives: got %d, want %d", degenerate, c.falseNegatives, 0)
					}

					if got := c.falsePositives + c.trueNegatives; got < wantActualNegatives {
						t.Errorf("degenerate=%t: actual negatives (false positives + true negatives): "+
							"got %d (%d + %d), want >= %d",
							degenerate, got, c.falsePositives, c.trueNegatives, wantActualNegatives)
					}

					if !degenerate {
						// The true negative count should be much greater than the false
						// positive count.
						if c.trueNegatives < 10*c.falsePositives {
							t.Errorf("degenerate=%t: true negative to false positive ratio (%d:%d) is too small",
								degenerate, c.trueNegatives, c.falsePositives)
						}
					}
				})
			}
		})
	}
}

func TestBloomFilterFalsePositiveRate(t *testing.T) {
	f, err := os.Open(filepath.FromSlash("testdata/h.table-bloom.no-compression.sst"))
	require.NoError(t, err)

	c := &countingFilterPolicy{
		FilterPolicy: bloom.FilterPolicy(1),
	}
	r, err := newReader(f, ReaderOptions{
		Filters: map[string]FilterPolicy{
			c.Name(): c,
		},
	})
	require.NoError(t, err)

	const n = 10000
	// key is a buffer that will be re-used for n Get calls, each with a
	// different key. The "m" in the 2-byte prefix means that the key falls in
	// the [minWord, maxWord] range and so will not be rejected prior to
	// applying the Bloom filter. The "!" in the 2-byte prefix means that the
	// key is not actually in the table. The filter will only see actual
	// negatives: false positives or true negatives.
	key := []byte("m!....")
	for i := 0; i < n; i++ {
		binary.LittleEndian.PutUint32(key[2:6], uint32(i))
		r.get(key)
	}

	if c.truePositives != 0 {
		t.Errorf("true positives: got %d, want 0", c.truePositives)
	}
	if c.falseNegatives != 0 {
		t.Errorf("false negatives: got %d, want 0", c.falseNegatives)
	}
	if got := c.falsePositives + c.trueNegatives; got != n {
		t.Errorf("actual negatives (false positives + true negatives): got %d (%d + %d), want %d",
			got, c.falsePositives, c.trueNegatives, n)
	}

	// According to the comments in the C++ LevelDB code, the false positive
	// rate should be approximately 1% for bloom.FilterPolicy(10). The 10 was
	// the parameter used to write the .sst file. When reading the file, the 1
	// in the bloom.FilterPolicy(1) above doesn't matter; only the
	// bloom.FilterPolicy matters.
	if got := float64(100*c.falsePositives) / n; got < 0.2 || 5 < got {
		t.Errorf("false positive rate: got %v%%, want approximately 1%%", got)
	}

	require.NoError(t, r.Close())
}

type countingFilterPolicy struct {
	FilterPolicy
	degenerate bool

	truePositives  int
	falsePositives int
	falseNegatives int
	trueNegatives  int
}

func (c *countingFilterPolicy) MayContain(ftype FilterType, filter, key []byte) bool {
	got := true
	if c.degenerate {
		// When degenerate is true, we override the embedded FilterPolicy's
		// MayContain method to always return true. Doing so is a valid, if
		// inefficient, implementation of the FilterPolicy interface.
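		// Leaving got as true forces every lookup past the filter, so the
		// reader must still return correct results without the filter's help.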
	} else {
		got = c.FilterPolicy.MayContain(ftype, filter, key)
	}
	_, want := wordCount[string(key)]

	switch {
	case got && want:
		c.truePositives++
	case got && !want:
		c.falsePositives++
	case !got && want:
		c.falseNegatives++
	case !got && !want:
		c.trueNegatives++
	}
	return got
}

func TestWriterRoundTrip(t *testing.T) {
	blockSizes := []int{100, 1000, 2048, 4096, math.MaxInt32}
	for _, blockSize := range blockSizes {
		for _, indexBlockSize := range blockSizes {
			for name, fp := range map[string]FilterPolicy{
				"none":       nil,
				"bloom10bit": bloom.FilterPolicy(10),
			} {
				t.Run(fmt.Sprintf("bloom=%s", name), func(t *testing.T) {
					f, err := build(DefaultCompression, fp, TableFilter,
						nil, nil, blockSize, indexBlockSize)
					require.NoError(t, err)

					// Check that we can read a freshly made table.
					require.NoError(t, check(f, nil, nil))
				})
			}
		}
	}
}

func TestFinalBlockIsWritten(t *testing.T) {
	keys := []string{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J"}
	valueLengths := []int{0, 1, 22, 28, 33, 40, 50, 61, 87, 100, 143, 200}
	xxx := bytes.Repeat([]byte("x"), valueLengths[len(valueLengths)-1])
	for _, blockSize := range []int{5, 10, 25, 50, 100} {
		for _, indexBlockSize := range []int{5, 10, 25, 50, 100, math.MaxInt32} {
			for nk := 0; nk <= len(keys); nk++ {
			loop:
				for _, vLen := range valueLengths {
					got, memFS := 0, vfs.NewMem()

					wf, err := memFS.Create("foo")
					if err != nil {
						t.Errorf("nk=%d, vLen=%d: memFS create: %v", nk, vLen, err)
						continue
					}
					w := NewWriter(objstorageprovider.NewFileWritable(wf), WriterOptions{
						BlockSize:      blockSize,
						IndexBlockSize: indexBlockSize,
					})
					for _, k := range keys[:nk] {
						if err := w.Add(InternalKey{UserKey: []byte(k)}, xxx[:vLen]); err != nil {
							t.Errorf("nk=%d, vLen=%d: set: %v", nk, vLen, err)
							continue loop
						}
					}
					if err := w.Close(); err != nil {
						t.Errorf("nk=%d, vLen=%d: writer close: %v", nk, vLen, err)
						continue
					}

					rf, err := memFS.Open("foo")
					if err != nil {
						t.Errorf("nk=%d, vLen=%d: memFS open: %v", nk, vLen, err)
						continue
					}
					r, err := newReader(rf, ReaderOptions{})
					if err != nil {
						t.Errorf("nk=%d, vLen=%d: reader open: %v", nk, vLen, err)
					}
					iter, err := r.NewIter(nil /* lower */, nil /* upper */)
					require.NoError(t, err)
					i := newIterAdapter(iter)
					for valid := i.First(); valid; valid = i.Next() {
						got++
					}
					if err := i.Close(); err != nil {
						t.Errorf("nk=%d, vLen=%d: Iterator close: %v", nk, vLen, err)
						continue
					}
					if err := r.Close(); err != nil {
						t.Errorf("nk=%d, vLen=%d: reader close: %v", nk, vLen, err)
						continue
					}

					if got != nk {
						t.Errorf("nk=%2d, vLen=%3d: got %2d keys, want %2d", nk, vLen, got, nk)
						continue
					}
				}
			}
		}
	}
}

func TestReaderGlobalSeqNum(t *testing.T) {
	f, err := os.Open(filepath.FromSlash("testdata/h.sst"))
	require.NoError(t, err)

	r, err := newReader(f, ReaderOptions{})
	require.NoError(t, err)

	const globalSeqNum = 42
	r.Properties.GlobalSeqNum = globalSeqNum

	iter, err := r.NewIter(nil /* lower */, nil /* upper */)
	require.NoError(t, err)
	i := newIterAdapter(iter)
	for valid := i.First(); valid; valid = i.Next() {
		if globalSeqNum != i.Key().SeqNum() {
			t.Fatalf("expected %d, but found %d", globalSeqNum, i.Key().SeqNum())
		}
	}
	require.NoError(t, i.Close())
	require.NoError(t, r.Close())
}

func TestMetaIndexEntriesSorted(t *testing.T) {
	f, err := build(DefaultCompression, nil, /* filter policy */
		TableFilter, nil, nil, 4096, 4096)
	require.NoError(t, err)

	r, err := newReader(f, ReaderOptions{})
	require.NoError(t, err)

	b, err := r.readBlock(context.Background(), r.metaIndexBH, nil, nil, nil, nil)
	require.NoError(t, err)
	defer b.Release()

	i, err := newRawBlockIter(bytes.Compare, b.Get())
	require.NoError(t, err)

	var keys []string
	for valid := i.First(); valid; valid = i.Next() {
		keys = append(keys, string(i.Key().UserKey))
	}
	if !sort.StringsAreSorted(keys) {
		t.Fatalf("metaindex block out of order: %v", keys)
	}

	require.NoError(t, i.Close())
	require.NoError(t, r.Close())
}

func TestFooterRoundTrip(t *testing.T) {
	buf := make([]byte, 100+maxFooterLen)
	for format := TableFormatLevelDB; format < TableFormatMax; format++ {
		t.Run(fmt.Sprintf("format=%s", format), func(t *testing.T) {
			checksums := []ChecksumType{ChecksumTypeCRC32c}
			if format != TableFormatLevelDB {
				checksums = []ChecksumType{ChecksumTypeCRC32c, ChecksumTypeXXHash64}
			}
			for _, checksum := range checksums {
				t.Run(fmt.Sprintf("checksum=%d", checksum), func(t *testing.T) {
					footer := footer{
						format:      format,
						checksum:    checksum,
						metaindexBH: BlockHandle{Offset: 1, Length: 2},
						indexBH:     BlockHandle{Offset: 3, Length: 4},
					}
					for _, offset := range []int64{0, 1, 100} {
						t.Run(fmt.Sprintf("offset=%d", offset), func(t *testing.T) {
							mem := vfs.NewMem()
							f, err := mem.Create("test")
							require.NoError(t, err)

							_, err = f.Write(buf[:offset])
							require.NoError(t, err)

							encoded := footer.encode(buf[100:])
							_, err = f.Write(encoded)
							require.NoError(t, err)
							require.NoError(t, f.Close())

							footer.footerBH.Offset = uint64(offset)
							footer.footerBH.Length = uint64(len(encoded))

							f, err = mem.Open("test")
							require.NoError(t, err)

							readable, err := NewSimpleReadable(f)
							require.NoError(t, err)

							result, err := readFooter(readable)
							require.NoError(t, err)
							require.NoError(t, readable.Close())

							if diff := pretty.Diff(footer, result); diff != nil {
								t.Fatalf("expected %+v, but found %+v\n%s",
									footer, result, strings.Join(diff, "\n"))
							}
						})
					}
				})
			}
		})
	}
}

func TestReadFooter(t *testing.T) {
	encode := func(format TableFormat, checksum ChecksumType) string {
		f := footer{
			format:   format,
			checksum: checksum,
		}
		return string(f.encode(make([]byte, maxFooterLen)))
	}

	testCases := []struct {
		encoded  string
		expected string
	}{
		{strings.Repeat("a", minFooterLen-1), "file size is too small"},
		{strings.Repeat("a", levelDBFooterLen), "bad magic number"},
		{strings.Repeat("a", rocksDBFooterLen), "bad magic number"},
		{encode(TableFormatLevelDB, 0)[1:], "file size is too small"},
		{encode(TableFormatRocksDBv2, 0)[1:], "footer too short"},
		{encode(TableFormatRocksDBv2, ChecksumTypeNone), "unsupported checksum type"},
		{encode(TableFormatRocksDBv2, ChecksumTypeXXHash), "unsupported checksum type"},
	}
	for _, c := range testCases {
		t.Run("", func(t *testing.T) {
			mem := vfs.NewMem()
			f, err := mem.Create("test")
			require.NoError(t, err)

			_, err = f.Write([]byte(c.encoded))
			require.NoError(t, err)
			require.NoError(t, f.Close())

			f, err = mem.Open("test")
			require.NoError(t, err)

			readable, err := NewSimpleReadable(f)
			require.NoError(t, err)

			if _, err := readFooter(readable); err == nil {
				t.Fatalf("expected %q, but found success", c.expected)
			} else if !strings.Contains(err.Error(), c.expected) {
				t.Fatalf("expected %q, but found %v", c.expected, err)
			}
		})
	}
}

type errorPropCollector struct{}

func (errorPropCollector) Add(key InternalKey, _ []byte) error {
	return errors.Errorf("add %s failed", key)
}

func (errorPropCollector) Finish(_ map[string]string) error {
	return errors.Errorf("finish failed")
}

func (errorPropCollector) Name() string {
	return "errorPropCollector"
}

func TestTablePropertyCollectorErrors(t *testing.T) {
	var testcases map[string]func(w *Writer) error = map[string]func(w *Writer) error{
		"add a#0,1 failed": func(w *Writer) error {
			return w.Set([]byte("a"), []byte("b"))
		},
		"add c#0,0 failed": func(w *Writer) error {
			return w.Delete([]byte("c"))
		},
		"add d#0,15 failed": func(w *Writer) error {
			return w.DeleteRange([]byte("d"), []byte("e"))
		},
		"add f#0,2 failed": func(w *Writer) error {
			return w.Merge([]byte("f"), []byte("g"))
		},
		"finish failed": func(w *Writer) error {
			return w.Close()
		},
	}

	for e, fun := range testcases {
		mem := vfs.NewMem()
		f, err := mem.Create("foo")
		require.NoError(t, err)

		var opts WriterOptions
		opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
			func() TablePropertyCollector {
				return errorPropCollector{}
			})

		w := NewWriter(objstorageprovider.NewFileWritable(f), opts)

		require.Regexp(t, e, fun(w))
	}
}
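
// TestMinimalRoundTrip is a minimal sketch of the write-then-read round trip
// that build and check exercise at length above: write a few keys to an
// in-memory sstable, reopen it, and confirm that iteration returns them in
// order. The file name and keys here are illustrative only.
func TestMinimalRoundTrip(t *testing.T) {
	mem := vfs.NewMem()
	f, err := mem.Create("example")
	require.NoError(t, err)

	w := NewWriter(objstorageprovider.NewFileWritable(f), WriterOptions{
		BlockSize: 4096,
	})
	// Keys must be added in increasing order.
	for _, k := range []string{"apple", "banana", "cherry"} {
		require.NoError(t, w.Set([]byte(k), []byte(k)))
	}
	require.NoError(t, w.Close())

	f, err = mem.Open("example")
	require.NoError(t, err)
	r, err := newReader(f, ReaderOptions{})
	require.NoError(t, err)

	iter, err := r.NewIter(nil /* lower */, nil /* upper */)
	require.NoError(t, err)
	i := newIterAdapter(iter)
	var got []string
	for valid := i.First(); valid; valid = i.Next() {
		got = append(got, string(i.Key().UserKey))
	}
	require.Equal(t, []string{"apple", "banana", "cherry"}, got)
	require.NoError(t, i.Close())
	require.NoError(t, r.Close())
}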