github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/sstable/reader_test.go

// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
	"bytes"
	"context"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"os"
	"path"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"github.com/cockroachdb/datadriven"
	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble/bloom"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/cache"
	"github.com/cockroachdb/pebble/internal/humanize"
	"github.com/cockroachdb/pebble/internal/manifest"
	"github.com/cockroachdb/pebble/internal/testkeys"
	"github.com/cockroachdb/pebble/objstorage"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/cockroachdb/pebble/vfs/errorfs"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/rand"
)

// get is a testing helper that simulates a read and helps verify bloom filters
// until they are available through iterators.
func (r *Reader) get(key []byte) (value []byte, err error) {
	if r.err != nil {
		return nil, r.err
	}

	if r.tableFilter != nil {
		dataH, err := r.readFilter(context.Background(), nil /* stats */, nil)
		if err != nil {
			return nil, err
		}
		var lookupKey []byte
		if r.Split != nil {
			lookupKey = key[:r.Split(key)]
		} else {
			lookupKey = key
		}
		mayContain := r.tableFilter.mayContain(dataH.Get(), lookupKey)
		dataH.Release()
		if !mayContain {
			return nil, base.ErrNotFound
		}
	}

	i, err := r.NewIter(nil /* lower */, nil /* upper */)
	if err != nil {
		return nil, err
	}
	var v base.LazyValue
	ikey, v := i.SeekGE(key, base.SeekGEFlagsNone)
	value, _, err = v.Value(nil)
	if err != nil {
		return nil, err
	}

	if ikey == nil || r.Compare(key, ikey.UserKey) != 0 {
		err := i.Close()
		if err == nil {
			err = base.ErrNotFound
		}
		return nil, err
	}

	// The value will be "freed" when the iterator is closed, so make a copy
	// which will outlast the lifetime of the iterator.
	newValue := make([]byte, len(value))
	copy(newValue, value)
	if err := i.Close(); err != nil {
		return nil, err
	}
	return newValue, nil
}

// iterAdapter adapts the new Iterator API which returns the key and value from
// positioning methods (Seek*, First, Last, Next, Prev) to the old API which
// returned a boolean corresponding to Valid. Only used by test code.
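// The adapter caches the key and value returned by the most recent positioning
// call; a nil cached key means the iterator is exhausted or the value could not
// be retrieved.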
type iterAdapter struct {
	Iterator
	key *InternalKey
	val []byte
}

func newIterAdapter(iter Iterator) *iterAdapter {
	return &iterAdapter{
		Iterator: iter,
	}
}

func (i *iterAdapter) update(key *InternalKey, val base.LazyValue) bool {
	i.key = key
	if v, _, err := val.Value(nil); err != nil {
		i.key = nil
		i.val = nil
	} else {
		i.val = v
	}
	return i.key != nil
}

func (i *iterAdapter) String() string {
	return "iter-adapter"
}

func (i *iterAdapter) SeekGE(key []byte, flags base.SeekGEFlags) bool {
	return i.update(i.Iterator.SeekGE(key, flags))
}

func (i *iterAdapter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) bool {
	return i.update(i.Iterator.SeekPrefixGE(prefix, key, flags))
}

func (i *iterAdapter) SeekLT(key []byte, flags base.SeekLTFlags) bool {
	return i.update(i.Iterator.SeekLT(key, flags))
}

func (i *iterAdapter) First() bool {
	return i.update(i.Iterator.First())
}

func (i *iterAdapter) Last() bool {
	return i.update(i.Iterator.Last())
}

func (i *iterAdapter) Next() bool {
	return i.update(i.Iterator.Next())
}

func (i *iterAdapter) NextPrefix(succKey []byte) bool {
	return i.update(i.Iterator.NextPrefix(succKey))
}

func (i *iterAdapter) NextIgnoreResult() {
	i.Iterator.Next()
	i.update(nil, base.LazyValue{})
}

func (i *iterAdapter) Prev() bool {
	return i.update(i.Iterator.Prev())
}

func (i *iterAdapter) Key() *InternalKey {
	return i.key
}

func (i *iterAdapter) Value() []byte {
	return i.val
}

func (i *iterAdapter) Valid() bool {
	return i.key != nil
}

func (i *iterAdapter) SetBounds(lower, upper []byte) {
	i.Iterator.SetBounds(lower, upper)
	i.key = nil
}

func (i *iterAdapter) SetContext(ctx context.Context) {
	i.Iterator.SetContext(ctx)
}

func TestVirtualReader(t *testing.T) {
	// A faux filenum used to create fake filemetadata for testing.
	var fileNum int = 1
	nextFileNum := func() base.FileNum {
		fileNum++
		return base.FileNum(fileNum - 1)
	}

	// Set during the latest build command.
	var r *Reader
	var meta manifest.PhysicalFileMeta
	var bp BufferPool

	// Set during the latest virtualize command.
	var vMeta1 manifest.VirtualFileMeta
	var v VirtualReader

	defer func() {
		if r != nil {
			require.NoError(t, r.Close())
			bp.Release()
		}
	}()

	createPhysicalMeta := func(w *WriterMetadata, r *Reader) (manifest.PhysicalFileMeta, error) {
		meta := &manifest.FileMetadata{}
		meta.FileNum = nextFileNum()
		meta.CreationTime = time.Now().Unix()
		meta.Size = w.Size
		meta.SmallestSeqNum = w.SmallestSeqNum
		meta.LargestSeqNum = w.LargestSeqNum

		if w.HasPointKeys {
			meta.ExtendPointKeyBounds(r.Compare, w.SmallestPoint, w.LargestPoint)
		}
		if w.HasRangeDelKeys {
			meta.ExtendPointKeyBounds(r.Compare, w.SmallestRangeDel, w.LargestRangeDel)
		}
		if w.HasRangeKeys {
			meta.ExtendRangeKeyBounds(r.Compare, w.SmallestRangeKey, w.LargestRangeKey)
		}
		meta.InitPhysicalBacking()

		if err := meta.Validate(r.Compare, r.opts.Comparer.FormatKey); err != nil {
			return manifest.PhysicalFileMeta{}, err
		}

		return meta.PhysicalMeta(), nil
	}

	formatWMeta := func(m *WriterMetadata) string {
		var b bytes.Buffer
		if m.HasPointKeys {
			fmt.Fprintf(&b, "point: [%s-%s]\n", m.SmallestPoint, m.LargestPoint)
		}
		if m.HasRangeDelKeys {
			fmt.Fprintf(&b, "rangedel: [%s-%s]\n", m.SmallestRangeDel, m.LargestRangeDel)
		}
		if m.HasRangeKeys {
			fmt.Fprintf(&b, "rangekey: [%s-%s]\n", m.SmallestRangeKey, m.LargestRangeKey)
		}
		fmt.Fprintf(&b, "seqnums: [%d-%d]\n", m.SmallestSeqNum, m.LargestSeqNum)
		return b.String()
	}

	formatVirtualReader := func(v *VirtualReader) string {
		var b bytes.Buffer
		fmt.Fprintf(&b, "bounds: [%s-%s]\n", v.vState.lower, v.vState.upper)
		fmt.Fprintf(&b, "filenum: %s\n", v.vState.fileNum.String())
		fmt.Fprintf(
			&b, "props: %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d\n",
			"NumEntries",
			v.Properties.NumEntries,
			"RawKeySize",
			v.Properties.RawKeySize,
			"RawValueSize",
			v.Properties.RawValueSize,
			"RawPointTombstoneKeySize",
			v.Properties.RawPointTombstoneKeySize,
			"RawPointTombstoneValueSize",
			v.Properties.RawPointTombstoneValueSize,
			"NumSizedDeletions",
			v.Properties.NumSizedDeletions,
			"NumDeletions",
			v.Properties.NumDeletions,
			"NumRangeDeletions",
			v.Properties.NumRangeDeletions,
			"NumRangeKeyDels",
			v.Properties.NumRangeKeyDels,
			"NumRangeKeySets",
			v.Properties.NumRangeKeySets,
			"ValueBlocksSize",
			v.Properties.ValueBlocksSize,
		)
		return b.String()
	}

	datadriven.RunTest(t, "testdata/virtual_reader", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "build":
			if r != nil {
				bp.Release()
				_ = r.Close()
				r = nil
				meta.FileMetadata = nil
				vMeta1.FileMetadata = nil
				v = VirtualReader{}
			}
			var wMeta *WriterMetadata
			var err error
			writerOpts := &WriterOptions{
				TableFormat: TableFormatMax,
			}
			// Use a single level index by default.
			writerOpts.IndexBlockSize = 100000
			if len(td.CmdArgs) == 1 {
				if td.CmdArgs[0].String() == "twoLevel" {
					// Force a two level index.
					writerOpts.IndexBlockSize = 1
					writerOpts.BlockSize = 1
				}
			}
			wMeta, r, err = runBuildCmd(td, writerOpts, 0)
			if err != nil {
				return err.Error()
			}
			bp.Init(5)

			// Create a fake file metadata using the writer meta.
308 meta, err = createPhysicalMeta(wMeta, r) 309 if err != nil { 310 return err.Error() 311 } 312 r.fileNum = meta.FileBacking.DiskFileNum 313 return formatWMeta(wMeta) 314 315 case "virtualize": 316 // virtualize will split the previously built physical sstable into 317 // a single sstable with virtual bounds. The command assumes that 318 // the bounds for the virtual sstable are valid. For the purposes of 319 // this command the bounds must be valid keys. In general, and for 320 // this command, range key/range del spans must also not span across 321 // virtual sstable bounds. 322 if meta.FileMetadata == nil { 323 return "build must be called at least once before virtualize" 324 } 325 if vMeta1.FileMetadata != nil { 326 vMeta1.FileMetadata = nil 327 v = VirtualReader{} 328 } 329 vMeta := &manifest.FileMetadata{ 330 FileBacking: meta.FileBacking, 331 SmallestSeqNum: meta.SmallestSeqNum, 332 LargestSeqNum: meta.LargestSeqNum, 333 Virtual: true, 334 } 335 // Parse the virtualization bounds. 336 bounds := strings.Split(td.CmdArgs[0].String(), "-") 337 vMeta.Smallest = base.ParseInternalKey(bounds[0]) 338 vMeta.Largest = base.ParseInternalKey(bounds[1]) 339 vMeta.FileNum = nextFileNum() 340 var err error 341 vMeta.Size, err = r.EstimateDiskUsage(vMeta.Smallest.UserKey, vMeta.Largest.UserKey) 342 if err != nil { 343 return err.Error() 344 } 345 vMeta.ValidateVirtual(meta.FileMetadata) 346 347 vMeta1 = vMeta.VirtualMeta() 348 v = MakeVirtualReader(r, vMeta1, false /* isForeign */) 349 return formatVirtualReader(&v) 350 351 case "citer": 352 // Creates a compaction iterator from the virtual reader, and then 353 // just scans the keyspace. Which is all a compaction iterator is 354 // used for. This tests the First and Next calls. 355 if vMeta1.FileMetadata == nil { 356 return "virtualize must be called before creating compaction iters" 357 } 358 359 var rp ReaderProvider 360 var bytesIterated uint64 361 iter, err := v.NewCompactionIter(&bytesIterated, CategoryAndQoS{}, nil, rp, &bp) 362 if err != nil { 363 return err.Error() 364 } 365 366 var buf bytes.Buffer 367 for key, val := iter.First(); key != nil; key, val = iter.Next() { 368 fmt.Fprintf(&buf, "%s:%s\n", key.String(), val.InPlaceValue()) 369 } 370 err = iter.Close() 371 if err != nil { 372 return err.Error() 373 } 374 return buf.String() 375 376 case "constrain": 377 if vMeta1.FileMetadata == nil { 378 return "virtualize must be called before constrain" 379 } 380 splits := strings.Split(td.CmdArgs[0].String(), ",") 381 of, ol := []byte(splits[0]), []byte(splits[1]) 382 inclusive, f, l := v.vState.constrainBounds(of, ol, splits[2] == "true") 383 var buf bytes.Buffer 384 buf.Write(f) 385 buf.WriteByte(',') 386 buf.Write(l) 387 buf.WriteByte(',') 388 if inclusive { 389 buf.WriteString("true") 390 } else { 391 buf.WriteString("false") 392 } 393 buf.WriteByte('\n') 394 return buf.String() 395 396 case "scan-range-del": 397 if vMeta1.FileMetadata == nil { 398 return "virtualize must be called before scan-range-del" 399 } 400 iter, err := v.NewRawRangeDelIter() 401 if err != nil { 402 return err.Error() 403 } 404 if iter == nil { 405 return "" 406 } 407 defer iter.Close() 408 409 var buf bytes.Buffer 410 for s := iter.First(); s != nil; s = iter.Next() { 411 fmt.Fprintf(&buf, "%s\n", s) 412 } 413 return buf.String() 414 415 case "scan-range-key": 416 if vMeta1.FileMetadata == nil { 417 return "virtualize must be called before scan-range-key" 418 } 419 iter, err := v.NewRawRangeKeyIter() 420 if err != nil { 421 return err.Error() 422 } 423 if iter 
== nil { 424 return "" 425 } 426 defer iter.Close() 427 428 var buf bytes.Buffer 429 for s := iter.First(); s != nil; s = iter.Next() { 430 fmt.Fprintf(&buf, "%s\n", s) 431 } 432 return buf.String() 433 434 case "iter": 435 if vMeta1.FileMetadata == nil { 436 return "virtualize must be called before iter" 437 } 438 var lower, upper []byte 439 if len(td.CmdArgs) > 0 { 440 splits := strings.Split(td.CmdArgs[0].String(), "-") 441 lower, upper = []byte(splits[0]), []byte(splits[1]) 442 } 443 444 var stats base.InternalIteratorStats 445 iter, err := v.NewIterWithBlockPropertyFiltersAndContextEtc( 446 context.Background(), lower, upper, nil, false, false, 447 &stats, CategoryAndQoS{}, nil, TrivialReaderProvider{Reader: r}) 448 if err != nil { 449 return err.Error() 450 } 451 return runIterCmd(td, iter, true, runIterCmdStats(&stats)) 452 453 default: 454 return fmt.Sprintf("unknown command: %s", td.Cmd) 455 } 456 }) 457 } 458 459 func TestReader(t *testing.T) { 460 writerOpts := map[string]WriterOptions{ 461 // No bloom filters. 462 "default": {}, 463 "bloom10bit": { 464 // The standard policy. 465 FilterPolicy: bloom.FilterPolicy(10), 466 FilterType: base.TableFilter, 467 }, 468 "bloom1bit": { 469 // A policy with many false positives. 470 FilterPolicy: bloom.FilterPolicy(1), 471 FilterType: base.TableFilter, 472 }, 473 "bloom100bit": { 474 // A policy unlikely to have false positives. 475 FilterPolicy: bloom.FilterPolicy(100), 476 FilterType: base.TableFilter, 477 }, 478 } 479 480 blockSizes := map[string]int{ 481 "1bytes": 1, 482 "5bytes": 5, 483 "10bytes": 10, 484 "25bytes": 25, 485 "Maxbytes": math.MaxInt32, 486 } 487 488 opts := map[string]*Comparer{ 489 "default": testkeys.Comparer, 490 "prefixFilter": fixtureComparer, 491 } 492 493 testDirs := map[string]string{ 494 "default": "testdata/reader", 495 "prefixFilter": "testdata/prefixreader", 496 } 497 498 for format := TableFormatPebblev2; format <= TableFormatMax; format++ { 499 for dName, blockSize := range blockSizes { 500 for iName, indexBlockSize := range blockSizes { 501 for lName, tableOpt := range writerOpts { 502 for oName, cmp := range opts { 503 tableOpt.BlockSize = blockSize 504 tableOpt.Comparer = cmp 505 tableOpt.IndexBlockSize = indexBlockSize 506 tableOpt.TableFormat = format 507 508 t.Run( 509 fmt.Sprintf("format=%d,opts=%s,writerOpts=%s,blockSize=%s,indexSize=%s", 510 format, oName, lName, dName, iName), 511 func(t *testing.T) { 512 runTestReader( 513 t, tableOpt, testDirs[oName], nil /* Reader */, true) 514 }) 515 } 516 } 517 } 518 } 519 } 520 } 521 522 func TestReaderHideObsolete(t *testing.T) { 523 blockSizes := map[string]int{ 524 "1bytes": 1, 525 "5bytes": 5, 526 "10bytes": 10, 527 "25bytes": 25, 528 "Maxbytes": math.MaxInt32, 529 } 530 for dName, blockSize := range blockSizes { 531 opts := WriterOptions{ 532 TableFormat: TableFormatPebblev4, 533 BlockSize: blockSize, 534 IndexBlockSize: blockSize, 535 Comparer: testkeys.Comparer, 536 } 537 t.Run(fmt.Sprintf("blockSize=%s", dName), func(t *testing.T) { 538 runTestReader( 539 t, opts, "testdata/reader_hide_obsolete", 540 nil /* Reader */, true) 541 }) 542 } 543 } 544 545 func TestHamletReader(t *testing.T) { 546 for _, fixture := range TestFixtures { 547 f, err := os.Open(filepath.Join("testdata", fixture.Filename)) 548 require.NoError(t, err) 549 550 r, err := newReader(f, ReaderOptions{}) 551 require.NoError(t, err) 552 553 t.Run( 554 fmt.Sprintf("sst=%s", fixture.Filename), 555 func(t *testing.T) { 556 runTestReader(t, WriterOptions{}, "testdata/hamletreader", r, 
false) 557 }, 558 ) 559 } 560 } 561 562 func forEveryTableFormat[I any]( 563 t *testing.T, formatTable [NumTableFormats]I, runTest func(*testing.T, TableFormat, I), 564 ) { 565 t.Helper() 566 for tf := TableFormatUnspecified + 1; tf <= TableFormatMax; tf++ { 567 t.Run(tf.String(), func(t *testing.T) { 568 runTest(t, tf, formatTable[tf]) 569 }) 570 } 571 } 572 573 func TestReaderStats(t *testing.T) { 574 forEveryTableFormat[string](t, 575 [NumTableFormats]string{ 576 TableFormatUnspecified: "", 577 TableFormatLevelDB: "testdata/readerstats_LevelDB", 578 TableFormatRocksDBv2: "testdata/readerstats_LevelDB", 579 TableFormatPebblev1: "testdata/readerstats_LevelDB", 580 TableFormatPebblev2: "testdata/readerstats_LevelDB", 581 TableFormatPebblev3: "testdata/readerstats_Pebblev3", 582 TableFormatPebblev4: "testdata/readerstats_Pebblev3", 583 }, func(t *testing.T, format TableFormat, dir string) { 584 if dir == "" { 585 t.Skip() 586 } 587 writerOpt := WriterOptions{ 588 BlockSize: 32 << 10, 589 IndexBlockSize: 32 << 10, 590 Comparer: testkeys.Comparer, 591 TableFormat: format, 592 } 593 runTestReader(t, writerOpt, dir, nil /* Reader */, false /* printValue */) 594 }) 595 } 596 597 func TestReaderWithBlockPropertyFilter(t *testing.T) { 598 // Some of these tests examine internal iterator state, so they require 599 // determinism. When the invariants tag is set, disableBoundsOpt may disable 600 // the bounds optimization depending on the iterator pointer address. This 601 // can add nondeterminism to the internal iterator statae. Disable this 602 // nondeterminism for the duration of this test. 603 ensureBoundsOptDeterminism = true 604 defer func() { ensureBoundsOptDeterminism = false }() 605 606 forEveryTableFormat[string](t, 607 [NumTableFormats]string{ 608 TableFormatUnspecified: "", // Block properties unsupported 609 TableFormatLevelDB: "", // Block properties unsupported 610 TableFormatRocksDBv2: "", // Block properties unsupported 611 TableFormatPebblev1: "", // Block properties unsupported 612 TableFormatPebblev2: "testdata/reader_bpf/Pebblev2", 613 TableFormatPebblev3: "testdata/reader_bpf/Pebblev3", 614 TableFormatPebblev4: "testdata/reader_bpf/Pebblev3", 615 }, func(t *testing.T, format TableFormat, dir string) { 616 if dir == "" { 617 t.Skip("Block-properties unsupported") 618 } 619 writerOpt := WriterOptions{ 620 Comparer: testkeys.Comparer, 621 TableFormat: format, 622 BlockPropertyCollectors: []func() BlockPropertyCollector{NewTestKeysBlockPropertyCollector}, 623 } 624 runTestReader(t, writerOpt, dir, nil /* Reader */, false) 625 }) 626 } 627 628 func TestInjectedErrors(t *testing.T) { 629 for _, fixture := range TestFixtures { 630 run := func(i int) (reterr error) { 631 f, err := vfs.Default.Open(filepath.Join("testdata", fixture.Filename)) 632 require.NoError(t, err) 633 634 r, err := newReader(errorfs.WrapFile(f, errorfs.ErrInjected.If(errorfs.OnIndex(int32(i)))), ReaderOptions{}) 635 if err != nil { 636 return firstError(err, f.Close()) 637 } 638 defer func() { reterr = firstError(reterr, r.Close()) }() 639 640 _, err = r.EstimateDiskUsage([]byte("borrower"), []byte("lender")) 641 if err != nil { 642 return err 643 } 644 645 iter, err := r.NewIter(nil, nil) 646 if err != nil { 647 return err 648 } 649 defer func() { reterr = firstError(reterr, iter.Close()) }() 650 for k, v := iter.First(); k != nil; k, v = iter.Next() { 651 val, _, err := v.Value(nil) 652 if err != nil { 653 return err 654 } 655 if val == nil { 656 break 657 } 658 } 659 if err = iter.Error(); err != nil { 660 
return err 661 } 662 return nil 663 } 664 for i := 0; ; i++ { 665 err := run(i) 666 if errors.Is(err, errorfs.ErrInjected) { 667 t.Logf("%q, index %d: %s", fixture.Filename, i, err) 668 continue 669 } 670 if err != nil { 671 t.Errorf("%q, index %d: non-injected error: %+v", fixture.Filename, i, err) 672 break 673 } 674 t.Logf("%q: no error at index %d", fixture.Filename, i) 675 break 676 } 677 } 678 } 679 680 func TestInvalidReader(t *testing.T) { 681 invalid, err := NewSimpleReadable(vfs.NewMemFile([]byte("invalid sst bytes"))) 682 if err != nil { 683 t.Fatal(err) 684 } 685 testCases := []struct { 686 readable objstorage.Readable 687 expected string 688 }{ 689 {nil, "nil file"}, 690 {invalid, "invalid table"}, 691 } 692 for _, tc := range testCases { 693 r, err := NewReader(tc.readable, ReaderOptions{}) 694 if !strings.Contains(err.Error(), tc.expected) { 695 t.Fatalf("expected %q, but found %q", tc.expected, err.Error()) 696 } 697 if r != nil { 698 t.Fatalf("found non-nil reader returned with non-nil error %q", err.Error()) 699 } 700 } 701 } 702 703 func indexLayoutString(t *testing.T, r *Reader) string { 704 indexH, err := r.readIndex(context.Background(), nil, nil) 705 require.NoError(t, err) 706 defer indexH.Release() 707 var buf strings.Builder 708 twoLevelIndex := r.Properties.IndexType == twoLevelIndex 709 buf.WriteString("index entries:\n") 710 iter, err := newBlockIter(r.Compare, indexH.Get()) 711 defer func() { 712 require.NoError(t, iter.Close()) 713 }() 714 require.NoError(t, err) 715 for key, value := iter.First(); key != nil; key, value = iter.Next() { 716 bh, err := decodeBlockHandleWithProperties(value.InPlaceValue()) 717 require.NoError(t, err) 718 fmt.Fprintf(&buf, " %s: size %d\n", string(key.UserKey), bh.Length) 719 if twoLevelIndex { 720 b, err := r.readBlock( 721 context.Background(), bh.BlockHandle, nil, nil, nil, nil, nil) 722 require.NoError(t, err) 723 defer b.Release() 724 iter2, err := newBlockIter(r.Compare, b.Get()) 725 defer func() { 726 require.NoError(t, iter2.Close()) 727 }() 728 require.NoError(t, err) 729 for key, value := iter2.First(); key != nil; key, value = iter2.Next() { 730 bh, err := decodeBlockHandleWithProperties(value.InPlaceValue()) 731 require.NoError(t, err) 732 fmt.Fprintf(&buf, " %s: size %d\n", string(key.UserKey), bh.Length) 733 } 734 } 735 } 736 return buf.String() 737 } 738 739 func runTestReader(t *testing.T, o WriterOptions, dir string, r *Reader, printValue bool) { 740 datadriven.Walk(t, dir, func(t *testing.T, path string) { 741 defer func() { 742 if r != nil { 743 r.Close() 744 r = nil 745 } 746 }() 747 748 datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string { 749 switch d.Cmd { 750 case "build": 751 if r != nil { 752 r.Close() 753 r = nil 754 } 755 var cacheSize int 756 var printLayout bool 757 d.MaybeScanArgs(t, "cache-size", &cacheSize) 758 d.MaybeScanArgs(t, "print-layout", &printLayout) 759 d.MaybeScanArgs(t, "block-size", &o.BlockSize) 760 d.MaybeScanArgs(t, "index-block-size", &o.IndexBlockSize) 761 762 var err error 763 _, r, err = runBuildCmd(d, &o, cacheSize) 764 if err != nil { 765 return err.Error() 766 } 767 if printLayout { 768 return indexLayoutString(t, r) 769 } 770 return "" 771 772 case "iter": 773 seqNum, err := scanGlobalSeqNum(d) 774 if err != nil { 775 return err.Error() 776 } 777 var stats base.InternalIteratorStats 778 r.Properties.GlobalSeqNum = seqNum 779 var bpfs []BlockPropertyFilter 780 if d.HasArg("block-property-filter") { 781 var filterMin, filterMax uint64 782 
d.ScanArgs(t, "block-property-filter", &filterMin, &filterMax) 783 bpf := NewTestKeysBlockPropertyFilter(filterMin, filterMax) 784 bpfs = append(bpfs, bpf) 785 } 786 hideObsoletePoints := false 787 if d.HasArg("hide-obsolete-points") { 788 d.ScanArgs(t, "hide-obsolete-points", &hideObsoletePoints) 789 if hideObsoletePoints { 790 hideObsoletePoints, bpfs = r.TryAddBlockPropertyFilterForHideObsoletePoints( 791 InternalKeySeqNumMax, InternalKeySeqNumMax-1, bpfs) 792 require.True(t, hideObsoletePoints) 793 } 794 } 795 var filterer *BlockPropertiesFilterer 796 if len(bpfs) > 0 { 797 filterer = newBlockPropertiesFilterer(bpfs, nil) 798 intersects, err := 799 filterer.intersectsUserPropsAndFinishInit(r.Properties.UserProperties) 800 if err != nil { 801 return err.Error() 802 } 803 if !intersects { 804 return "table does not intersect BlockPropertyFilter" 805 } 806 } 807 iter, err := r.NewIterWithBlockPropertyFiltersAndContextEtc( 808 context.Background(), 809 nil, /* lower */ 810 nil, /* upper */ 811 filterer, 812 hideObsoletePoints, 813 true, /* use filter block */ 814 &stats, 815 CategoryAndQoS{}, 816 nil, 817 TrivialReaderProvider{Reader: r}, 818 ) 819 if err != nil { 820 return err.Error() 821 } 822 return runIterCmd(d, iter, printValue, runIterCmdStats(&stats)) 823 824 case "get": 825 var b bytes.Buffer 826 for _, k := range strings.Split(d.Input, "\n") { 827 v, err := r.get([]byte(k)) 828 if err != nil { 829 fmt.Fprintf(&b, "<err: %s>\n", err) 830 } else { 831 fmt.Fprintln(&b, string(v)) 832 } 833 } 834 return b.String() 835 default: 836 return fmt.Sprintf("unknown command: %s", d.Cmd) 837 } 838 }) 839 }) 840 } 841 842 func TestReaderCheckComparerMerger(t *testing.T) { 843 const testTable = "test" 844 845 testComparer := &base.Comparer{ 846 Name: "test.comparer", 847 Compare: base.DefaultComparer.Compare, 848 Equal: base.DefaultComparer.Equal, 849 Separator: base.DefaultComparer.Separator, 850 Successor: base.DefaultComparer.Successor, 851 } 852 testMerger := &base.Merger{ 853 Name: "test.merger", 854 Merge: base.DefaultMerger.Merge, 855 } 856 writerOpts := WriterOptions{ 857 Comparer: testComparer, 858 MergerName: "test.merger", 859 } 860 861 mem := vfs.NewMem() 862 f0, err := mem.Create(testTable) 863 require.NoError(t, err) 864 865 w := NewWriter(objstorageprovider.NewFileWritable(f0), writerOpts) 866 require.NoError(t, w.Set([]byte("test"), nil)) 867 require.NoError(t, w.Close()) 868 869 testCases := []struct { 870 comparers []*base.Comparer 871 mergers []*base.Merger 872 expected string 873 }{ 874 { 875 []*base.Comparer{testComparer}, 876 []*base.Merger{testMerger}, 877 "", 878 }, 879 { 880 []*base.Comparer{testComparer, base.DefaultComparer}, 881 []*base.Merger{testMerger, base.DefaultMerger}, 882 "", 883 }, 884 { 885 []*base.Comparer{}, 886 []*base.Merger{testMerger}, 887 "unknown comparer test.comparer", 888 }, 889 { 890 []*base.Comparer{base.DefaultComparer}, 891 []*base.Merger{testMerger}, 892 "unknown comparer test.comparer", 893 }, 894 { 895 []*base.Comparer{testComparer}, 896 []*base.Merger{}, 897 "unknown merger test.merger", 898 }, 899 { 900 []*base.Comparer{testComparer}, 901 []*base.Merger{base.DefaultMerger}, 902 "unknown merger test.merger", 903 }, 904 } 905 906 for _, c := range testCases { 907 t.Run("", func(t *testing.T) { 908 f1, err := mem.Open(testTable) 909 require.NoError(t, err) 910 911 comparers := make(Comparers) 912 for _, comparer := range c.comparers { 913 comparers[comparer.Name] = comparer 914 } 915 mergers := make(Mergers) 916 for _, merger := range 
c.mergers { 917 mergers[merger.Name] = merger 918 } 919 920 r, err := newReader(f1, ReaderOptions{}, comparers, mergers) 921 if err != nil { 922 if r != nil { 923 t.Fatalf("found non-nil reader returned with non-nil error %q", err.Error()) 924 } 925 if !strings.HasSuffix(err.Error(), c.expected) { 926 t.Fatalf("expected %q, but found %q", c.expected, err.Error()) 927 } 928 } else if c.expected != "" { 929 t.Fatalf("expected %q, but found success", c.expected) 930 } 931 if r != nil { 932 _ = r.Close() 933 } 934 }) 935 } 936 } 937 func checkValidPrefix(prefix, key []byte) bool { 938 return prefix == nil || bytes.HasPrefix(key, prefix) 939 } 940 941 func testBytesIteratedWithCompression( 942 t *testing.T, 943 compression Compression, 944 allowedSizeDeviationPercent uint64, 945 blockSizes []int, 946 maxNumEntries []uint64, 947 ) { 948 for i, blockSize := range blockSizes { 949 for _, indexBlockSize := range blockSizes { 950 for _, numEntries := range []uint64{0, 1, maxNumEntries[i]} { 951 r := buildTestTable(t, numEntries, blockSize, indexBlockSize, compression) 952 var bytesIterated, prevIterated uint64 953 var pool BufferPool 954 pool.Init(5) 955 citer, err := r.NewCompactionIter( 956 &bytesIterated, CategoryAndQoS{}, nil, TrivialReaderProvider{Reader: r}, &pool) 957 require.NoError(t, err) 958 959 for key, _ := citer.First(); key != nil; key, _ = citer.Next() { 960 if bytesIterated < prevIterated { 961 t.Fatalf("bytesIterated moved backward: %d < %d", bytesIterated, prevIterated) 962 } 963 prevIterated = bytesIterated 964 } 965 966 expected := r.Properties.DataSize 967 allowedSizeDeviation := expected * allowedSizeDeviationPercent / 100 968 // There is some inaccuracy due to compression estimation. 969 if bytesIterated < expected-allowedSizeDeviation || bytesIterated > expected+allowedSizeDeviation { 970 t.Fatalf("bytesIterated: got %d, want %d", bytesIterated, expected) 971 } 972 973 require.NoError(t, citer.Close()) 974 require.NoError(t, r.Close()) 975 pool.Release() 976 } 977 } 978 } 979 } 980 981 func TestBytesIterated(t *testing.T) { 982 blockSizes := []int{10, 100, 1000, 4096, math.MaxInt32} 983 t.Run("Compressed", func(t *testing.T) { 984 testBytesIteratedWithCompression(t, SnappyCompression, 1, blockSizes, []uint64{1e5, 1e5, 1e5, 1e5, 1e5}) 985 }) 986 t.Run("Uncompressed", func(t *testing.T) { 987 testBytesIteratedWithCompression(t, NoCompression, 0, blockSizes, []uint64{1e5, 1e5, 1e5, 1e5, 1e5}) 988 }) 989 t.Run("Zstd", func(t *testing.T) { 990 // compression with zstd is extremely slow with small block size (esp the nocgo version). 991 // use less numEntries to make the test run at reasonable speed (under 10 seconds). 
992 maxNumEntries := []uint64{1e2, 1e2, 1e3, 4e3, 1e5} 993 if useStandardZstdLib { 994 maxNumEntries = []uint64{1e3, 1e3, 1e4, 4e4, 1e5} 995 } 996 testBytesIteratedWithCompression(t, ZstdCompression, 1, blockSizes, maxNumEntries) 997 }) 998 } 999 1000 func TestCompactionIteratorSetupForCompaction(t *testing.T) { 1001 tmpDir := path.Join(t.TempDir()) 1002 provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(vfs.Default, tmpDir)) 1003 require.NoError(t, err) 1004 defer provider.Close() 1005 blockSizes := []int{10, 100, 1000, 4096, math.MaxInt32} 1006 for _, blockSize := range blockSizes { 1007 for _, indexBlockSize := range blockSizes { 1008 for _, numEntries := range []uint64{0, 1, 1e5} { 1009 r := buildTestTableWithProvider(t, provider, numEntries, blockSize, indexBlockSize, DefaultCompression) 1010 var bytesIterated uint64 1011 var pool BufferPool 1012 pool.Init(5) 1013 citer, err := r.NewCompactionIter( 1014 &bytesIterated, CategoryAndQoS{}, nil, TrivialReaderProvider{Reader: r}, &pool) 1015 require.NoError(t, err) 1016 switch i := citer.(type) { 1017 case *compactionIterator: 1018 require.True(t, objstorageprovider.TestingCheckMaxReadahead(i.dataRH)) 1019 // Each key has one version, so no value block, regardless of 1020 // sstable version. 1021 require.Nil(t, i.vbRH) 1022 case *twoLevelCompactionIterator: 1023 require.True(t, objstorageprovider.TestingCheckMaxReadahead(i.dataRH)) 1024 // Each key has one version, so no value block, regardless of 1025 // sstable version. 1026 require.Nil(t, i.vbRH) 1027 default: 1028 require.Failf(t, fmt.Sprintf("unknown compaction iterator type: %T", citer), "") 1029 } 1030 require.NoError(t, citer.Close()) 1031 require.NoError(t, r.Close()) 1032 pool.Release() 1033 } 1034 } 1035 } 1036 } 1037 1038 func TestReadaheadSetupForV3TablesWithMultipleVersions(t *testing.T) { 1039 tmpDir := path.Join(t.TempDir()) 1040 provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(vfs.Default, tmpDir)) 1041 require.NoError(t, err) 1042 defer provider.Close() 1043 f0, _, err := provider.Create(context.Background(), base.FileTypeTable, base.FileNum(0).DiskFileNum(), objstorage.CreateOptions{}) 1044 require.NoError(t, err) 1045 1046 w := NewWriter(f0, WriterOptions{ 1047 TableFormat: TableFormatPebblev3, 1048 Comparer: testkeys.Comparer, 1049 }) 1050 keys := testkeys.Alpha(1) 1051 keyBuf := make([]byte, 1+testkeys.MaxSuffixLen) 1052 // Write a few keys with multiple timestamps (MVCC versions). 
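	// This writes a@2, a@1, b@2, b@1: two prefixes with two versions each, newest
	// suffix first, which gives the Pebblev3 writer older versions whose values can
	// be placed in value blocks.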
	for i := int64(0); i < 2; i++ {
		for j := int64(2); j >= 1; j-- {
			n := testkeys.WriteKeyAt(keyBuf[:], keys, i, j)
			key := keyBuf[:n]
			require.NoError(t, w.Set(key, key))
		}
	}
	require.NoError(t, w.Close())
	f1, err := provider.OpenForReading(context.Background(), base.FileTypeTable, base.FileNum(0).DiskFileNum(), objstorage.OpenOptions{})
	require.NoError(t, err)
	r, err := NewReader(f1, ReaderOptions{Comparer: testkeys.Comparer})
	require.NoError(t, err)
	defer r.Close()
	{
		var pool BufferPool
		pool.Init(5)
		citer, err := r.NewCompactionIter(
			nil, CategoryAndQoS{}, nil, TrivialReaderProvider{Reader: r}, &pool)
		require.NoError(t, err)
		defer citer.Close()
		i := citer.(*compactionIterator)
		require.True(t, objstorageprovider.TestingCheckMaxReadahead(i.dataRH))
		require.True(t, objstorageprovider.TestingCheckMaxReadahead(i.vbRH))
	}
	{
		iter, err := r.NewIter(nil, nil)
		require.NoError(t, err)
		defer iter.Close()
		i := iter.(*singleLevelIterator)
		require.False(t, objstorageprovider.TestingCheckMaxReadahead(i.dataRH))
		require.False(t, objstorageprovider.TestingCheckMaxReadahead(i.vbRH))
	}
}

func TestReaderChecksumErrors(t *testing.T) {
	for _, checksumType := range []ChecksumType{ChecksumTypeCRC32c, ChecksumTypeXXHash64} {
		t.Run(fmt.Sprintf("checksum-type=%d", checksumType), func(t *testing.T) {
			for _, twoLevelIndex := range []bool{false, true} {
				t.Run(fmt.Sprintf("two-level-index=%t", twoLevelIndex), func(t *testing.T) {
					mem := vfs.NewMem()

					{
						// Create an sstable with 3 data blocks.
						f, err := mem.Create("test")
						require.NoError(t, err)

						const blockSize = 32
						indexBlockSize := 4096
						if twoLevelIndex {
							indexBlockSize = 1
						}

						w := NewWriter(objstorageprovider.NewFileWritable(f), WriterOptions{
							BlockSize:      blockSize,
							IndexBlockSize: indexBlockSize,
							Checksum:       checksumType,
						})
						require.NoError(t, w.Set(bytes.Repeat([]byte("a"), blockSize), nil))
						require.NoError(t, w.Set(bytes.Repeat([]byte("b"), blockSize), nil))
						require.NoError(t, w.Set(bytes.Repeat([]byte("c"), blockSize), nil))
						require.NoError(t, w.Close())
					}

					// Load the layout so that we know the location of the data blocks.
					var layout *Layout
					{
						f, err := mem.Open("test")
						require.NoError(t, err)

						r, err := newReader(f, ReaderOptions{})
						require.NoError(t, err)
						layout, err = r.Layout()
						require.NoError(t, err)
						require.EqualValues(t, len(layout.Data), 3)
						require.NoError(t, r.Close())
					}

					for _, bh := range layout.Data {
						// Read the sstable and corrupt the first byte in the target data
						// block.
						orig, err := mem.Open("test")
						require.NoError(t, err)
						data, err := io.ReadAll(orig)
						require.NoError(t, err)
						require.NoError(t, orig.Close())

						// Corrupt the first byte in the block.
						data[bh.Offset] ^= 0xff

						corrupted, err := mem.Create("corrupted")
						require.NoError(t, err)
						_, err = corrupted.Write(data)
						require.NoError(t, err)
						require.NoError(t, corrupted.Close())

						// Verify that we encounter a checksum mismatch error while iterating
						// over the sstable.
1150 corrupted, err = mem.Open("corrupted") 1151 require.NoError(t, err) 1152 1153 r, err := newReader(corrupted, ReaderOptions{}) 1154 require.NoError(t, err) 1155 1156 iter, err := r.NewIter(nil, nil) 1157 require.NoError(t, err) 1158 for k, _ := iter.First(); k != nil; k, _ = iter.Next() { 1159 } 1160 require.Regexp(t, `checksum mismatch`, iter.Error()) 1161 require.Regexp(t, `checksum mismatch`, iter.Close()) 1162 1163 iter, err = r.NewIter(nil, nil) 1164 require.NoError(t, err) 1165 for k, _ := iter.Last(); k != nil; k, _ = iter.Prev() { 1166 } 1167 require.Regexp(t, `checksum mismatch`, iter.Error()) 1168 require.Regexp(t, `checksum mismatch`, iter.Close()) 1169 1170 require.NoError(t, r.Close()) 1171 } 1172 }) 1173 } 1174 }) 1175 } 1176 } 1177 1178 func TestValidateBlockChecksums(t *testing.T) { 1179 seed := uint64(time.Now().UnixNano()) 1180 rng := rand.New(rand.NewSource(seed)) 1181 t.Logf("using seed = %d", seed) 1182 1183 var allFiles []string 1184 for _, fixture := range TestFixtures { 1185 allFiles = append(allFiles, fixture.Filename) 1186 } 1187 1188 type corruptionLocation int 1189 const ( 1190 corruptionLocationData corruptionLocation = iota 1191 corruptionLocationIndex 1192 corruptionLocationTopIndex 1193 corruptionLocationFilter 1194 corruptionLocationRangeDel 1195 corruptionLocationProperties 1196 corruptionLocationMetaIndex 1197 ) 1198 1199 testCases := []struct { 1200 name string 1201 files []string 1202 corruptionLocations []corruptionLocation 1203 }{ 1204 { 1205 name: "no corruption", 1206 corruptionLocations: []corruptionLocation{}, 1207 }, 1208 { 1209 name: "data block corruption", 1210 corruptionLocations: []corruptionLocation{ 1211 corruptionLocationData, 1212 }, 1213 }, 1214 { 1215 name: "index block corruption", 1216 corruptionLocations: []corruptionLocation{ 1217 corruptionLocationIndex, 1218 }, 1219 }, 1220 { 1221 name: "top index block corruption", 1222 files: []string{ 1223 "h.no-compression.two_level_index.sst", 1224 }, 1225 corruptionLocations: []corruptionLocation{ 1226 corruptionLocationTopIndex, 1227 }, 1228 }, 1229 { 1230 name: "filter block corruption", 1231 files: []string{ 1232 "h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst", 1233 "h.table-bloom.no-compression.sst", 1234 "h.table-bloom.sst", 1235 }, 1236 corruptionLocations: []corruptionLocation{ 1237 corruptionLocationFilter, 1238 }, 1239 }, 1240 { 1241 name: "range deletion block corruption", 1242 corruptionLocations: []corruptionLocation{ 1243 corruptionLocationRangeDel, 1244 }, 1245 }, 1246 { 1247 name: "properties block corruption", 1248 corruptionLocations: []corruptionLocation{ 1249 corruptionLocationProperties, 1250 }, 1251 }, 1252 { 1253 name: "metaindex block corruption", 1254 corruptionLocations: []corruptionLocation{ 1255 corruptionLocationMetaIndex, 1256 }, 1257 }, 1258 { 1259 name: "multiple blocks corrupted", 1260 corruptionLocations: []corruptionLocation{ 1261 corruptionLocationData, 1262 corruptionLocationIndex, 1263 corruptionLocationRangeDel, 1264 corruptionLocationProperties, 1265 corruptionLocationMetaIndex, 1266 }, 1267 }, 1268 } 1269 1270 testFn := func(t *testing.T, file string, corruptionLocations []corruptionLocation) { 1271 // Create a copy of the SSTable that we can freely corrupt. 
1272 f, err := os.Open(filepath.Join("testdata", file)) 1273 require.NoError(t, err) 1274 1275 pathCopy := path.Join(t.TempDir(), path.Base(file)) 1276 fCopy, err := os.OpenFile(pathCopy, os.O_CREATE|os.O_RDWR, 0600) 1277 require.NoError(t, err) 1278 defer fCopy.Close() 1279 1280 _, err = io.Copy(fCopy, f) 1281 require.NoError(t, err) 1282 err = fCopy.Sync() 1283 require.NoError(t, err) 1284 require.NoError(t, f.Close()) 1285 1286 filter := bloom.FilterPolicy(10) 1287 r, err := newReader(fCopy, ReaderOptions{ 1288 Filters: map[string]FilterPolicy{ 1289 filter.Name(): filter, 1290 }, 1291 }) 1292 require.NoError(t, err) 1293 defer func() { require.NoError(t, r.Close()) }() 1294 1295 // Prior to corruption, validation is successful. 1296 require.NoError(t, r.ValidateBlockChecksums()) 1297 1298 // If we are not testing for corruption, we can stop here. 1299 if len(corruptionLocations) == 0 { 1300 return 1301 } 1302 1303 // Perform bit flips in various corruption locations. 1304 layout, err := r.Layout() 1305 require.NoError(t, err) 1306 for _, location := range corruptionLocations { 1307 var bh BlockHandle 1308 switch location { 1309 case corruptionLocationData: 1310 bh = layout.Data[rng.Intn(len(layout.Data))].BlockHandle 1311 case corruptionLocationIndex: 1312 bh = layout.Index[rng.Intn(len(layout.Index))] 1313 case corruptionLocationTopIndex: 1314 bh = layout.TopIndex 1315 case corruptionLocationFilter: 1316 bh = layout.Filter 1317 case corruptionLocationRangeDel: 1318 bh = layout.RangeDel 1319 case corruptionLocationProperties: 1320 bh = layout.Properties 1321 case corruptionLocationMetaIndex: 1322 bh = layout.MetaIndex 1323 default: 1324 t.Fatalf("unknown location") 1325 } 1326 1327 // Corrupt a random byte within the selected block. 1328 pos := int64(bh.Offset) + rng.Int63n(int64(bh.Length)) 1329 t.Logf("altering file=%s @ offset = %d", file, pos) 1330 1331 b := make([]byte, 1) 1332 n, err := fCopy.ReadAt(b, pos) 1333 require.NoError(t, err) 1334 require.Equal(t, 1, n) 1335 t.Logf("data (before) = %08b", b) 1336 1337 b[0] ^= 0xff 1338 t.Logf("data (after) = %08b", b) 1339 1340 _, err = fCopy.WriteAt(b, pos) 1341 require.NoError(t, err) 1342 } 1343 1344 // Write back to the file. 1345 err = fCopy.Sync() 1346 require.NoError(t, err) 1347 1348 // Confirm that checksum validation fails. 1349 err = r.ValidateBlockChecksums() 1350 require.Error(t, err) 1351 require.Regexp(t, `checksum mismatch`, err.Error()) 1352 } 1353 1354 for _, tc := range testCases { 1355 t.Run(tc.name, func(t *testing.T) { 1356 // By default, test across all files, unless overridden. 
1357 files := tc.files 1358 if files == nil { 1359 files = allFiles 1360 } 1361 for _, file := range files { 1362 t.Run(file, func(t *testing.T) { 1363 testFn(t, file, tc.corruptionLocations) 1364 }) 1365 } 1366 }) 1367 } 1368 } 1369 1370 func TestReader_TableFormat(t *testing.T) { 1371 test := func(t *testing.T, want TableFormat) { 1372 fs := vfs.NewMem() 1373 f, err := fs.Create("test") 1374 require.NoError(t, err) 1375 1376 opts := WriterOptions{TableFormat: want} 1377 w := NewWriter(objstorageprovider.NewFileWritable(f), opts) 1378 err = w.Close() 1379 require.NoError(t, err) 1380 1381 f, err = fs.Open("test") 1382 require.NoError(t, err) 1383 r, err := newReader(f, ReaderOptions{}) 1384 require.NoError(t, err) 1385 defer r.Close() 1386 1387 got, err := r.TableFormat() 1388 require.NoError(t, err) 1389 require.Equal(t, want, got) 1390 } 1391 1392 for tf := TableFormatLevelDB; tf <= TableFormatMax; tf++ { 1393 t.Run(tf.String(), func(t *testing.T) { 1394 test(t, tf) 1395 }) 1396 } 1397 } 1398 1399 func buildTestTable( 1400 t *testing.T, numEntries uint64, blockSize, indexBlockSize int, compression Compression, 1401 ) *Reader { 1402 provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(vfs.NewMem(), "" /* dirName */)) 1403 require.NoError(t, err) 1404 defer provider.Close() 1405 return buildTestTableWithProvider(t, provider, numEntries, blockSize, indexBlockSize, compression) 1406 } 1407 1408 func buildTestTableWithProvider( 1409 t *testing.T, 1410 provider objstorage.Provider, 1411 numEntries uint64, 1412 blockSize, indexBlockSize int, 1413 compression Compression, 1414 ) *Reader { 1415 f0, _, err := provider.Create(context.Background(), base.FileTypeTable, base.FileNum(0).DiskFileNum(), objstorage.CreateOptions{}) 1416 require.NoError(t, err) 1417 1418 w := NewWriter(f0, WriterOptions{ 1419 BlockSize: blockSize, 1420 IndexBlockSize: indexBlockSize, 1421 Compression: compression, 1422 FilterPolicy: nil, 1423 }) 1424 1425 var ikey InternalKey 1426 for i := uint64(0); i < numEntries; i++ { 1427 key := make([]byte, 8+i%3) 1428 value := make([]byte, i%100) 1429 binary.BigEndian.PutUint64(key, i) 1430 ikey.UserKey = key 1431 w.Add(ikey, value) 1432 } 1433 1434 require.NoError(t, w.Close()) 1435 1436 // Re-open that Filename for reading. 1437 f1, err := provider.OpenForReading(context.Background(), base.FileTypeTable, base.FileNum(0).DiskFileNum(), objstorage.OpenOptions{}) 1438 require.NoError(t, err) 1439 1440 c := cache.New(128 << 20) 1441 defer c.Unref() 1442 r, err := NewReader(f1, ReaderOptions{ 1443 Cache: c, 1444 }) 1445 require.NoError(t, err) 1446 return r 1447 } 1448 1449 func buildBenchmarkTable( 1450 b *testing.B, options WriterOptions, confirmTwoLevelIndex bool, offset int, 1451 ) (*Reader, [][]byte) { 1452 mem := vfs.NewMem() 1453 f0, err := mem.Create("bench") 1454 if err != nil { 1455 b.Fatal(err) 1456 } 1457 1458 w := NewWriter(objstorageprovider.NewFileWritable(f0), options) 1459 1460 var keys [][]byte 1461 var ikey InternalKey 1462 for i := uint64(0); i < 1e6; i++ { 1463 key := make([]byte, 8) 1464 binary.BigEndian.PutUint64(key, i+uint64(offset)) 1465 keys = append(keys, key) 1466 ikey.UserKey = key 1467 w.Add(ikey, nil) 1468 } 1469 1470 if err := w.Close(); err != nil { 1471 b.Fatal(err) 1472 } 1473 1474 // Re-open that Filename for reading. 
1475 f1, err := mem.Open("bench") 1476 if err != nil { 1477 b.Fatal(err) 1478 } 1479 c := cache.New(128 << 20) 1480 defer c.Unref() 1481 r, err := newReader(f1, ReaderOptions{ 1482 Cache: c, 1483 }) 1484 if err != nil { 1485 b.Fatal(err) 1486 } 1487 if confirmTwoLevelIndex && r.Properties.IndexPartitions == 0 { 1488 b.Fatalf("should have constructed two level index") 1489 } 1490 return r, keys 1491 } 1492 1493 var basicBenchmarks = []struct { 1494 name string 1495 options WriterOptions 1496 }{ 1497 { 1498 name: "restart=16,compression=Snappy", 1499 options: WriterOptions{ 1500 BlockSize: 32 << 10, 1501 BlockRestartInterval: 16, 1502 FilterPolicy: nil, 1503 Compression: SnappyCompression, 1504 TableFormat: TableFormatPebblev2, 1505 }, 1506 }, 1507 { 1508 name: "restart=16,compression=ZSTD", 1509 options: WriterOptions{ 1510 BlockSize: 32 << 10, 1511 BlockRestartInterval: 16, 1512 FilterPolicy: nil, 1513 Compression: ZstdCompression, 1514 TableFormat: TableFormatPebblev2, 1515 }, 1516 }, 1517 } 1518 1519 func BenchmarkTableIterSeekGE(b *testing.B) { 1520 for _, bm := range basicBenchmarks { 1521 b.Run(bm.name, 1522 func(b *testing.B) { 1523 r, keys := buildBenchmarkTable(b, bm.options, false, 0) 1524 it, err := r.NewIter(nil /* lower */, nil /* upper */) 1525 require.NoError(b, err) 1526 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 1527 1528 b.ResetTimer() 1529 for i := 0; i < b.N; i++ { 1530 it.SeekGE(keys[rng.Intn(len(keys))], base.SeekGEFlagsNone) 1531 } 1532 1533 b.StopTimer() 1534 it.Close() 1535 r.Close() 1536 }) 1537 } 1538 } 1539 1540 func BenchmarkTableIterSeekLT(b *testing.B) { 1541 for _, bm := range basicBenchmarks { 1542 b.Run(bm.name, 1543 func(b *testing.B) { 1544 r, keys := buildBenchmarkTable(b, bm.options, false, 0) 1545 it, err := r.NewIter(nil /* lower */, nil /* upper */) 1546 require.NoError(b, err) 1547 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 1548 1549 b.ResetTimer() 1550 for i := 0; i < b.N; i++ { 1551 it.SeekLT(keys[rng.Intn(len(keys))], base.SeekLTFlagsNone) 1552 } 1553 1554 b.StopTimer() 1555 it.Close() 1556 r.Close() 1557 }) 1558 } 1559 } 1560 1561 func BenchmarkTableIterNext(b *testing.B) { 1562 for _, bm := range basicBenchmarks { 1563 b.Run(bm.name, 1564 func(b *testing.B) { 1565 r, _ := buildBenchmarkTable(b, bm.options, false, 0) 1566 it, err := r.NewIter(nil /* lower */, nil /* upper */) 1567 require.NoError(b, err) 1568 1569 b.ResetTimer() 1570 var sum int64 1571 var key *InternalKey 1572 for i := 0; i < b.N; i++ { 1573 if key == nil { 1574 key, _ = it.First() 1575 } 1576 sum += int64(binary.BigEndian.Uint64(key.UserKey)) 1577 key, _ = it.Next() 1578 } 1579 if testing.Verbose() { 1580 fmt.Fprint(io.Discard, sum) 1581 } 1582 1583 b.StopTimer() 1584 it.Close() 1585 r.Close() 1586 }) 1587 } 1588 } 1589 1590 func BenchmarkTableIterPrev(b *testing.B) { 1591 for _, bm := range basicBenchmarks { 1592 b.Run(bm.name, 1593 func(b *testing.B) { 1594 r, _ := buildBenchmarkTable(b, bm.options, false, 0) 1595 it, err := r.NewIter(nil /* lower */, nil /* upper */) 1596 require.NoError(b, err) 1597 1598 b.ResetTimer() 1599 var sum int64 1600 var key *InternalKey 1601 for i := 0; i < b.N; i++ { 1602 if key == nil { 1603 key, _ = it.Last() 1604 } 1605 sum += int64(binary.BigEndian.Uint64(key.UserKey)) 1606 key, _ = it.Prev() 1607 } 1608 if testing.Verbose() { 1609 fmt.Fprint(io.Discard, sum) 1610 } 1611 1612 b.StopTimer() 1613 it.Close() 1614 r.Close() 1615 }) 1616 } 1617 } 1618 1619 func BenchmarkLayout(b *testing.B) { 1620 r, _ := 
buildBenchmarkTable(b, WriterOptions{}, false, 0) 1621 b.ResetTimer() 1622 for i := 0; i < b.N; i++ { 1623 r.Layout() 1624 } 1625 b.StopTimer() 1626 r.Close() 1627 } 1628 1629 func BenchmarkSeqSeekGEExhausted(b *testing.B) { 1630 // Snappy with no bloom filter. 1631 options := basicBenchmarks[0].options 1632 1633 for _, twoLevelIndex := range []bool{false, true} { 1634 switch twoLevelIndex { 1635 case false: 1636 options.IndexBlockSize = 0 1637 case true: 1638 options.IndexBlockSize = 512 1639 } 1640 const offsetCount = 5000 1641 reader, keys := buildBenchmarkTable(b, options, twoLevelIndex, offsetCount) 1642 var preKeys [][]byte 1643 for i := 0; i < offsetCount; i++ { 1644 key := make([]byte, 8) 1645 binary.BigEndian.PutUint64(key, uint64(i)) 1646 preKeys = append(preKeys, key) 1647 } 1648 var postKeys [][]byte 1649 for i := 0; i < offsetCount; i++ { 1650 key := make([]byte, 8) 1651 binary.BigEndian.PutUint64(key, uint64(i+offsetCount+len(keys))) 1652 postKeys = append(postKeys, key) 1653 } 1654 for _, exhaustedBounds := range []bool{false, true} { 1655 for _, prefixSeek := range []bool{false, true} { 1656 exhausted := "file" 1657 if exhaustedBounds { 1658 exhausted = "bounds" 1659 } 1660 seekKind := "ge" 1661 if prefixSeek { 1662 seekKind = "prefix-ge" 1663 } 1664 b.Run(fmt.Sprintf( 1665 "two-level=%t/exhausted=%s/seek=%s", twoLevelIndex, exhausted, seekKind), 1666 func(b *testing.B) { 1667 var upper []byte 1668 var seekKeys [][]byte 1669 if exhaustedBounds { 1670 seekKeys = preKeys 1671 upper = keys[0] 1672 } else { 1673 seekKeys = postKeys 1674 } 1675 it, err := reader.NewIter(nil /* lower */, upper) 1676 require.NoError(b, err) 1677 b.ResetTimer() 1678 pos := 0 1679 var seekGEFlags SeekGEFlags 1680 for i := 0; i < b.N; i++ { 1681 seekKey := seekKeys[0] 1682 var k *InternalKey 1683 if prefixSeek { 1684 k, _ = it.SeekPrefixGE(seekKey, seekKey, seekGEFlags) 1685 } else { 1686 k, _ = it.SeekGE(seekKey, seekGEFlags) 1687 } 1688 if k != nil { 1689 b.Fatal("found a key") 1690 } 1691 if it.Error() != nil { 1692 b.Fatalf("%s", it.Error().Error()) 1693 } 1694 pos++ 1695 if pos == len(seekKeys) { 1696 pos = 0 1697 seekGEFlags = seekGEFlags.DisableTrySeekUsingNext() 1698 } else { 1699 seekGEFlags = seekGEFlags.EnableTrySeekUsingNext() 1700 } 1701 } 1702 b.StopTimer() 1703 it.Close() 1704 }) 1705 } 1706 } 1707 reader.Close() 1708 } 1709 } 1710 1711 func BenchmarkIteratorScanManyVersions(b *testing.B) { 1712 options := WriterOptions{ 1713 BlockSize: 32 << 10, 1714 BlockRestartInterval: 16, 1715 FilterPolicy: nil, 1716 Compression: SnappyCompression, 1717 Comparer: testkeys.Comparer, 1718 } 1719 // 10,000 key prefixes, each with 100 versions. 1720 const keyCount = 10000 1721 const sharedPrefixLen = 32 1722 const unsharedPrefixLen = 8 1723 const versionCount = 100 1724 1725 // Take the very large keyspace consisting of alphabetic characters of 1726 // lengths up to unsharedPrefixLen and reduce it down to keyCount keys by 1727 // picking every 1 key every keyCount keys. 1728 keys := testkeys.Alpha(unsharedPrefixLen) 1729 keys = keys.EveryN(keys.Count() / keyCount) 1730 if keys.Count() < keyCount { 1731 b.Fatalf("expected %d keys, found %d", keyCount, keys.Count()) 1732 } 1733 keyBuf := make([]byte, sharedPrefixLen+unsharedPrefixLen+testkeys.MaxSuffixLen) 1734 for i := 0; i < sharedPrefixLen; i++ { 1735 keyBuf[i] = 'A' + byte(i) 1736 } 1737 // v2 sstable is 115,178,070 bytes. v3 sstable is 107,181,105 bytes with 1738 // 99,049,269 bytes in value blocks. 
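	// setupBench writes all keyCount*versionCount key-value pairs (100-byte random
	// values, newest suffix first) and then reopens the table through a block cache
	// of the requested size.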
1739 setupBench := func(b *testing.B, tableFormat TableFormat, cacheSize int64) *Reader { 1740 mem := vfs.NewMem() 1741 f0, err := mem.Create("bench") 1742 require.NoError(b, err) 1743 options.TableFormat = tableFormat 1744 w := NewWriter(objstorageprovider.NewFileWritable(f0), options) 1745 val := make([]byte, 100) 1746 rng := rand.New(rand.NewSource(100)) 1747 for i := int64(0); i < keys.Count(); i++ { 1748 for v := 0; v < versionCount; v++ { 1749 n := testkeys.WriteKeyAt(keyBuf[sharedPrefixLen:], keys, i, int64(versionCount-v+1)) 1750 key := keyBuf[:n+sharedPrefixLen] 1751 rng.Read(val) 1752 require.NoError(b, w.Set(key, val)) 1753 } 1754 } 1755 require.NoError(b, w.Close()) 1756 c := cache.New(cacheSize) 1757 defer c.Unref() 1758 // Re-open the Filename for reading. 1759 f0, err = mem.Open("bench") 1760 require.NoError(b, err) 1761 r, err := newReader(f0, ReaderOptions{ 1762 Cache: c, 1763 Comparer: testkeys.Comparer, 1764 }) 1765 require.NoError(b, err) 1766 return r 1767 } 1768 for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} { 1769 b.Run(fmt.Sprintf("format=%s", format.String()), func(b *testing.B) { 1770 // 150MiB results in a high cache hit rate for both formats. 20MiB 1771 // results in a high cache hit rate for the data blocks in 1772 // TableFormatPebblev3. 1773 for _, cacheSize := range []int64{20 << 20, 150 << 20} { 1774 b.Run(fmt.Sprintf("cache-size=%s", humanize.Bytes.Int64(cacheSize)), 1775 func(b *testing.B) { 1776 r := setupBench(b, format, cacheSize) 1777 defer func() { 1778 require.NoError(b, r.Close()) 1779 }() 1780 for _, readValue := range []bool{false, true} { 1781 b.Run(fmt.Sprintf("read-value=%t", readValue), func(b *testing.B) { 1782 iter, err := r.NewIter(nil, nil) 1783 require.NoError(b, err) 1784 var k *InternalKey 1785 var v base.LazyValue 1786 var valBuf [100]byte 1787 b.ResetTimer() 1788 for i := 0; i < b.N; i++ { 1789 if k == nil { 1790 k, _ = iter.First() 1791 if k == nil { 1792 b.Fatalf("k is nil") 1793 } 1794 } 1795 k, v = iter.Next() 1796 if k != nil && readValue { 1797 _, callerOwned, err := v.Value(valBuf[:]) 1798 if err != nil { 1799 b.Fatal(err) 1800 } else if callerOwned { 1801 b.Fatalf("unexpected callerOwned: %t", callerOwned) 1802 } 1803 } 1804 } 1805 }) 1806 } 1807 }) 1808 } 1809 }) 1810 } 1811 } 1812 1813 func BenchmarkIteratorScanNextPrefix(b *testing.B) { 1814 options := WriterOptions{ 1815 BlockSize: 32 << 10, 1816 BlockRestartInterval: 16, 1817 FilterPolicy: nil, 1818 Compression: SnappyCompression, 1819 TableFormat: TableFormatPebblev3, 1820 Comparer: testkeys.Comparer, 1821 } 1822 const keyCount = 10000 1823 const sharedPrefixLen = 32 1824 const unsharedPrefixLen = 8 1825 val := make([]byte, 100) 1826 rand.New(rand.NewSource(100)).Read(val) 1827 1828 // Take the very large keyspace consisting of alphabetic characters of 1829 // lengths up to unsharedPrefixLen and reduce it down to keyCount keys by 1830 // picking every 1 key every keyCount keys. 
1831 keys := testkeys.Alpha(unsharedPrefixLen) 1832 keys = keys.EveryN(keys.Count() / keyCount) 1833 if keys.Count() < keyCount { 1834 b.Fatalf("expected %d keys, found %d", keyCount, keys.Count()) 1835 } 1836 keyBuf := make([]byte, sharedPrefixLen+unsharedPrefixLen+testkeys.MaxSuffixLen) 1837 for i := 0; i < sharedPrefixLen; i++ { 1838 keyBuf[i] = 'A' + byte(i) 1839 } 1840 setupBench := func(b *testing.B, versCount int) (r *Reader, succKeys [][]byte) { 1841 mem := vfs.NewMem() 1842 f0, err := mem.Create("bench") 1843 require.NoError(b, err) 1844 w := NewWriter(objstorageprovider.NewFileWritable(f0), options) 1845 for i := int64(0); i < keys.Count(); i++ { 1846 for v := 0; v < versCount; v++ { 1847 n := testkeys.WriteKeyAt(keyBuf[sharedPrefixLen:], keys, i, int64(versCount-v+1)) 1848 key := keyBuf[:n+sharedPrefixLen] 1849 require.NoError(b, w.Set(key, val)) 1850 if v == 0 { 1851 prefixLen := testkeys.Comparer.Split(key) 1852 prefixKey := key[:prefixLen] 1853 succKey := testkeys.Comparer.ImmediateSuccessor(nil, prefixKey) 1854 succKeys = append(succKeys, succKey) 1855 } 1856 } 1857 } 1858 require.NoError(b, w.Close()) 1859 // NB: This 200MiB cache is sufficient for even the largest file: 10,000 1860 // keys * 100 versions = 1M keys, where each key-value pair is ~140 bytes 1861 // = 140MB. So we are not measuring the caching benefit of 1862 // TableFormatPebblev3 storing older values in value blocks. 1863 c := cache.New(200 << 20) 1864 defer c.Unref() 1865 // Re-open the Filename for reading. 1866 f0, err = mem.Open("bench") 1867 require.NoError(b, err) 1868 r, err = newReader(f0, ReaderOptions{ 1869 Cache: c, 1870 Comparer: testkeys.Comparer, 1871 }) 1872 require.NoError(b, err) 1873 return r, succKeys 1874 } 1875 // Analysis of some sample results with TableFormatPebblev2: 1876 // versions=1/method=seek-ge-10 22107622 53.57 ns/op 1877 // versions=1/method=next-prefix-10 36292837 33.07 ns/op 1878 // versions=2/method=seek-ge-10 14429138 82.92 ns/op 1879 // versions=2/method=next-prefix-10 19676055 60.78 ns/op 1880 // versions=10/method=seek-ge-10 1453726 825.2 ns/op 1881 // versions=10/method=next-prefix-10 2450498 489.6 ns/op 1882 // versions=100/method=seek-ge-10 965143 1257 ns/op 1883 // versions=100/method=next-prefix-10 1000000 1054 ns/op 1884 // 1885 // With 1 version, both SeekGE and NextPrefix will be able to complete after 1886 // doing a single call to blockIter.Next. However, SeekGE has to do two key 1887 // comparisons unlike the one key comparison in NextPrefix. This is because 1888 // SeekGE also compares *before* calling Next since it is possible that the 1889 // preceding SeekGE is already at the right place. 1890 // 1891 // With 2 versions, both will do two calls to blockIter.Next. The difference 1892 // in the cost is the same as in the 1 version case. 1893 // 1894 // With 10 versions, it is still likely that the desired key is in the same 1895 // data block. NextPrefix will seek only the blockIter. And in the rare case 1896 // that the key is in the next data block, it will step the index block (not 1897 // seek). In comparison, SeekGE will seek the index block too. 1898 // 1899 // With 100 versions we more often cross from one data block to the next, so 1900 // the difference in cost declines. 
1901 // 1902 // Some sample results with TableFormatPebblev3: 1903 1904 // versions=1/method=seek-ge-10 18702609 53.90 ns/op 1905 // versions=1/method=next-prefix-10 77440167 15.41 ns/op 1906 // versions=2/method=seek-ge-10 13554286 87.91 ns/op 1907 // versions=2/method=next-prefix-10 62148526 19.25 ns/op 1908 // versions=10/method=seek-ge-10 1316676 910.5 ns/op 1909 // versions=10/method=next-prefix-10 18829448 62.61 ns/op 1910 // versions=100/method=seek-ge-10 1166139 1025 ns/op 1911 // versions=100/method=next-prefix-10 4443386 265.3 ns/op 1912 // 1913 // NextPrefix is much cheaper than in TableFormatPebblev2 with larger number 1914 // of versions. It is also cheaper with 1 and 2 versions since 1915 // setHasSamePrefix=false eliminates a key comparison. 1916 for _, versionCount := range []int{1, 2, 10, 100} { 1917 b.Run(fmt.Sprintf("versions=%d", versionCount), func(b *testing.B) { 1918 r, succKeys := setupBench(b, versionCount) 1919 defer func() { 1920 require.NoError(b, r.Close()) 1921 }() 1922 for _, method := range []string{"seek-ge", "next-prefix"} { 1923 b.Run(fmt.Sprintf("method=%s", method), func(b *testing.B) { 1924 for _, readValue := range []bool{false, true} { 1925 b.Run(fmt.Sprintf("read-value=%t", readValue), func(b *testing.B) { 1926 iter, err := r.NewIter(nil, nil) 1927 require.NoError(b, err) 1928 var nextFunc func(index int) (*InternalKey, base.LazyValue) 1929 switch method { 1930 case "seek-ge": 1931 nextFunc = func(index int) (*InternalKey, base.LazyValue) { 1932 var flags base.SeekGEFlags 1933 return iter.SeekGE(succKeys[index], flags.EnableTrySeekUsingNext()) 1934 } 1935 case "next-prefix": 1936 nextFunc = func(index int) (*InternalKey, base.LazyValue) { 1937 return iter.NextPrefix(succKeys[index]) 1938 } 1939 default: 1940 b.Fatalf("unknown method %s", method) 1941 } 1942 n := keys.Count() 1943 j := n 1944 var k *InternalKey 1945 var v base.LazyValue 1946 var valBuf [100]byte 1947 b.ResetTimer() 1948 for i := 0; i < b.N; i++ { 1949 if k == nil { 1950 if j != n { 1951 b.Fatalf("unexpected %d != %d", j, n) 1952 } 1953 k, _ = iter.First() 1954 j = 0 1955 } else { 1956 k, v = nextFunc(int(j - 1)) 1957 if k != nil && readValue { 1958 _, callerOwned, err := v.Value(valBuf[:]) 1959 if err != nil { 1960 b.Fatal(err) 1961 } else if callerOwned { 1962 b.Fatalf("unexpected callerOwned: %t", callerOwned) 1963 } 1964 } 1965 1966 } 1967 if k != nil { 1968 j++ 1969 } 1970 } 1971 }) 1972 } 1973 }) 1974 } 1975 }) 1976 } 1977 } 1978 1979 func BenchmarkIteratorScanObsolete(b *testing.B) { 1980 options := WriterOptions{ 1981 BlockSize: 32 << 10, 1982 BlockRestartInterval: 16, 1983 FilterPolicy: nil, 1984 Compression: SnappyCompression, 1985 Comparer: testkeys.Comparer, 1986 } 1987 const keyCount = 1 << 20 1988 const keyLen = 10 1989 1990 // Take the very large keyspace consisting of alphabetic characters of 1991 // lengths up to unsharedPrefixLen and reduce it down to keyCount keys by 1992 // picking every 1 key every keyCount keys. 
1993 keys := testkeys.Alpha(keyLen) 1994 keys = keys.EveryN(keys.Count() / keyCount) 1995 if keys.Count() < keyCount { 1996 b.Fatalf("expected %d keys, found %d", keyCount, keys.Count()) 1997 } 1998 expectedKeyCount := keys.Count() 1999 keyBuf := make([]byte, keyLen) 2000 setupBench := func(b *testing.B, tableFormat TableFormat, cacheSize int64) *Reader { 2001 mem := vfs.NewMem() 2002 f0, err := mem.Create("bench") 2003 require.NoError(b, err) 2004 options.TableFormat = tableFormat 2005 w := NewWriter(objstorageprovider.NewFileWritable(f0), options) 2006 val := make([]byte, 100) 2007 rng := rand.New(rand.NewSource(100)) 2008 for i := int64(0); i < keys.Count(); i++ { 2009 n := testkeys.WriteKey(keyBuf, keys, i) 2010 key := keyBuf[:n] 2011 rng.Read(val) 2012 forceObsolete := true 2013 if i == 0 { 2014 forceObsolete = false 2015 } 2016 require.NoError(b, w.AddWithForceObsolete( 2017 base.MakeInternalKey(key, 0, InternalKeyKindSet), val, forceObsolete)) 2018 } 2019 require.NoError(b, w.Close()) 2020 c := cache.New(cacheSize) 2021 defer c.Unref() 2022 // Re-open the Filename for reading. 2023 f0, err = mem.Open("bench") 2024 require.NoError(b, err) 2025 r, err := newReader(f0, ReaderOptions{ 2026 Cache: c, 2027 Comparer: testkeys.Comparer, 2028 }) 2029 require.NoError(b, err) 2030 return r 2031 } 2032 for _, format := range []TableFormat{TableFormatPebblev3, TableFormatPebblev4} { 2033 b.Run(fmt.Sprintf("format=%s", format.String()), func(b *testing.B) { 2034 // 150MiB results in a high cache hit rate for both formats. 2035 for _, cacheSize := range []int64{1, 150 << 20} { 2036 b.Run(fmt.Sprintf("cache-size=%s", humanize.Bytes.Int64(cacheSize)), 2037 func(b *testing.B) { 2038 r := setupBench(b, format, cacheSize) 2039 defer func() { 2040 require.NoError(b, r.Close()) 2041 }() 2042 for _, hideObsoletePoints := range []bool{false, true} { 2043 b.Run(fmt.Sprintf("hide-obsolete=%t", hideObsoletePoints), func(b *testing.B) { 2044 var filterer *BlockPropertiesFilterer 2045 if format == TableFormatPebblev4 && hideObsoletePoints { 2046 filterer = newBlockPropertiesFilterer( 2047 []BlockPropertyFilter{obsoleteKeyBlockPropertyFilter{}}, nil) 2048 intersects, err := 2049 filterer.intersectsUserPropsAndFinishInit(r.Properties.UserProperties) 2050 if err != nil { 2051 b.Fatalf("%s", err.Error()) 2052 } 2053 if !intersects { 2054 b.Fatalf("sstable does not intersect") 2055 } 2056 } 2057 iter, err := r.NewIterWithBlockPropertyFiltersAndContextEtc( 2058 context.Background(), nil, nil, filterer, hideObsoletePoints, 2059 true, nil, CategoryAndQoS{}, nil, 2060 TrivialReaderProvider{Reader: r}) 2061 require.NoError(b, err) 2062 b.ResetTimer() 2063 for i := 0; i < b.N; i++ { 2064 count := int64(0) 2065 k, _ := iter.First() 2066 for k != nil { 2067 count++ 2068 k, _ = iter.Next() 2069 } 2070 if format == TableFormatPebblev4 && hideObsoletePoints { 2071 if count != 1 { 2072 b.Fatalf("found %d points", count) 2073 } 2074 } else { 2075 if count != expectedKeyCount { 2076 b.Fatalf("found %d points", count) 2077 } 2078 } 2079 } 2080 }) 2081 } 2082 }) 2083 } 2084 }) 2085 } 2086 } 2087 2088 func newReader(r ReadableFile, o ReaderOptions, extraOpts ...ReaderOption) (*Reader, error) { 2089 readable, err := NewSimpleReadable(r) 2090 if err != nil { 2091 return nil, err 2092 } 2093 return NewReader(readable, o, extraOpts...) 2094 }