github.com/cockroachdb/pebble@v1.1.2/sstable/reader_test.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package sstable 6 7 import ( 8 "bytes" 9 "context" 10 "encoding/binary" 11 "fmt" 12 "io" 13 "math" 14 "os" 15 "path" 16 "path/filepath" 17 "strings" 18 "testing" 19 "time" 20 21 "github.com/cockroachdb/datadriven" 22 "github.com/cockroachdb/errors" 23 "github.com/cockroachdb/pebble/bloom" 24 "github.com/cockroachdb/pebble/internal/base" 25 "github.com/cockroachdb/pebble/internal/cache" 26 "github.com/cockroachdb/pebble/internal/humanize" 27 "github.com/cockroachdb/pebble/internal/manifest" 28 "github.com/cockroachdb/pebble/internal/testkeys" 29 "github.com/cockroachdb/pebble/objstorage" 30 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 31 "github.com/cockroachdb/pebble/vfs" 32 "github.com/cockroachdb/pebble/vfs/errorfs" 33 "github.com/stretchr/testify/require" 34 "golang.org/x/exp/rand" 35 ) 36 37 // get is a testing helper that simulates a read and helps verify bloom filters 38 // until they are available through iterators. 
39 func (r *Reader) get(key []byte) (value []byte, err error) { 40 if r.err != nil { 41 return nil, r.err 42 } 43 44 if r.tableFilter != nil { 45 dataH, err := r.readFilter(context.Background(), nil /* stats */) 46 if err != nil { 47 return nil, err 48 } 49 var lookupKey []byte 50 if r.Split != nil { 51 lookupKey = key[:r.Split(key)] 52 } else { 53 lookupKey = key 54 } 55 mayContain := r.tableFilter.mayContain(dataH.Get(), lookupKey) 56 dataH.Release() 57 if !mayContain { 58 return nil, base.ErrNotFound 59 } 60 } 61 62 i, err := r.NewIter(nil /* lower */, nil /* upper */) 63 if err != nil { 64 return nil, err 65 } 66 var v base.LazyValue 67 ikey, v := i.SeekGE(key, base.SeekGEFlagsNone) 68 value, _, err = v.Value(nil) 69 if err != nil { 70 return nil, err 71 } 72 73 if ikey == nil || r.Compare(key, ikey.UserKey) != 0 { 74 err := i.Close() 75 if err == nil { 76 err = base.ErrNotFound 77 } 78 return nil, err 79 } 80 81 // The value will be "freed" when the iterator is closed, so make a copy 82 // which will outlast the lifetime of the iterator. 83 newValue := make([]byte, len(value)) 84 copy(newValue, value) 85 if err := i.Close(); err != nil { 86 return nil, err 87 } 88 return newValue, nil 89 } 90 91 // iterAdapter adapts the new Iterator API which returns the key and value from 92 // positioning methods (Seek*, First, Last, Next, Prev) to the old API which 93 // returned a boolean corresponding to Valid. Only used by test code. 
94 type iterAdapter struct { 95 Iterator 96 key *InternalKey 97 val []byte 98 } 99 100 func newIterAdapter(iter Iterator) *iterAdapter { 101 return &iterAdapter{ 102 Iterator: iter, 103 } 104 } 105 106 func (i *iterAdapter) update(key *InternalKey, val base.LazyValue) bool { 107 i.key = key 108 if v, _, err := val.Value(nil); err != nil { 109 i.key = nil 110 i.val = nil 111 } else { 112 i.val = v 113 } 114 return i.key != nil 115 } 116 117 func (i *iterAdapter) String() string { 118 return "iter-adapter" 119 } 120 121 func (i *iterAdapter) SeekGE(key []byte, flags base.SeekGEFlags) bool { 122 return i.update(i.Iterator.SeekGE(key, flags)) 123 } 124 125 func (i *iterAdapter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) bool { 126 return i.update(i.Iterator.SeekPrefixGE(prefix, key, flags)) 127 } 128 129 func (i *iterAdapter) SeekLT(key []byte, flags base.SeekLTFlags) bool { 130 return i.update(i.Iterator.SeekLT(key, flags)) 131 } 132 133 func (i *iterAdapter) First() bool { 134 return i.update(i.Iterator.First()) 135 } 136 137 func (i *iterAdapter) Last() bool { 138 return i.update(i.Iterator.Last()) 139 } 140 141 func (i *iterAdapter) Next() bool { 142 return i.update(i.Iterator.Next()) 143 } 144 145 func (i *iterAdapter) NextPrefix(succKey []byte) bool { 146 return i.update(i.Iterator.NextPrefix(succKey)) 147 } 148 149 func (i *iterAdapter) NextIgnoreResult() { 150 i.Iterator.Next() 151 i.update(nil, base.LazyValue{}) 152 } 153 154 func (i *iterAdapter) Prev() bool { 155 return i.update(i.Iterator.Prev()) 156 } 157 158 func (i *iterAdapter) Key() *InternalKey { 159 return i.key 160 } 161 162 func (i *iterAdapter) Value() []byte { 163 return i.val 164 } 165 166 func (i *iterAdapter) Valid() bool { 167 return i.key != nil 168 } 169 170 func (i *iterAdapter) SetBounds(lower, upper []byte) { 171 i.Iterator.SetBounds(lower, upper) 172 i.key = nil 173 } 174 175 func TestVirtualReader(t *testing.T) { 176 // A faux filenum used to create fake filemetadata for 
testing. 177 var fileNum int = 1 178 nextFileNum := func() base.FileNum { 179 fileNum++ 180 return base.FileNum(fileNum - 1) 181 } 182 183 // Set during the latest build command. 184 var r *Reader 185 var meta manifest.PhysicalFileMeta 186 var bp BufferPool 187 188 // Set during the latest virtualize command. 189 var vMeta1 manifest.VirtualFileMeta 190 var v VirtualReader 191 192 defer func() { 193 if r != nil { 194 require.NoError(t, r.Close()) 195 bp.Release() 196 } 197 }() 198 199 createPhysicalMeta := func(w *WriterMetadata, r *Reader) (manifest.PhysicalFileMeta, error) { 200 meta := &manifest.FileMetadata{} 201 meta.FileNum = nextFileNum() 202 meta.CreationTime = time.Now().Unix() 203 meta.Size = w.Size 204 meta.SmallestSeqNum = w.SmallestSeqNum 205 meta.LargestSeqNum = w.LargestSeqNum 206 207 if w.HasPointKeys { 208 meta.ExtendPointKeyBounds(r.Compare, w.SmallestPoint, w.LargestPoint) 209 } 210 if w.HasRangeDelKeys { 211 meta.ExtendPointKeyBounds(r.Compare, w.SmallestRangeDel, w.LargestRangeDel) 212 } 213 if w.HasRangeKeys { 214 meta.ExtendRangeKeyBounds(r.Compare, w.SmallestRangeKey, w.LargestRangeKey) 215 } 216 meta.InitPhysicalBacking() 217 218 if err := meta.Validate(r.Compare, r.opts.Comparer.FormatKey); err != nil { 219 return manifest.PhysicalFileMeta{}, err 220 } 221 222 return meta.PhysicalMeta(), nil 223 } 224 225 formatWMeta := func(m *WriterMetadata) string { 226 var b bytes.Buffer 227 if m.HasPointKeys { 228 fmt.Fprintf(&b, "point: [%s-%s]\n", m.SmallestPoint, m.LargestPoint) 229 } 230 if m.HasRangeDelKeys { 231 fmt.Fprintf(&b, "rangedel: [%s-%s]\n", m.SmallestRangeDel, m.LargestRangeDel) 232 } 233 if m.HasRangeKeys { 234 fmt.Fprintf(&b, "rangekey: [%s-%s]\n", m.SmallestRangeKey, m.LargestRangeKey) 235 } 236 fmt.Fprintf(&b, "seqnums: [%d-%d]\n", m.SmallestSeqNum, m.LargestSeqNum) 237 return b.String() 238 } 239 240 formatVirtualReader := func(v *VirtualReader) string { 241 var b bytes.Buffer 242 fmt.Fprintf(&b, "bounds: [%s-%s]\n", 
v.vState.lower, v.vState.upper) 243 fmt.Fprintf(&b, "filenum: %s\n", v.vState.fileNum.String()) 244 fmt.Fprintf( 245 &b, "props: %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d\n", 246 "NumEntries", 247 v.Properties.NumEntries, 248 "RawKeySize", 249 v.Properties.RawKeySize, 250 "RawValueSize", 251 v.Properties.RawValueSize, 252 "RawPointTombstoneKeySize", 253 v.Properties.RawPointTombstoneKeySize, 254 "RawPointTombstoneValueSize", 255 v.Properties.RawPointTombstoneValueSize, 256 "NumSizedDeletions", 257 v.Properties.NumSizedDeletions, 258 "NumDeletions", 259 v.Properties.NumDeletions, 260 "NumRangeDeletions", 261 v.Properties.NumRangeDeletions, 262 "NumRangeKeyDels", 263 v.Properties.NumRangeKeyDels, 264 "NumRangeKeySets", 265 v.Properties.NumRangeKeySets, 266 "ValueBlocksSize", 267 v.Properties.ValueBlocksSize, 268 ) 269 return b.String() 270 } 271 272 datadriven.RunTest(t, "testdata/virtual_reader", func(t *testing.T, td *datadriven.TestData) string { 273 switch td.Cmd { 274 case "build": 275 if r != nil { 276 bp.Release() 277 _ = r.Close() 278 r = nil 279 meta.FileMetadata = nil 280 vMeta1.FileMetadata = nil 281 v = VirtualReader{} 282 } 283 var wMeta *WriterMetadata 284 var err error 285 writerOpts := &WriterOptions{ 286 TableFormat: TableFormatMax, 287 } 288 // Use a single level index by default. 289 writerOpts.IndexBlockSize = 100000 290 if len(td.CmdArgs) == 1 { 291 if td.CmdArgs[0].String() == "twoLevel" { 292 // Force a two level index. 293 writerOpts.IndexBlockSize = 1 294 writerOpts.BlockSize = 1 295 } 296 } 297 wMeta, r, err = runBuildCmd(td, writerOpts, 0) 298 if err != nil { 299 return err.Error() 300 } 301 bp.Init(5) 302 303 // Create a fake filemetada using the writer meta. 
304 meta, err = createPhysicalMeta(wMeta, r) 305 if err != nil { 306 return err.Error() 307 } 308 r.fileNum = meta.FileBacking.DiskFileNum 309 return formatWMeta(wMeta) 310 311 case "virtualize": 312 // virtualize will split the previously built physical sstable into 313 // a single sstable with virtual bounds. The command assumes that 314 // the bounds for the virtual sstable are valid. For the purposes of 315 // this command the bounds must be valid keys. In general, and for 316 // this command, range key/range del spans must also not span across 317 // virtual sstable bounds. 318 if meta.FileMetadata == nil { 319 return "build must be called at least once before virtualize" 320 } 321 if vMeta1.FileMetadata != nil { 322 vMeta1.FileMetadata = nil 323 v = VirtualReader{} 324 } 325 vMeta := &manifest.FileMetadata{ 326 FileBacking: meta.FileBacking, 327 SmallestSeqNum: meta.SmallestSeqNum, 328 LargestSeqNum: meta.LargestSeqNum, 329 Virtual: true, 330 } 331 // Parse the virtualization bounds. 332 bounds := strings.Split(td.CmdArgs[0].String(), "-") 333 vMeta.Smallest = base.ParseInternalKey(bounds[0]) 334 vMeta.Largest = base.ParseInternalKey(bounds[1]) 335 vMeta.FileNum = nextFileNum() 336 var err error 337 vMeta.Size, err = r.EstimateDiskUsage(vMeta.Smallest.UserKey, vMeta.Largest.UserKey) 338 if err != nil { 339 return err.Error() 340 } 341 vMeta.ValidateVirtual(meta.FileMetadata) 342 343 vMeta1 = vMeta.VirtualMeta() 344 v = MakeVirtualReader(r, vMeta1, false /* isForeign */) 345 return formatVirtualReader(&v) 346 347 case "citer": 348 // Creates a compaction iterator from the virtual reader, and then 349 // just scans the keyspace. Which is all a compaction iterator is 350 // used for. This tests the First and Next calls. 
351 if vMeta1.FileMetadata == nil { 352 return "virtualize must be called before creating compaction iters" 353 } 354 355 var rp ReaderProvider 356 var bytesIterated uint64 357 iter, err := v.NewCompactionIter(&bytesIterated, rp, &bp) 358 if err != nil { 359 return err.Error() 360 } 361 362 var buf bytes.Buffer 363 for key, val := iter.First(); key != nil; key, val = iter.Next() { 364 fmt.Fprintf(&buf, "%s:%s\n", key.String(), val.InPlaceValue()) 365 } 366 err = iter.Close() 367 if err != nil { 368 return err.Error() 369 } 370 return buf.String() 371 372 case "constrain": 373 if vMeta1.FileMetadata == nil { 374 return "virtualize must be called before constrain" 375 } 376 splits := strings.Split(td.CmdArgs[0].String(), ",") 377 of, ol := []byte(splits[0]), []byte(splits[1]) 378 inclusive, f, l := v.vState.constrainBounds(of, ol, splits[2] == "true") 379 var buf bytes.Buffer 380 buf.Write(f) 381 buf.WriteByte(',') 382 buf.Write(l) 383 buf.WriteByte(',') 384 if inclusive { 385 buf.WriteString("true") 386 } else { 387 buf.WriteString("false") 388 } 389 buf.WriteByte('\n') 390 return buf.String() 391 392 case "scan-range-del": 393 if vMeta1.FileMetadata == nil { 394 return "virtualize must be called before scan-range-del" 395 } 396 iter, err := v.NewRawRangeDelIter() 397 if err != nil { 398 return err.Error() 399 } 400 if iter == nil { 401 return "" 402 } 403 defer iter.Close() 404 405 var buf bytes.Buffer 406 for s := iter.First(); s != nil; s = iter.Next() { 407 fmt.Fprintf(&buf, "%s\n", s) 408 } 409 return buf.String() 410 411 case "scan-range-key": 412 if vMeta1.FileMetadata == nil { 413 return "virtualize must be called before scan-range-key" 414 } 415 iter, err := v.NewRawRangeKeyIter() 416 if err != nil { 417 return err.Error() 418 } 419 if iter == nil { 420 return "" 421 } 422 defer iter.Close() 423 424 var buf bytes.Buffer 425 for s := iter.First(); s != nil; s = iter.Next() { 426 fmt.Fprintf(&buf, "%s\n", s) 427 } 428 return buf.String() 429 430 case "iter": 
431 if vMeta1.FileMetadata == nil { 432 return "virtualize must be called before iter" 433 } 434 var lower, upper []byte 435 if len(td.CmdArgs) > 0 { 436 splits := strings.Split(td.CmdArgs[0].String(), "-") 437 lower, upper = []byte(splits[0]), []byte(splits[1]) 438 } 439 440 var stats base.InternalIteratorStats 441 iter, err := v.NewIterWithBlockPropertyFiltersAndContextEtc( 442 context.Background(), lower, upper, nil, false, false, 443 &stats, TrivialReaderProvider{Reader: r}) 444 if err != nil { 445 return err.Error() 446 } 447 return runIterCmd(td, iter, true, runIterCmdStats(&stats)) 448 449 default: 450 return fmt.Sprintf("unknown command: %s", td.Cmd) 451 } 452 }) 453 } 454 455 func TestReader(t *testing.T) { 456 writerOpts := map[string]WriterOptions{ 457 // No bloom filters. 458 "default": {}, 459 "bloom10bit": { 460 // The standard policy. 461 FilterPolicy: bloom.FilterPolicy(10), 462 FilterType: base.TableFilter, 463 }, 464 "bloom1bit": { 465 // A policy with many false positives. 466 FilterPolicy: bloom.FilterPolicy(1), 467 FilterType: base.TableFilter, 468 }, 469 "bloom100bit": { 470 // A policy unlikely to have false positives. 
471 FilterPolicy: bloom.FilterPolicy(100), 472 FilterType: base.TableFilter, 473 }, 474 } 475 476 blockSizes := map[string]int{ 477 "1bytes": 1, 478 "5bytes": 5, 479 "10bytes": 10, 480 "25bytes": 25, 481 "Maxbytes": math.MaxInt32, 482 } 483 484 opts := map[string]*Comparer{ 485 "default": testkeys.Comparer, 486 "prefixFilter": fixtureComparer, 487 } 488 489 testDirs := map[string]string{ 490 "default": "testdata/reader", 491 "prefixFilter": "testdata/prefixreader", 492 } 493 494 for format := TableFormatPebblev2; format <= TableFormatMax; format++ { 495 for dName, blockSize := range blockSizes { 496 for iName, indexBlockSize := range blockSizes { 497 for lName, tableOpt := range writerOpts { 498 for oName, cmp := range opts { 499 tableOpt.BlockSize = blockSize 500 tableOpt.Comparer = cmp 501 tableOpt.IndexBlockSize = indexBlockSize 502 tableOpt.TableFormat = format 503 504 t.Run( 505 fmt.Sprintf("format=%d,opts=%s,writerOpts=%s,blockSize=%s,indexSize=%s", 506 format, oName, lName, dName, iName), 507 func(t *testing.T) { 508 runTestReader( 509 t, tableOpt, testDirs[oName], nil /* Reader */, true) 510 }) 511 } 512 } 513 } 514 } 515 } 516 } 517 518 func TestReaderHideObsolete(t *testing.T) { 519 blockSizes := map[string]int{ 520 "1bytes": 1, 521 "5bytes": 5, 522 "10bytes": 10, 523 "25bytes": 25, 524 "Maxbytes": math.MaxInt32, 525 } 526 for dName, blockSize := range blockSizes { 527 opts := WriterOptions{ 528 TableFormat: TableFormatPebblev4, 529 BlockSize: blockSize, 530 IndexBlockSize: blockSize, 531 Comparer: testkeys.Comparer, 532 } 533 t.Run(fmt.Sprintf("blockSize=%s", dName), func(t *testing.T) { 534 runTestReader( 535 t, opts, "testdata/reader_hide_obsolete", 536 nil /* Reader */, true) 537 }) 538 } 539 } 540 541 func TestHamletReader(t *testing.T) { 542 prebuiltSSTs := []string{ 543 "testdata/h.ldb", 544 "testdata/h.sst", 545 "testdata/h.no-compression.sst", 546 "testdata/h.no-compression.two_level_index.sst", 547 "testdata/h.block-bloom.no-compression.sst", 
548 "testdata/h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst", 549 "testdata/h.table-bloom.no-compression.sst", 550 } 551 552 for _, prebuiltSST := range prebuiltSSTs { 553 f, err := os.Open(filepath.FromSlash(prebuiltSST)) 554 require.NoError(t, err) 555 556 r, err := newReader(f, ReaderOptions{}) 557 require.NoError(t, err) 558 559 t.Run( 560 fmt.Sprintf("sst=%s", prebuiltSST), 561 func(t *testing.T) { 562 runTestReader(t, WriterOptions{}, "testdata/hamletreader", r, false) 563 }, 564 ) 565 } 566 } 567 568 func forEveryTableFormat[I any]( 569 t *testing.T, formatTable [NumTableFormats]I, runTest func(*testing.T, TableFormat, I), 570 ) { 571 t.Helper() 572 for tf := TableFormatUnspecified + 1; tf <= TableFormatMax; tf++ { 573 t.Run(tf.String(), func(t *testing.T) { 574 runTest(t, tf, formatTable[tf]) 575 }) 576 } 577 } 578 579 func TestReaderStats(t *testing.T) { 580 forEveryTableFormat[string](t, 581 [NumTableFormats]string{ 582 TableFormatUnspecified: "", 583 TableFormatLevelDB: "testdata/readerstats_LevelDB", 584 TableFormatRocksDBv2: "testdata/readerstats_LevelDB", 585 TableFormatPebblev1: "testdata/readerstats_LevelDB", 586 TableFormatPebblev2: "testdata/readerstats_LevelDB", 587 TableFormatPebblev3: "testdata/readerstats_Pebblev3", 588 TableFormatPebblev4: "testdata/readerstats_Pebblev3", 589 }, func(t *testing.T, format TableFormat, dir string) { 590 if dir == "" { 591 t.Skip() 592 } 593 writerOpt := WriterOptions{ 594 BlockSize: 32 << 10, 595 IndexBlockSize: 32 << 10, 596 Comparer: testkeys.Comparer, 597 TableFormat: format, 598 } 599 runTestReader(t, writerOpt, dir, nil /* Reader */, false /* printValue */) 600 }) 601 } 602 603 func TestReaderWithBlockPropertyFilter(t *testing.T) { 604 // Some of these tests examine internal iterator state, so they require 605 // determinism. When the invariants tag is set, disableBoundsOpt may disable 606 // the bounds optimization depending on the iterator pointer address. 
This 607 // can add nondeterminism to the internal iterator statae. Disable this 608 // nondeterminism for the duration of this test. 609 ensureBoundsOptDeterminism = true 610 defer func() { ensureBoundsOptDeterminism = false }() 611 612 forEveryTableFormat[string](t, 613 [NumTableFormats]string{ 614 TableFormatUnspecified: "", // Block properties unsupported 615 TableFormatLevelDB: "", // Block properties unsupported 616 TableFormatRocksDBv2: "", // Block properties unsupported 617 TableFormatPebblev1: "", // Block properties unsupported 618 TableFormatPebblev2: "testdata/reader_bpf/Pebblev2", 619 TableFormatPebblev3: "testdata/reader_bpf/Pebblev3", 620 TableFormatPebblev4: "testdata/reader_bpf/Pebblev3", 621 }, func(t *testing.T, format TableFormat, dir string) { 622 if dir == "" { 623 t.Skip("Block-properties unsupported") 624 } 625 writerOpt := WriterOptions{ 626 Comparer: testkeys.Comparer, 627 TableFormat: format, 628 BlockPropertyCollectors: []func() BlockPropertyCollector{NewTestKeysBlockPropertyCollector}, 629 } 630 runTestReader(t, writerOpt, dir, nil /* Reader */, false) 631 }) 632 } 633 634 func TestInjectedErrors(t *testing.T) { 635 prebuiltSSTs := []string{ 636 "testdata/h.ldb", 637 "testdata/h.sst", 638 "testdata/h.no-compression.sst", 639 "testdata/h.no-compression.two_level_index.sst", 640 "testdata/h.block-bloom.no-compression.sst", 641 "testdata/h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst", 642 "testdata/h.table-bloom.no-compression.sst", 643 } 644 645 for _, prebuiltSST := range prebuiltSSTs { 646 run := func(i int) (reterr error) { 647 f, err := vfs.Default.Open(filepath.FromSlash(prebuiltSST)) 648 require.NoError(t, err) 649 650 r, err := newReader(errorfs.WrapFile(f, errorfs.OnIndex(int32(i))), ReaderOptions{}) 651 if err != nil { 652 return firstError(err, f.Close()) 653 } 654 defer func() { reterr = firstError(reterr, r.Close()) }() 655 656 _, err = r.EstimateDiskUsage([]byte("borrower"), []byte("lender")) 657 if 
err != nil { 658 return err 659 } 660 661 iter, err := r.NewIter(nil, nil) 662 if err != nil { 663 return err 664 } 665 defer func() { reterr = firstError(reterr, iter.Close()) }() 666 for k, v := iter.First(); k != nil; k, v = iter.Next() { 667 val, _, err := v.Value(nil) 668 if err != nil { 669 return err 670 } 671 if val == nil { 672 break 673 } 674 } 675 if err = iter.Error(); err != nil { 676 return err 677 } 678 return nil 679 } 680 for i := 0; ; i++ { 681 err := run(i) 682 if errors.Is(err, errorfs.ErrInjected) { 683 t.Logf("%q, index %d: %s", prebuiltSST, i, err) 684 continue 685 } 686 if err != nil { 687 t.Errorf("%q, index %d: non-injected error: %+v", prebuiltSST, i, err) 688 break 689 } 690 t.Logf("%q: no error at index %d", prebuiltSST, i) 691 break 692 } 693 } 694 } 695 696 func TestInvalidReader(t *testing.T) { 697 invalid, err := NewSimpleReadable(vfs.NewMemFile([]byte("invalid sst bytes"))) 698 if err != nil { 699 t.Fatal(err) 700 } 701 testCases := []struct { 702 readable objstorage.Readable 703 expected string 704 }{ 705 {nil, "nil file"}, 706 {invalid, "invalid table"}, 707 } 708 for _, tc := range testCases { 709 r, err := NewReader(tc.readable, ReaderOptions{}) 710 if !strings.Contains(err.Error(), tc.expected) { 711 t.Fatalf("expected %q, but found %q", tc.expected, err.Error()) 712 } 713 if r != nil { 714 t.Fatalf("found non-nil reader returned with non-nil error %q", err.Error()) 715 } 716 } 717 } 718 719 func indexLayoutString(t *testing.T, r *Reader) string { 720 indexH, err := r.readIndex(context.Background(), nil) 721 require.NoError(t, err) 722 defer indexH.Release() 723 var buf strings.Builder 724 twoLevelIndex := r.Properties.IndexType == twoLevelIndex 725 buf.WriteString("index entries:\n") 726 iter, err := newBlockIter(r.Compare, indexH.Get()) 727 defer func() { 728 require.NoError(t, iter.Close()) 729 }() 730 require.NoError(t, err) 731 for key, value := iter.First(); key != nil; key, value = iter.Next() { 732 bh, err := 
decodeBlockHandleWithProperties(value.InPlaceValue()) 733 require.NoError(t, err) 734 fmt.Fprintf(&buf, " %s: size %d\n", string(key.UserKey), bh.Length) 735 if twoLevelIndex { 736 b, err := r.readBlock( 737 context.Background(), bh.BlockHandle, nil, nil, nil, nil) 738 require.NoError(t, err) 739 defer b.Release() 740 iter2, err := newBlockIter(r.Compare, b.Get()) 741 defer func() { 742 require.NoError(t, iter2.Close()) 743 }() 744 require.NoError(t, err) 745 for key, value := iter2.First(); key != nil; key, value = iter2.Next() { 746 bh, err := decodeBlockHandleWithProperties(value.InPlaceValue()) 747 require.NoError(t, err) 748 fmt.Fprintf(&buf, " %s: size %d\n", string(key.UserKey), bh.Length) 749 } 750 } 751 } 752 return buf.String() 753 } 754 755 func runTestReader(t *testing.T, o WriterOptions, dir string, r *Reader, printValue bool) { 756 datadriven.Walk(t, dir, func(t *testing.T, path string) { 757 defer func() { 758 if r != nil { 759 r.Close() 760 r = nil 761 } 762 }() 763 764 datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string { 765 switch d.Cmd { 766 case "build": 767 if r != nil { 768 r.Close() 769 r = nil 770 } 771 var cacheSize int 772 var printLayout bool 773 d.MaybeScanArgs(t, "cache-size", &cacheSize) 774 d.MaybeScanArgs(t, "print-layout", &printLayout) 775 d.MaybeScanArgs(t, "block-size", &o.BlockSize) 776 d.MaybeScanArgs(t, "index-block-size", &o.IndexBlockSize) 777 778 var err error 779 _, r, err = runBuildCmd(d, &o, cacheSize) 780 if err != nil { 781 return err.Error() 782 } 783 if printLayout { 784 return indexLayoutString(t, r) 785 } 786 return "" 787 788 case "iter": 789 seqNum, err := scanGlobalSeqNum(d) 790 if err != nil { 791 return err.Error() 792 } 793 var stats base.InternalIteratorStats 794 r.Properties.GlobalSeqNum = seqNum 795 var bpfs []BlockPropertyFilter 796 if d.HasArg("block-property-filter") { 797 var filterMin, filterMax uint64 798 d.ScanArgs(t, "block-property-filter", &filterMin, &filterMax) 799 
bpf := NewTestKeysBlockPropertyFilter(filterMin, filterMax) 800 bpfs = append(bpfs, bpf) 801 } 802 hideObsoletePoints := false 803 if d.HasArg("hide-obsolete-points") { 804 d.ScanArgs(t, "hide-obsolete-points", &hideObsoletePoints) 805 if hideObsoletePoints { 806 hideObsoletePoints, bpfs = r.TryAddBlockPropertyFilterForHideObsoletePoints( 807 InternalKeySeqNumMax, InternalKeySeqNumMax-1, bpfs) 808 require.True(t, hideObsoletePoints) 809 } 810 } 811 var filterer *BlockPropertiesFilterer 812 if len(bpfs) > 0 { 813 filterer = newBlockPropertiesFilterer(bpfs, nil) 814 intersects, err := 815 filterer.intersectsUserPropsAndFinishInit(r.Properties.UserProperties) 816 if err != nil { 817 return err.Error() 818 } 819 if !intersects { 820 return "table does not intersect BlockPropertyFilter" 821 } 822 } 823 iter, err := r.NewIterWithBlockPropertyFiltersAndContextEtc( 824 context.Background(), 825 nil, /* lower */ 826 nil, /* upper */ 827 filterer, 828 hideObsoletePoints, 829 true, /* use filter block */ 830 &stats, 831 TrivialReaderProvider{Reader: r}, 832 ) 833 if err != nil { 834 return err.Error() 835 } 836 return runIterCmd(d, iter, printValue, runIterCmdStats(&stats)) 837 838 case "get": 839 var b bytes.Buffer 840 for _, k := range strings.Split(d.Input, "\n") { 841 v, err := r.get([]byte(k)) 842 if err != nil { 843 fmt.Fprintf(&b, "<err: %s>\n", err) 844 } else { 845 fmt.Fprintln(&b, string(v)) 846 } 847 } 848 return b.String() 849 default: 850 return fmt.Sprintf("unknown command: %s", d.Cmd) 851 } 852 }) 853 }) 854 } 855 856 func TestReaderCheckComparerMerger(t *testing.T) { 857 const testTable = "test" 858 859 testComparer := &base.Comparer{ 860 Name: "test.comparer", 861 Compare: base.DefaultComparer.Compare, 862 Equal: base.DefaultComparer.Equal, 863 Separator: base.DefaultComparer.Separator, 864 Successor: base.DefaultComparer.Successor, 865 } 866 testMerger := &base.Merger{ 867 Name: "test.merger", 868 Merge: base.DefaultMerger.Merge, 869 } 870 writerOpts := 
WriterOptions{ 871 Comparer: testComparer, 872 MergerName: "test.merger", 873 } 874 875 mem := vfs.NewMem() 876 f0, err := mem.Create(testTable) 877 require.NoError(t, err) 878 879 w := NewWriter(objstorageprovider.NewFileWritable(f0), writerOpts) 880 require.NoError(t, w.Set([]byte("test"), nil)) 881 require.NoError(t, w.Close()) 882 883 testCases := []struct { 884 comparers []*base.Comparer 885 mergers []*base.Merger 886 expected string 887 }{ 888 { 889 []*base.Comparer{testComparer}, 890 []*base.Merger{testMerger}, 891 "", 892 }, 893 { 894 []*base.Comparer{testComparer, base.DefaultComparer}, 895 []*base.Merger{testMerger, base.DefaultMerger}, 896 "", 897 }, 898 { 899 []*base.Comparer{}, 900 []*base.Merger{testMerger}, 901 "unknown comparer test.comparer", 902 }, 903 { 904 []*base.Comparer{base.DefaultComparer}, 905 []*base.Merger{testMerger}, 906 "unknown comparer test.comparer", 907 }, 908 { 909 []*base.Comparer{testComparer}, 910 []*base.Merger{}, 911 "unknown merger test.merger", 912 }, 913 { 914 []*base.Comparer{testComparer}, 915 []*base.Merger{base.DefaultMerger}, 916 "unknown merger test.merger", 917 }, 918 } 919 920 for _, c := range testCases { 921 t.Run("", func(t *testing.T) { 922 f1, err := mem.Open(testTable) 923 require.NoError(t, err) 924 925 comparers := make(Comparers) 926 for _, comparer := range c.comparers { 927 comparers[comparer.Name] = comparer 928 } 929 mergers := make(Mergers) 930 for _, merger := range c.mergers { 931 mergers[merger.Name] = merger 932 } 933 934 r, err := newReader(f1, ReaderOptions{}, comparers, mergers) 935 if err != nil { 936 if r != nil { 937 t.Fatalf("found non-nil reader returned with non-nil error %q", err.Error()) 938 } 939 if !strings.HasSuffix(err.Error(), c.expected) { 940 t.Fatalf("expected %q, but found %q", c.expected, err.Error()) 941 } 942 } else if c.expected != "" { 943 t.Fatalf("expected %q, but found success", c.expected) 944 } 945 if r != nil { 946 _ = r.Close() 947 } 948 }) 949 } 950 } 951 func 
checkValidPrefix(prefix, key []byte) bool { 952 return prefix == nil || bytes.HasPrefix(key, prefix) 953 } 954 955 func testBytesIteratedWithCompression( 956 t *testing.T, 957 compression Compression, 958 allowedSizeDeviationPercent uint64, 959 blockSizes []int, 960 maxNumEntries []uint64, 961 ) { 962 for i, blockSize := range blockSizes { 963 for _, indexBlockSize := range blockSizes { 964 for _, numEntries := range []uint64{0, 1, maxNumEntries[i]} { 965 r := buildTestTable(t, numEntries, blockSize, indexBlockSize, compression) 966 var bytesIterated, prevIterated uint64 967 var pool BufferPool 968 pool.Init(5) 969 citer, err := r.NewCompactionIter(&bytesIterated, TrivialReaderProvider{Reader: r}, &pool) 970 require.NoError(t, err) 971 972 for key, _ := citer.First(); key != nil; key, _ = citer.Next() { 973 if bytesIterated < prevIterated { 974 t.Fatalf("bytesIterated moved backward: %d < %d", bytesIterated, prevIterated) 975 } 976 prevIterated = bytesIterated 977 } 978 979 expected := r.Properties.DataSize 980 allowedSizeDeviation := expected * allowedSizeDeviationPercent / 100 981 // There is some inaccuracy due to compression estimation. 
982 if bytesIterated < expected-allowedSizeDeviation || bytesIterated > expected+allowedSizeDeviation { 983 t.Fatalf("bytesIterated: got %d, want %d", bytesIterated, expected) 984 } 985 986 require.NoError(t, citer.Close()) 987 require.NoError(t, r.Close()) 988 pool.Release() 989 } 990 } 991 } 992 } 993 994 func TestBytesIterated(t *testing.T) { 995 blockSizes := []int{10, 100, 1000, 4096, math.MaxInt32} 996 t.Run("Compressed", func(t *testing.T) { 997 testBytesIteratedWithCompression(t, SnappyCompression, 1, blockSizes, []uint64{1e5, 1e5, 1e5, 1e5, 1e5}) 998 }) 999 t.Run("Uncompressed", func(t *testing.T) { 1000 testBytesIteratedWithCompression(t, NoCompression, 0, blockSizes, []uint64{1e5, 1e5, 1e5, 1e5, 1e5}) 1001 }) 1002 t.Run("Zstd", func(t *testing.T) { 1003 // compression with zstd is extremely slow with small block size (esp the nocgo version). 1004 // use less numEntries to make the test run at reasonable speed (under 10 seconds). 1005 maxNumEntries := []uint64{1e2, 1e2, 1e3, 4e3, 1e5} 1006 if useStandardZstdLib { 1007 maxNumEntries = []uint64{1e3, 1e3, 1e4, 4e4, 1e5} 1008 } 1009 testBytesIteratedWithCompression(t, ZstdCompression, 1, blockSizes, maxNumEntries) 1010 }) 1011 } 1012 1013 func TestCompactionIteratorSetupForCompaction(t *testing.T) { 1014 tmpDir := path.Join(t.TempDir()) 1015 provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(vfs.Default, tmpDir)) 1016 require.NoError(t, err) 1017 defer provider.Close() 1018 blockSizes := []int{10, 100, 1000, 4096, math.MaxInt32} 1019 for _, blockSize := range blockSizes { 1020 for _, indexBlockSize := range blockSizes { 1021 for _, numEntries := range []uint64{0, 1, 1e5} { 1022 r := buildTestTableWithProvider(t, provider, numEntries, blockSize, indexBlockSize, DefaultCompression) 1023 var bytesIterated uint64 1024 var pool BufferPool 1025 pool.Init(5) 1026 citer, err := r.NewCompactionIter(&bytesIterated, TrivialReaderProvider{Reader: r}, &pool) 1027 require.NoError(t, err) 1028 
switch i := citer.(type) {
				case *compactionIterator:
					require.True(t, objstorageprovider.TestingCheckMaxReadahead(i.dataRH))
					// Each key has one version, so no value block, regardless of
					// sstable version.
					require.Nil(t, i.vbRH)
				case *twoLevelCompactionIterator:
					require.True(t, objstorageprovider.TestingCheckMaxReadahead(i.dataRH))
					// Each key has one version, so no value block, regardless of
					// sstable version.
					require.Nil(t, i.vbRH)
				default:
					require.Failf(t, fmt.Sprintf("unknown compaction iterator type: %T", citer), "")
				}
				require.NoError(t, citer.Close())
				require.NoError(t, r.Close())
				pool.Release()
			}
		}
	}
}

// TestReadaheadSetupForV3TablesWithMultipleVersions writes a
// TableFormatPebblev3 sstable in which each of two keys is written with two
// suffixes (versions), then checks what TestingCheckMaxReadahead reports for
// the dataRH and vbRH read handles: true for both on a compaction iterator,
// false for both on a regular iterator.
func TestReadaheadSetupForV3TablesWithMultipleVersions(t *testing.T) {
	tmpDir := t.TempDir()
	provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(vfs.Default, tmpDir))
	require.NoError(t, err)
	defer provider.Close()
	f0, _, err := provider.Create(context.Background(), base.FileTypeTable, base.FileNum(0).DiskFileNum(), objstorage.CreateOptions{})
	require.NoError(t, err)

	w := NewWriter(f0, WriterOptions{
		TableFormat: TableFormatPebblev3,
		Comparer:    testkeys.Comparer,
	})
	keys := testkeys.Alpha(1)
	keyBuf := make([]byte, 1+testkeys.MaxSuffixLen)
	// Write a few keys with multiple timestamps (MVCC versions).
	for i := int64(0); i < 2; i++ {
		for j := int64(2); j >= 1; j-- {
			n := testkeys.WriteKeyAt(keyBuf[:], keys, i, j)
			key := keyBuf[:n]
			require.NoError(t, w.Set(key, key))
		}
	}
	require.NoError(t, w.Close())
	f1, err := provider.OpenForReading(context.Background(), base.FileTypeTable, base.FileNum(0).DiskFileNum(), objstorage.OpenOptions{})
	require.NoError(t, err)
	r, err := NewReader(f1, ReaderOptions{Comparer: testkeys.Comparer})
	require.NoError(t, err)
	defer r.Close()
	{
		var pool BufferPool
		pool.Init(5)
		// Deferred calls run in LIFO order, so the pool is released only
		// after the compaction iterator that uses it has been closed.
		defer pool.Release()
		citer, err := r.NewCompactionIter(nil, TrivialReaderProvider{Reader: r}, &pool)
		require.NoError(t, err)
		defer citer.Close()
		i := citer.(*compactionIterator)
		require.True(t, objstorageprovider.TestingCheckMaxReadahead(i.dataRH))
		require.True(t, objstorageprovider.TestingCheckMaxReadahead(i.vbRH))
	}
	{
		iter, err := r.NewIter(nil, nil)
		require.NoError(t, err)
		defer iter.Close()
		i := iter.(*singleLevelIterator)
		require.False(t, objstorageprovider.TestingCheckMaxReadahead(i.dataRH))
		require.False(t, objstorageprovider.TestingCheckMaxReadahead(i.vbRH))
	}
}

// TestReaderChecksumErrors builds a three-data-block sstable for every
// combination of checksum type and single/two-level index, flips the first
// byte of each data block in turn, and verifies that both a forward and a
// reverse scan of the corrupted copy report a "checksum mismatch" error from
// Error and from Close.
func TestReaderChecksumErrors(t *testing.T) {
	for _, checksumType := range []ChecksumType{ChecksumTypeCRC32c, ChecksumTypeXXHash64} {
		t.Run(fmt.Sprintf("checksum-type=%d", checksumType), func(t *testing.T) {
			for _, twoLevelIndex := range []bool{false, true} {
				t.Run(fmt.Sprintf("two-level-index=%t", twoLevelIndex), func(t *testing.T) {
					mem := vfs.NewMem()

					{
						// Create an sstable with 3 data blocks.
						f, err := mem.Create("test")
						require.NoError(t, err)

						const blockSize = 32
						indexBlockSize := 4096
						if twoLevelIndex {
							indexBlockSize = 1
						}

						w := NewWriter(objstorageprovider.NewFileWritable(f), WriterOptions{
							BlockSize:      blockSize,
							IndexBlockSize: indexBlockSize,
							Checksum:       checksumType,
						})
						require.NoError(t, w.Set(bytes.Repeat([]byte("a"), blockSize), nil))
						require.NoError(t, w.Set(bytes.Repeat([]byte("b"), blockSize), nil))
						require.NoError(t, w.Set(bytes.Repeat([]byte("c"), blockSize), nil))
						require.NoError(t, w.Close())
					}

					// Load the layout so that we know the location of the data
					// blocks.
					var layout *Layout
					{
						f, err := mem.Open("test")
						require.NoError(t, err)

						r, err := newReader(f, ReaderOptions{})
						require.NoError(t, err)
						layout, err = r.Layout()
						require.NoError(t, err)
						require.EqualValues(t, len(layout.Data), 3)
						require.NoError(t, r.Close())
					}

					for _, bh := range layout.Data {
						// Read the sstable and corrupt the first byte in the target data
						// block.
						orig, err := mem.Open("test")
						require.NoError(t, err)
						data, err := io.ReadAll(orig)
						require.NoError(t, err)
						require.NoError(t, orig.Close())

						// Corrupt the first byte in the block.
						data[bh.Offset] ^= 0xff

						corrupted, err := mem.Create("corrupted")
						require.NoError(t, err)
						_, err = corrupted.Write(data)
						require.NoError(t, err)
						require.NoError(t, corrupted.Close())

						// Verify that we encounter a checksum mismatch error while iterating
						// over the sstable.
						corrupted, err = mem.Open("corrupted")
						require.NoError(t, err)

						r, err := newReader(corrupted, ReaderOptions{})
						require.NoError(t, err)

						iter, err := r.NewIter(nil, nil)
						require.NoError(t, err)
						for k, _ := iter.First(); k != nil; k, _ = iter.Next() {
						}
						require.Regexp(t, `checksum mismatch`, iter.Error())
						require.Regexp(t, `checksum mismatch`, iter.Close())

						iter, err = r.NewIter(nil, nil)
						require.NoError(t, err)
						for k, _ := iter.Last(); k != nil; k, _ = iter.Prev() {
						}
						require.Regexp(t, `checksum mismatch`, iter.Error())
						require.Regexp(t, `checksum mismatch`, iter.Close())

						require.NoError(t, r.Close())
					}
				})
			}
		})
	}
}

// TestValidateBlockChecksums copies each testdata sstable into a temporary
// directory, confirms that Reader.ValidateBlockChecksums succeeds on the
// pristine copy, then flips one randomly chosen byte in each block selected
// by the test case and confirms that validation fails with a
// "checksum mismatch" error. The random seed is logged so that a failure can
// be reproduced.
func TestValidateBlockChecksums(t *testing.T) {
	seed := uint64(time.Now().UnixNano())
	rng := rand.New(rand.NewSource(seed))
	t.Logf("using seed = %d", seed)

	allFiles := []string{
		"testdata/h.no-compression.sst",
		"testdata/h.no-compression.two_level_index.sst",
		"testdata/h.sst",
		"testdata/h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
		"testdata/h.table-bloom.no-compression.sst",
		"testdata/h.table-bloom.sst",
		"testdata/h.zstd-compression.sst",
	}

	type corruptionLocation int
	const (
		corruptionLocationData corruptionLocation = iota
		corruptionLocationIndex
		corruptionLocationTopIndex
		corruptionLocationFilter
		corruptionLocationRangeDel
		corruptionLocationProperties
		corruptionLocationMetaIndex
	)

	testCases := []struct {
		name                string
		files               []string
		corruptionLocations []corruptionLocation
	}{
		{
			name:                "no corruption",
			corruptionLocations: []corruptionLocation{},
		},
		{
			name: "data block corruption",
			corruptionLocations: []corruptionLocation{
				corruptionLocationData,
			},
		},
		{
			name: "index block corruption",
			corruptionLocations: []corruptionLocation{
				corruptionLocationIndex,
			},
		},
		{
			name: "top index block corruption",
			files: []string{
				"testdata/h.no-compression.two_level_index.sst",
			},
			corruptionLocations: []corruptionLocation{
				corruptionLocationTopIndex,
			},
		},
		{
			name: "filter block corruption",
			files: []string{
				"testdata/h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
				"testdata/h.table-bloom.no-compression.sst",
				"testdata/h.table-bloom.sst",
			},
			corruptionLocations: []corruptionLocation{
				corruptionLocationFilter,
			},
		},
		{
			name: "range deletion block corruption",
			corruptionLocations: []corruptionLocation{
				corruptionLocationRangeDel,
			},
		},
		{
			name: "properties block corruption",
			corruptionLocations: []corruptionLocation{
				corruptionLocationProperties,
			},
		},
		{
			name: "metaindex block corruption",
			corruptionLocations: []corruptionLocation{
				corruptionLocationMetaIndex,
			},
		},
		{
			name: "multiple blocks corrupted",
			corruptionLocations: []corruptionLocation{
				corruptionLocationData,
				corruptionLocationIndex,
				corruptionLocationRangeDel,
				corruptionLocationProperties,
				corruptionLocationMetaIndex,
			},
		},
	}

	testFn := func(t *testing.T, file string, corruptionLocations []corruptionLocation) {
		// Create a copy of the SSTable that we can freely corrupt.
		f, err := os.Open(filepath.FromSlash(file))
		require.NoError(t, err)

		// file is a slash-separated testdata path, so path.Base extracts its
		// final element; the destination is a real OS path and is therefore
		// assembled with filepath.Join.
		pathCopy := filepath.Join(t.TempDir(), path.Base(file))
		fCopy, err := os.OpenFile(pathCopy, os.O_CREATE|os.O_RDWR, 0600)
		require.NoError(t, err)
		defer fCopy.Close()

		_, err = io.Copy(fCopy, f)
		require.NoError(t, err)
		err = fCopy.Sync()
		require.NoError(t, err)
		require.NoError(t, f.Close())

		filter := bloom.FilterPolicy(10)
		r, err := newReader(fCopy, ReaderOptions{
			Filters: map[string]FilterPolicy{
				filter.Name(): filter,
			},
		})
		require.NoError(t, err)
		defer func() { require.NoError(t, r.Close()) }()

		// Prior to corruption, validation is successful.
		require.NoError(t, r.ValidateBlockChecksums())

		// If we are not testing for corruption, we can stop here.
		if len(corruptionLocations) == 0 {
			return
		}

		// Perform bit flips in various corruption locations.
		layout, err := r.Layout()
		require.NoError(t, err)
		for _, location := range corruptionLocations {
			var bh BlockHandle
			switch location {
			case corruptionLocationData:
				bh = layout.Data[rng.Intn(len(layout.Data))].BlockHandle
			case corruptionLocationIndex:
				bh = layout.Index[rng.Intn(len(layout.Index))]
			case corruptionLocationTopIndex:
				bh = layout.TopIndex
			case corruptionLocationFilter:
				bh = layout.Filter
			case corruptionLocationRangeDel:
				bh = layout.RangeDel
			case corruptionLocationProperties:
				bh = layout.Properties
			case corruptionLocationMetaIndex:
				bh = layout.MetaIndex
			default:
				t.Fatalf("unknown location")
			}

			// Corrupt a random byte within the selected block.
			pos := int64(bh.Offset) + rng.Int63n(int64(bh.Length))
			t.Logf("altering file=%s @ offset = %d", file, pos)

			b := make([]byte, 1)
			n, err := fCopy.ReadAt(b, pos)
			require.NoError(t, err)
			require.Equal(t, 1, n)
			t.Logf("data (before) = %08b", b)

			b[0] ^= 0xff
			t.Logf("data (after) = %08b", b)

			_, err = fCopy.WriteAt(b, pos)
			require.NoError(t, err)
		}

		// Write back to the file.
		err = fCopy.Sync()
		require.NoError(t, err)

		// Confirm that checksum validation fails.
		err = r.ValidateBlockChecksums()
		require.Error(t, err)
		require.Regexp(t, `checksum mismatch`, err.Error())
	}

	for _, tc := range testCases {
		// By default, test across all files, unless overridden.
		files := tc.files
		if files == nil {
			files = allFiles
		}
		for _, file := range files {
			t.Run(tc.name+" "+path.Base(file), func(t *testing.T) {
				testFn(t, file, tc.corruptionLocations)
			})
		}
	}
}

// TestReader_TableFormat writes an empty sstable in every table format from
// TableFormatLevelDB through TableFormatMax and checks that
// Reader.TableFormat reports the format the table was written with.
func TestReader_TableFormat(t *testing.T) {
	test := func(t *testing.T, want TableFormat) {
		fs := vfs.NewMem()
		f, err := fs.Create("test")
		require.NoError(t, err)

		opts := WriterOptions{TableFormat: want}
		w := NewWriter(objstorageprovider.NewFileWritable(f), opts)
		err = w.Close()
		require.NoError(t, err)

		f, err = fs.Open("test")
		require.NoError(t, err)
		r, err := newReader(f, ReaderOptions{})
		require.NoError(t, err)
		defer r.Close()

		got, err := r.TableFormat()
		require.NoError(t, err)
		require.Equal(t, want, got)
	}

	for tf := TableFormatLevelDB; tf <= TableFormatMax; tf++ {
		t.Run(tf.String(), func(t *testing.T) {
			test(t, tf)
		})
	}
}

// buildTestTable builds a test sstable through a provider backed by a fresh
// in-memory filesystem and returns a Reader for it. See
// buildTestTableWithProvider for the table contents.
func buildTestTable(
	t *testing.T, numEntries uint64, blockSize, indexBlockSize int, compression Compression,
) *Reader {
	provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(vfs.NewMem(), "" /* dirName */))
	require.NoError(t, err)
	defer provider.Close()
	return buildTestTableWithProvider(t, provider, numEntries, blockSize, indexBlockSize, compression)
}

// buildTestTableWithProvider writes an sstable with numEntries entries as
// file number 0 of the given provider and returns a Reader opened on it with
// a 128 MiB block cache.
//
// Entry i has a user key of 8+i%3 bytes whose first eight bytes are the
// big-endian encoding of i, and a zero-filled value of i%100 bytes. Any
// failure, including a failed Writer.Add, fails the test immediately.
func buildTestTableWithProvider(
	t *testing.T,
	provider objstorage.Provider,
	numEntries uint64,
	blockSize, indexBlockSize int,
	compression Compression,
) *Reader {
	f0, _, err := provider.Create(context.Background(), base.FileTypeTable, base.FileNum(0).DiskFileNum(), objstorage.CreateOptions{})
	require.NoError(t, err)

	w := NewWriter(f0, WriterOptions{
		BlockSize:      blockSize,
		IndexBlockSize: indexBlockSize,
		Compression:    compression,
		FilterPolicy:   nil,
	})

	var ikey InternalKey
	for i := uint64(0); i < numEntries; i++ {
		key := make([]byte, 8+i%3)
		value := make([]byte, i%100)
		binary.BigEndian.PutUint64(key, i)
		ikey.UserKey = key
		require.NoError(t, w.Add(ikey, value))
	}

	require.NoError(t, w.Close())

	// Re-open that filename for reading.
	f1, err := provider.OpenForReading(context.Background(), base.FileTypeTable, base.FileNum(0).DiskFileNum(), objstorage.OpenOptions{})
	require.NoError(t, err)

	c := cache.New(128 << 20)
	defer c.Unref()
	r, err := NewReader(f1, ReaderOptions{
		Cache: c,
	})
	require.NoError(t, err)
	return r
}

// buildBenchmarkTable writes one million entries to an in-memory sstable
// using the given writer options and returns a Reader for it together with
// the user keys that were written, in write order.
//
// Key i is the 8-byte big-endian encoding of i+offset; values are nil. When
// confirmTwoLevelIndex is set, the benchmark is aborted if the table
// properties report zero index partitions.
func buildBenchmarkTable(
	b *testing.B, options WriterOptions, confirmTwoLevelIndex bool, offset int,
) (*Reader, [][]byte) {
	mem := vfs.NewMem()
	f0, err := mem.Create("bench")
	if err != nil {
		b.Fatal(err)
	}

	w := NewWriter(objstorageprovider.NewFileWritable(f0), options)

	const numKeys = 1000000
	keys := make([][]byte, 0, numKeys)
	var ikey InternalKey
	for i := uint64(0); i < numKeys; i++ {
		key := make([]byte, 8)
		binary.BigEndian.PutUint64(key, i+uint64(offset))
		keys = append(keys, key)
		ikey.UserKey = key
		if err := w.Add(ikey, nil); err != nil {
			b.Fatal(err)
		}
	}

	if err := w.Close(); err != nil {
		b.Fatal(err)
	}

	// Re-open that filename for reading.
	f1, err := mem.Open("bench")
	if err != nil {
		b.Fatal(err)
	}
	c := cache.New(128 << 20)
	defer c.Unref()
	r, err := newReader(f1, ReaderOptions{
		Cache: c,
	})
	if err != nil {
		b.Fatal(err)
	}
	if confirmTwoLevelIndex && r.Properties.IndexPartitions == 0 {
		b.Fatalf("should have constructed two level index")
	}
	return r, keys
}

// basicBenchmarks lists the writer configurations shared by the table
// iterator benchmarks below; the two entries differ only in the compression
// algorithm.
var basicBenchmarks = []struct {
	name    string
	options WriterOptions
}{
	{
		name: "restart=16,compression=Snappy",
		options: WriterOptions{
			BlockSize:            32 << 10,
			BlockRestartInterval: 16,
			FilterPolicy:         nil,
			Compression:          SnappyCompression,
			TableFormat:          TableFormatPebblev2,
		},
	},
	{
		name: "restart=16,compression=ZSTD",
		options: WriterOptions{
			BlockSize:            32 << 10,
			BlockRestartInterval: 16,
			FilterPolicy:         nil,
			Compression:          ZstdCompression,
			TableFormat:          TableFormatPebblev2,
		},
	},
}

// BenchmarkTableIterSeekGE measures SeekGE to randomly chosen existing keys
// for each configuration in basicBenchmarks.
func BenchmarkTableIterSeekGE(b *testing.B) {
	for _, bm := range basicBenchmarks {
		b.Run(bm.name,
			func(b *testing.B) {
				r, keys := buildBenchmarkTable(b, bm.options, false, 0)
				it, err := r.NewIter(nil /* lower */, nil /* upper */)
				require.NoError(b, err)
				rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					it.SeekGE(keys[rng.Intn(len(keys))], base.SeekGEFlagsNone)
				}

				b.StopTimer()
				it.Close()
				r.Close()
			})
	}
}

// BenchmarkTableIterSeekLT measures SeekLT to randomly chosen existing keys
// for each configuration in basicBenchmarks.
func BenchmarkTableIterSeekLT(b *testing.B) {
	for _, bm := range basicBenchmarks {
		b.Run(bm.name,
			func(b *testing.B) {
				r, keys := buildBenchmarkTable(b, bm.options, false, 0)
				it, err := r.NewIter(nil /* lower */, nil /* upper */)
				require.NoError(b, err)
				rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					it.SeekLT(keys[rng.Intn(len(keys))], base.SeekLTFlagsNone)
				}

				b.StopTimer()
				it.Close()
				r.Close()
			})
	}
}

// BenchmarkTableIterNext measures forward iteration for each configuration
// in basicBenchmarks, restarting from First whenever the end of the table is
// reached.
func BenchmarkTableIterNext(b *testing.B) {
	for _, bm := range basicBenchmarks {
		b.Run(bm.name,
			func(b *testing.B) {
				r, _ := buildBenchmarkTable(b, bm.options, false, 0)
				it, err := r.NewIter(nil /* lower */, nil /* upper */)
				require.NoError(b, err)

				b.ResetTimer()
				var sum int64
				var key *InternalKey
				for i := 0; i < b.N; i++ {
					if key == nil {
						key, _ = it.First()
					}
					sum += int64(binary.BigEndian.Uint64(key.UserKey))
					key, _ = it.Next()
				}
				// Consume sum so that the accumulated value is actually used.
				if testing.Verbose() {
					fmt.Fprint(io.Discard, sum)
				}

				b.StopTimer()
				it.Close()
				r.Close()
			})
	}
}

// BenchmarkTableIterPrev measures reverse iteration for each configuration
// in basicBenchmarks, restarting from Last whenever the start of the table
// is reached.
func BenchmarkTableIterPrev(b *testing.B) {
	for _, bm := range basicBenchmarks {
		b.Run(bm.name,
			func(b *testing.B) {
				r, _ := buildBenchmarkTable(b, bm.options, false, 0)
				it, err := r.NewIter(nil /* lower */, nil /* upper */)
				require.NoError(b, err)

				b.ResetTimer()
				var sum int64
				var key *InternalKey
				for i := 0; i < b.N; i++ {
					if key == nil {
						key, _ = it.Last()
					}
					sum += int64(binary.BigEndian.Uint64(key.UserKey))
					key, _ = it.Prev()
				}
				// Consume sum so that the accumulated value is actually used.
				if testing.Verbose() {
					fmt.Fprint(io.Discard, sum)
				}

				b.StopTimer()
				it.Close()
				r.Close()
			})
	}
}

// BenchmarkLayout measures Reader.Layout on a table written with default
// writer options. A Layout error aborts the benchmark.
func BenchmarkLayout(b *testing.B) {
	r, _ := buildBenchmarkTable(b, WriterOptions{}, false, 0)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := r.Layout(); err != nil {
			b.Fatal(err)
		}
	}
	b.StopTimer()
	r.Close()
}

// BenchmarkSeqSeekGEExhausted measures sequences of SeekGE / SeekPrefixGE
// calls that never find a key. In the "file" variant the seek keys sort
// after every key in the table; in the "bounds" variant they sort before the
// first key and the iterator's upper bound is that first key.
//
// The seek keys are visited in increasing order. TrySeekUsingNext is enabled
// while the next seek key is larger than the current one and disabled for
// the seek that wraps around to the smallest key again.
func BenchmarkSeqSeekGEExhausted(b *testing.B) {
	// Snappy with no bloom filter.
	options := basicBenchmarks[0].options

	for _, twoLevelIndex := range []bool{false, true} {
		switch twoLevelIndex {
		case false:
			options.IndexBlockSize = 0
		case true:
			options.IndexBlockSize = 512
		}
		const offsetCount = 5000
		reader, keys := buildBenchmarkTable(b, options, twoLevelIndex, offsetCount)
		var preKeys [][]byte
		for i := 0; i < offsetCount; i++ {
			key := make([]byte, 8)
			binary.BigEndian.PutUint64(key, uint64(i))
			preKeys = append(preKeys, key)
		}
		var postKeys [][]byte
		for i := 0; i < offsetCount; i++ {
			key := make([]byte, 8)
			binary.BigEndian.PutUint64(key, uint64(i+offsetCount+len(keys)))
			postKeys = append(postKeys, key)
		}
		for _, exhaustedBounds := range []bool{false, true} {
			for _, prefixSeek := range []bool{false, true} {
				exhausted := "file"
				if exhaustedBounds {
					exhausted = "bounds"
				}
				seekKind := "ge"
				if prefixSeek {
					seekKind = "prefix-ge"
				}
				b.Run(fmt.Sprintf(
					"two-level=%t/exhausted=%s/seek=%s", twoLevelIndex, exhausted, seekKind),
					func(b *testing.B) {
						var upper []byte
						var seekKeys [][]byte
						if exhaustedBounds {
							seekKeys = preKeys
							upper = keys[0]
						} else {
							seekKeys = postKeys
						}
						it, err := reader.NewIter(nil /* lower */, upper)
						require.NoError(b, err)
						b.ResetTimer()
						pos := 0
						var seekGEFlags SeekGEFlags
						for i := 0; i < b.N; i++ {
							// Walk the seek keys in order; pos (not a fixed
							// index) is what the flag toggling below is
							// keyed on.
							seekKey := seekKeys[pos]
							var k *InternalKey
							if prefixSeek {
								k, _ = it.SeekPrefixGE(seekKey, seekKey, seekGEFlags)
							} else {
								k, _ = it.SeekGE(seekKey, seekGEFlags)
							}
							if k != nil {
								b.Fatal("found a key")
							}
							if it.Error() != nil {
								b.Fatalf("%s", it.Error().Error())
							}
							pos++
							if pos == len(seekKeys) {
								// The next seek wraps around to a smaller
								// key, so it must not use the
								// seek-using-next optimization.
								pos = 0
								seekGEFlags = seekGEFlags.DisableTrySeekUsingNext()
							} else {
								seekGEFlags = seekGEFlags.EnableTrySeekUsingNext()
							}
						}
						b.StopTimer()
						it.Close()
					})
			}
		}
		reader.Close()
	}
}

// BenchmarkIteratorScanManyVersions measures a forward scan over a table in
// which every key prefix has 100 versions, for TableFormatPebblev2 and
// TableFormatPebblev3, two block cache sizes, and with and without reading
// each value.
func BenchmarkIteratorScanManyVersions(b *testing.B) {
	options := WriterOptions{
		BlockSize:            32 << 10,
		BlockRestartInterval: 16,
		FilterPolicy:         nil,
		Compression:          SnappyCompression,
		Comparer:             testkeys.Comparer,
	}
	// 10,000 key prefixes, each with 100 versions.
	const keyCount = 10000
	const sharedPrefixLen = 32
	const unsharedPrefixLen = 8
	const versionCount = 100

	// Take the very large keyspace consisting of alphabetic characters of
	// lengths up to unsharedPrefixLen and reduce it down to roughly keyCount
	// keys by keeping one key out of every keys.Count()/keyCount keys.
	keys := testkeys.Alpha(unsharedPrefixLen)
	keys = keys.EveryN(keys.Count() / keyCount)
	if keys.Count() < keyCount {
		b.Fatalf("expected %d keys, found %d", keyCount, keys.Count())
	}
	keyBuf := make([]byte, sharedPrefixLen+unsharedPrefixLen+testkeys.MaxSuffixLen)
	for i := 0; i < sharedPrefixLen; i++ {
		keyBuf[i] = 'A' + byte(i)
	}
	// v2 sstable is 115,178,070 bytes. v3 sstable is 107,181,105 bytes with
	// 99,049,269 bytes in value blocks.
	setupBench := func(b *testing.B, tableFormat TableFormat, cacheSize int64) *Reader {
		mem := vfs.NewMem()
		f0, err := mem.Create("bench")
		require.NoError(b, err)
		options.TableFormat = tableFormat
		w := NewWriter(objstorageprovider.NewFileWritable(f0), options)
		val := make([]byte, 100)
		rng := rand.New(rand.NewSource(100))
		for i := int64(0); i < keys.Count(); i++ {
			for v := 0; v < versionCount; v++ {
				n := testkeys.WriteKeyAt(keyBuf[sharedPrefixLen:], keys, i, int64(versionCount-v+1))
				key := keyBuf[:n+sharedPrefixLen]
				rng.Read(val)
				require.NoError(b, w.Set(key, val))
			}
		}
		require.NoError(b, w.Close())
		c := cache.New(cacheSize)
		defer c.Unref()
		// Re-open the filename for reading.
		f0, err = mem.Open("bench")
		require.NoError(b, err)
		r, err := newReader(f0, ReaderOptions{
			Cache:    c,
			Comparer: testkeys.Comparer,
		})
		require.NoError(b, err)
		return r
	}
	for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} {
		b.Run(fmt.Sprintf("format=%s", format.String()), func(b *testing.B) {
			// 150MiB results in a high cache hit rate for both formats. 20MiB
			// results in a high cache hit rate for the data blocks in
			// TableFormatPebblev3.
			for _, cacheSize := range []int64{20 << 20, 150 << 20} {
				b.Run(fmt.Sprintf("cache-size=%s", humanize.Bytes.Int64(cacheSize)),
					func(b *testing.B) {
						r := setupBench(b, format, cacheSize)
						defer func() {
							require.NoError(b, r.Close())
						}()
						for _, readValue := range []bool{false, true} {
							b.Run(fmt.Sprintf("read-value=%t", readValue), func(b *testing.B) {
								iter, err := r.NewIter(nil, nil)
								require.NoError(b, err)
								var k *InternalKey
								var v base.LazyValue
								var valBuf [100]byte
								b.ResetTimer()
								for i := 0; i < b.N; i++ {
									if k == nil {
										k, _ = iter.First()
										if k == nil {
											b.Fatalf("k is nil")
										}
									}
									k, v = iter.Next()
									if k != nil && readValue {
										_, callerOwned, err := v.Value(valBuf[:])
										if err != nil {
											b.Fatal(err)
										} else if callerOwned {
											b.Fatalf("unexpected callerOwned: %t", callerOwned)
										}
									}
								}
								// Close the iterator before the enclosing
								// benchmark closes the reader.
								b.StopTimer()
								require.NoError(b, iter.Close())
							})
						}
					})
			}
		})
	}
}

// BenchmarkIteratorScanNextPrefix compares two ways of stepping from one key
// prefix to the next in a TableFormatPebblev3 table with 1, 2, 10 or 100
// versions per prefix: SeekGE (with TrySeekUsingNext) to the immediate
// successor of the current prefix, and NextPrefix with that same successor
// key.
func BenchmarkIteratorScanNextPrefix(b *testing.B) {
	options := WriterOptions{
		BlockSize:            32 << 10,
		BlockRestartInterval: 16,
		FilterPolicy:         nil,
		Compression:          SnappyCompression,
		TableFormat:          TableFormatPebblev3,
		Comparer:             testkeys.Comparer,
	}
	const keyCount = 10000
	const sharedPrefixLen = 32
	const unsharedPrefixLen = 8
	val := make([]byte, 100)
	rand.New(rand.NewSource(100)).Read(val)

	// Take the very large keyspace consisting of alphabetic characters of
	// lengths up to unsharedPrefixLen and reduce it down to roughly keyCount
	// keys by keeping one key out of every keys.Count()/keyCount keys.
	keys := testkeys.Alpha(unsharedPrefixLen)
	keys = keys.EveryN(keys.Count() / keyCount)
	if keys.Count() < keyCount {
		b.Fatalf("expected %d keys, found %d", keyCount, keys.Count())
	}
	keyBuf := make([]byte, sharedPrefixLen+unsharedPrefixLen+testkeys.MaxSuffixLen)
	for i := 0; i < sharedPrefixLen; i++ {
		keyBuf[i] = 'A' + byte(i)
	}
	// setupBench writes versCount versions of every key and returns a reader
	// for the table along with, for each key prefix in write order, the
	// immediate successor of that prefix.
	setupBench := func(b *testing.B, versCount int) (r *Reader, succKeys [][]byte) {
		mem := vfs.NewMem()
		f0, err := mem.Create("bench")
		require.NoError(b, err)
		w := NewWriter(objstorageprovider.NewFileWritable(f0), options)
		for i := int64(0); i < keys.Count(); i++ {
			for v := 0; v < versCount; v++ {
				n := testkeys.WriteKeyAt(keyBuf[sharedPrefixLen:], keys, i, int64(versCount-v+1))
				key := keyBuf[:n+sharedPrefixLen]
				require.NoError(b, w.Set(key, val))
				if v == 0 {
					prefixLen := testkeys.Comparer.Split(key)
					prefixKey := key[:prefixLen]
					succKey := testkeys.Comparer.ImmediateSuccessor(nil, prefixKey)
					succKeys = append(succKeys, succKey)
				}
			}
		}
		require.NoError(b, w.Close())
		// NB: This 200MiB cache is sufficient for even the largest file: 10,000
		// keys * 100 versions = 1M keys, where each key-value pair is ~140 bytes
		// = 140MB. So we are not measuring the caching benefit of
		// TableFormatPebblev3 storing older values in value blocks.
		c := cache.New(200 << 20)
		defer c.Unref()
		// Re-open the filename for reading.
		f0, err = mem.Open("bench")
		require.NoError(b, err)
		r, err = newReader(f0, ReaderOptions{
			Cache:    c,
			Comparer: testkeys.Comparer,
		})
		require.NoError(b, err)
		return r, succKeys
	}
	// Analysis of some sample results with TableFormatPebblev2:
	// versions=1/method=seek-ge-10         	22107622	        53.57 ns/op
	// versions=1/method=next-prefix-10     	36292837	        33.07 ns/op
	// versions=2/method=seek-ge-10         	14429138	        82.92 ns/op
	// versions=2/method=next-prefix-10     	19676055	        60.78 ns/op
	// versions=10/method=seek-ge-10        	 1453726	       825.2 ns/op
	// versions=10/method=next-prefix-10    	 2450498	       489.6 ns/op
	// versions=100/method=seek-ge-10       	  965143	      1257 ns/op
	// versions=100/method=next-prefix-10   	 1000000	      1054 ns/op
	//
	// With 1 version, both SeekGE and NextPrefix will be able to complete after
	// doing a single call to blockIter.Next. However, SeekGE has to do two key
	// comparisons unlike the one key comparison in NextPrefix. This is because
	// SeekGE also compares *before* calling Next since it is possible that the
	// preceding SeekGE is already at the right place.
	//
	// With 2 versions, both will do two calls to blockIter.Next. The difference
	// in the cost is the same as in the 1 version case.
	//
	// With 10 versions, it is still likely that the desired key is in the same
	// data block. NextPrefix will seek only the blockIter. And in the rare case
	// that the key is in the next data block, it will step the index block (not
	// seek). In comparison, SeekGE will seek the index block too.
	//
	// With 100 versions we more often cross from one data block to the next, so
	// the difference in cost declines.
	//
	// Some sample results with TableFormatPebblev3:
	//
	// versions=1/method=seek-ge-10         	18702609	        53.90 ns/op
	// versions=1/method=next-prefix-10     	77440167	        15.41 ns/op
	// versions=2/method=seek-ge-10         	13554286	        87.91 ns/op
	// versions=2/method=next-prefix-10     	62148526	        19.25 ns/op
	// versions=10/method=seek-ge-10        	 1316676	       910.5 ns/op
	// versions=10/method=next-prefix-10    	18829448	        62.61 ns/op
	// versions=100/method=seek-ge-10       	 1166139	      1025 ns/op
	// versions=100/method=next-prefix-10   	 4443386	       265.3 ns/op
	//
	// NextPrefix is much cheaper than in TableFormatPebblev2 with larger number
	// of versions. It is also cheaper with 1 and 2 versions since
	// setHasSamePrefix=false eliminates a key comparison.
	for _, versionCount := range []int{1, 2, 10, 100} {
		b.Run(fmt.Sprintf("versions=%d", versionCount), func(b *testing.B) {
			r, succKeys := setupBench(b, versionCount)
			defer func() {
				require.NoError(b, r.Close())
			}()
			for _, method := range []string{"seek-ge", "next-prefix"} {
				b.Run(fmt.Sprintf("method=%s", method), func(b *testing.B) {
					for _, readValue := range []bool{false, true} {
						b.Run(fmt.Sprintf("read-value=%t", readValue), func(b *testing.B) {
							iter, err := r.NewIter(nil, nil)
							require.NoError(b, err)
							var nextFunc func(index int) (*InternalKey, base.LazyValue)
							switch method {
							case "seek-ge":
								nextFunc = func(index int) (*InternalKey, base.LazyValue) {
									var flags base.SeekGEFlags
									return iter.SeekGE(succKeys[index], flags.EnableTrySeekUsingNext())
								}
							case "next-prefix":
								nextFunc = func(index int) (*InternalKey, base.LazyValue) {
									return iter.NextPrefix(succKeys[index])
								}
							default:
								b.Fatalf("unknown method %s", method)
							}
							n := keys.Count()
							// j counts the prefixes visited since the last
							// call to First; it must equal n whenever the
							// iterator is exhausted.
							j := n
							var k *InternalKey
							var v base.LazyValue
							var valBuf [100]byte
							b.ResetTimer()
							for i := 0; i < b.N; i++ {
								if k == nil {
									if j != n {
										b.Fatalf("unexpected %d != %d", j, n)
									}
									k, _ = iter.First()
									j = 0
								} else {
									k, v = nextFunc(int(j - 1))
									if k != nil && readValue {
										_, callerOwned, err := v.Value(valBuf[:])
										if err != nil {
											b.Fatal(err)
										} else if callerOwned {
											b.Fatalf("unexpected callerOwned: %t", callerOwned)
										}
									}

								}
								if k != nil {
									j++
								}
							}
							// Close the iterator before the enclosing
							// benchmark closes the reader.
							b.StopTimer()
							require.NoError(b, iter.Close())
						})
					}
				})
			}
		})
	}
}

// BenchmarkIteratorScanObsolete measures full forward scans of a table in
// which every point except the first is written with forceObsolete=true. It
// runs for TableFormatPebblev3 and TableFormatPebblev4, two block cache
// sizes, and with hideObsoletePoints off and on. With TableFormatPebblev4
// and hideObsoletePoints set, exactly one point is expected per scan;
// otherwise every key is expected.
func BenchmarkIteratorScanObsolete(b *testing.B) {
	options := WriterOptions{
		BlockSize:            32 << 10,
		BlockRestartInterval: 16,
		FilterPolicy:         nil,
		Compression:          SnappyCompression,
		Comparer:             testkeys.Comparer,
	}
	const keyCount = 1 << 20
	const keyLen = 10

	// Take the very large keyspace consisting of alphabetic characters of
	// lengths up to keyLen and reduce it down to roughly keyCount keys by
	// keeping one key out of every keys.Count()/keyCount keys.
	keys := testkeys.Alpha(keyLen)
	keys = keys.EveryN(keys.Count() / keyCount)
	if keys.Count() < keyCount {
		b.Fatalf("expected %d keys, found %d", keyCount, keys.Count())
	}
	expectedKeyCount := keys.Count()
	keyBuf := make([]byte, keyLen)
	setupBench := func(b *testing.B, tableFormat TableFormat, cacheSize int64) *Reader {
		mem := vfs.NewMem()
		f0, err := mem.Create("bench")
		require.NoError(b, err)
		options.TableFormat = tableFormat
		w := NewWriter(objstorageprovider.NewFileWritable(f0), options)
		val := make([]byte, 100)
		rng := rand.New(rand.NewSource(100))
		for i := int64(0); i < keys.Count(); i++ {
			n := testkeys.WriteKey(keyBuf, keys, i)
			key := keyBuf[:n]
			rng.Read(val)
			// Only the very first point is left non-obsolete.
			forceObsolete := true
			if i == 0 {
				forceObsolete = false
			}
			require.NoError(b, w.AddWithForceObsolete(
				base.MakeInternalKey(key, 0, InternalKeyKindSet), val, forceObsolete))
		}
		require.NoError(b, w.Close())
		c := cache.New(cacheSize)
		defer c.Unref()
		// Re-open the filename for reading.
		f0, err = mem.Open("bench")
		require.NoError(b, err)
		r, err := newReader(f0, ReaderOptions{
			Cache:    c,
			Comparer: testkeys.Comparer,
		})
		require.NoError(b, err)
		return r
	}
	for _, format := range []TableFormat{TableFormatPebblev3, TableFormatPebblev4} {
		b.Run(fmt.Sprintf("format=%s", format.String()), func(b *testing.B) {
			// 150MiB results in a high cache hit rate for both formats.
			for _, cacheSize := range []int64{1, 150 << 20} {
				b.Run(fmt.Sprintf("cache-size=%s", humanize.Bytes.Int64(cacheSize)),
					func(b *testing.B) {
						r := setupBench(b, format, cacheSize)
						defer func() {
							require.NoError(b, r.Close())
						}()
						for _, hideObsoletePoints := range []bool{false, true} {
							b.Run(fmt.Sprintf("hide-obsolete=%t", hideObsoletePoints), func(b *testing.B) {
								var filterer *BlockPropertiesFilterer
								if format == TableFormatPebblev4 && hideObsoletePoints {
									filterer = newBlockPropertiesFilterer(
										[]BlockPropertyFilter{obsoleteKeyBlockPropertyFilter{}}, nil)
									intersects, err :=
										filterer.intersectsUserPropsAndFinishInit(r.Properties.UserProperties)
									if err != nil {
										b.Fatalf("%s", err.Error())
									}
									if !intersects {
										b.Fatalf("sstable does not intersect")
									}
								}
								iter, err := r.NewIterWithBlockPropertyFiltersAndContextEtc(
									context.Background(), nil, nil, filterer, hideObsoletePoints,
									true, nil, TrivialReaderProvider{Reader: r})
								require.NoError(b, err)
								b.ResetTimer()
								for i := 0; i < b.N; i++ {
									count := int64(0)
									k, _ := iter.First()
									for k != nil {
										count++
										k, _ = iter.Next()
									}
									if format == TableFormatPebblev4 && hideObsoletePoints {
										if count != 1 {
											b.Fatalf("found %d points", count)
										}
									} else {
										if count != expectedKeyCount {
											b.Fatalf("found %d points", count)
										}
									}
								}
								// Close the iterator before the enclosing
								// benchmark closes the reader.
								b.StopTimer()
								require.NoError(b, iter.Close())
							})
						}
					})
			}
		})
	}
}

// newReader wraps r with NewSimpleReadable and opens an sstable Reader on the
// result, forwarding o and extraOpts to NewReader. An error from
// NewSimpleReadable is returned as-is with a nil Reader.
func newReader(r ReadableFile, o ReaderOptions, extraOpts ...ReaderOption) (*Reader, error) {
	readable, err := NewSimpleReadable(r)
	if err != nil {
		return nil, err
	}
	return NewReader(readable, o, extraOpts...)
}