// Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package pebble

import (
	"bytes"
	"context"
	"flag"
	"fmt"
	"io"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"testing"
	"time"

	"github.com/cockroachdb/datadriven"
	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/bytealloc"
	"github.com/cockroachdb/pebble/internal/invalidating"
	"github.com/cockroachdb/pebble/internal/keyspan"
	"github.com/cockroachdb/pebble/internal/manifest"
	"github.com/cockroachdb/pebble/internal/testkeys"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
	"github.com/cockroachdb/pebble/sstable"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/rand"
)

var testKeyValuePairs = []string{
	"10:10",
	"11:11",
	"12:12",
	"13:13",
	"14:14",
	"15:15",
	"16:16",
	"17:17",
	"18:18",
	"19:19",
}

type fakeIter struct {
	lower    []byte
	upper    []byte
	keys     []InternalKey
	vals     [][]byte
	index    int
	valid    bool
	closeErr error
}

// fakeIter implements the base.InternalIterator interface.
var _ base.InternalIterator = (*fakeIter)(nil)

func fakeIkey(s string) InternalKey {
	j := strings.Index(s, ":")
	seqNum, err := strconv.Atoi(s[j+1:])
	if err != nil {
		panic(err)
	}
	return base.MakeInternalKey([]byte(s[:j]), uint64(seqNum), InternalKeyKindSet)
}

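// Illustrative example of the encoding (a sketch, not used by the tests):
// fakeIkey parses the "userKey:seqNum" strings used throughout this file,
// so fakeIkey("a:1") is equivalent to
// base.MakeInternalKey([]byte("a"), 1, InternalKeyKindSet).
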
func newFakeIterator(closeErr error, keys ...string) *fakeIter {
	ikeys := make([]InternalKey, len(keys))
	for i, k := range keys {
		ikeys[i] = fakeIkey(k)
	}
	return &fakeIter{
		keys:     ikeys,
		index:    0,
		valid:    len(ikeys) > 0,
		closeErr: closeErr,
	}
}

func (f *fakeIter) String() string {
	return "fake"
}

func (f *fakeIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
	f.valid = false
	for f.index = 0; f.index < len(f.keys); f.index++ {
		if DefaultComparer.Compare(key, f.key().UserKey) <= 0 {
			if f.upper != nil && DefaultComparer.Compare(f.upper, f.key().UserKey) <= 0 {
				return nil, base.LazyValue{}
			}
			f.valid = true
			return f.Key(), f.Value()
		}
	}
	return nil, base.LazyValue{}
}

func (f *fakeIter) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	return f.SeekGE(key, flags)
}

func (f *fakeIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
	f.valid = false
	for f.index = len(f.keys) - 1; f.index >= 0; f.index-- {
		if DefaultComparer.Compare(key, f.key().UserKey) > 0 {
			if f.lower != nil && DefaultComparer.Compare(f.lower, f.key().UserKey) > 0 {
				return nil, base.LazyValue{}
			}
			f.valid = true
			return f.Key(), f.Value()
		}
	}
	return nil, base.LazyValue{}
}

func (f *fakeIter) First() (*InternalKey, base.LazyValue) {
	f.valid = false
	f.index = -1
	if key, _ := f.Next(); key == nil {
		return nil, base.LazyValue{}
	}
	if f.upper != nil && DefaultComparer.Compare(f.upper, f.key().UserKey) <= 0 {
		return nil, base.LazyValue{}
	}
	f.valid = true
	return f.Key(), f.Value()
}

func (f *fakeIter) Last() (*InternalKey, base.LazyValue) {
	f.valid = false
	f.index = len(f.keys)
	if key, _ := f.Prev(); key == nil {
		return nil, base.LazyValue{}
	}
	if f.lower != nil && DefaultComparer.Compare(f.lower, f.key().UserKey) > 0 {
		return nil, base.LazyValue{}
	}
	f.valid = true
	return f.Key(), f.Value()
}

func (f *fakeIter) Next() (*InternalKey, base.LazyValue) {
	f.valid = false
	if f.index == len(f.keys) {
		return nil, base.LazyValue{}
	}
	f.index++
	if f.index == len(f.keys) {
		return nil, base.LazyValue{}
	}
	if f.upper != nil && DefaultComparer.Compare(f.upper, f.key().UserKey) <= 0 {
		return nil, base.LazyValue{}
	}
	f.valid = true
	return f.Key(), f.Value()
}

func (f *fakeIter) Prev() (*InternalKey, base.LazyValue) {
	f.valid = false
	if f.index < 0 {
		return nil, base.LazyValue{}
	}
	f.index--
	if f.index < 0 {
		return nil, base.LazyValue{}
	}
	if f.lower != nil && DefaultComparer.Compare(f.lower, f.key().UserKey) > 0 {
		return nil, base.LazyValue{}
	}
	f.valid = true
	return f.Key(), f.Value()
}

func (f *fakeIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
	return f.SeekGE(succKey, base.SeekGEFlagsNone)
}

// key returns the current Key the iterator is positioned at regardless of the
// value of f.valid.
func (f *fakeIter) key() *InternalKey {
	return &f.keys[f.index]
}

func (f *fakeIter) Key() *InternalKey {
	if f.valid {
		return &f.keys[f.index]
	}
	// It is invalid to call Key() when Valid() returns false. Rather than
	// returning nil here which would technically be more correct, return a
	// non-nil key which is the behavior of some InternalIterator
	// implementations. This provides better testing of users of
	// InternalIterators.
	if f.index < 0 {
		return &f.keys[0]
	}
	return &f.keys[len(f.keys)-1]
}

func (f *fakeIter) Value() base.LazyValue {
	if f.index >= 0 && f.index < len(f.vals) {
		return base.MakeInPlaceValue(f.vals[f.index])
	}
	return base.LazyValue{}
}

func (f *fakeIter) Valid() bool {
	return f.index >= 0 && f.index < len(f.keys) && f.valid
}

func (f *fakeIter) Error() error {
	return f.closeErr
}

func (f *fakeIter) Close() error {
	return f.closeErr
}

func (f *fakeIter) SetBounds(lower, upper []byte) {
	f.lower = lower
	f.upper = upper
}

func (f *fakeIter) SetContext(_ context.Context) {}

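// exampleCollectFakeIterKeys is an illustrative sketch (not exercised by the
// tests in this file): it drains a fakeIter forward, demonstrating the
// First/Next contract that the combined-iterator tests below rely on.
func exampleCollectFakeIterKeys(it *fakeIter) []string {
	var out []string
	for key, _ := it.First(); key != nil; key, _ = it.Next() {
		out = append(out, string(key.UserKey))
	}
	return out
}
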
// testIterator tests creating a combined iterator from a number of sub-
// iterators. newFunc is a constructor function. splitFunc returns a random
// split of the testKeyValuePairs slice such that walking a combined iterator
// over those splits should recover the original key/value pairs in order.
func testIterator(
	t *testing.T,
	newFunc func(...internalIterator) internalIterator,
	splitFunc func(r *rand.Rand) [][]string,
) {
	// Test pre-determined sub-iterators. The sub-iterators are designed
	// so that the combined key/value pair order is the same whether the
	// combined iterator is concatenating or merging.
	testCases := []struct {
		desc  string
		iters []internalIterator
		want  string
	}{
		{
			"one sub-iterator",
			[]internalIterator{
				newFakeIterator(nil, "e:1", "w:2"),
			},
			"<e:1><w:2>.",
		},
		{
			"two sub-iterators",
			[]internalIterator{
				newFakeIterator(nil, "a0:0"),
				newFakeIterator(nil, "b1:1", "b2:2"),
			},
			"<a0:0><b1:1><b2:2>.",
		},
		{
			"empty sub-iterators",
			[]internalIterator{
				newFakeIterator(nil),
				newFakeIterator(nil),
				newFakeIterator(nil),
			},
			".",
		},
		{
			"sub-iterator errors",
			[]internalIterator{
				newFakeIterator(nil, "a0:0", "a1:1"),
				newFakeIterator(errors.New("the sky is falling"), "b2:2", "b3:3", "b4:4"),
				newFakeIterator(errors.New("run for your lives"), "c5:5", "c6:6"),
			},
			"<a0:0><a1:1><b2:2><b3:3><b4:4>err=the sky is falling",
		},
	}
	for _, tc := range testCases {
		var b bytes.Buffer
		iter := invalidating.NewIter(newFunc(tc.iters...))
		for key, _ := iter.First(); key != nil; key, _ = iter.Next() {
			fmt.Fprintf(&b, "<%s:%d>", key.UserKey, key.SeqNum())
		}
		if err := iter.Close(); err != nil {
			fmt.Fprintf(&b, "err=%v", err)
		} else {
			b.WriteByte('.')
		}
		if got := b.String(); got != tc.want {
			t.Errorf("%s:\ngot %q\nwant %q", tc.desc, got, tc.want)
		}
	}

	// Test randomly generated sub-iterators.
	r := rand.New(rand.NewSource(0))
	for i, nBad := 0, 0; i < 1000; i++ {
		bad := false

		splits := splitFunc(r)
		iters := make([]internalIterator, len(splits))
		for i, split := range splits {
			iters[i] = newFakeIterator(nil, split...)
		}
		iter := newInternalIterAdapter(invalidating.NewIter(newFunc(iters...)))
		iter.First()

		j := 0
		for ; iter.Valid() && j < len(testKeyValuePairs); j++ {
			got := fmt.Sprintf("%s:%d", iter.Key().UserKey, iter.Key().SeqNum())
			want := testKeyValuePairs[j]
			if got != want {
				bad = true
				t.Errorf("random splits: i=%d, j=%d: got %q, want %q", i, j, got, want)
			}
			iter.Next()
		}
		if iter.Valid() {
			bad = true
			t.Errorf("random splits: i=%d, j=%d: iter was not exhausted", i, j)
		}
		if j != len(testKeyValuePairs) {
			bad = true
			t.Errorf("random splits: i=%d, j=%d: want j=%d", i, j, len(testKeyValuePairs))
			return
		}
		if err := iter.Close(); err != nil {
			bad = true
			t.Errorf("random splits: i=%d, j=%d: %v", i, j, err)
		}

		if bad {
			nBad++
			if nBad == 10 {
				t.Fatal("random splits: too many errors; stopping")
			}
		}
	}
}

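// exampleSplitTestKeyValuePairs is a minimal sketch of the splitFunc contract
// (an assumed shape; real callers construct their own variants): it carves
// testKeyValuePairs into random contiguous runs, so a concatenating combined
// iterator over the resulting sub-iterators yields the pairs in order.
func exampleSplitTestKeyValuePairs(r *rand.Rand) [][]string {
	var splits [][]string
	pairs := testKeyValuePairs
	for len(pairs) > 0 {
		n := 1 + r.Intn(len(pairs))
		splits = append(splits, pairs[:n])
		pairs = pairs[n:]
	}
	return splits
}
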
// deletableSumValueMerger computes the sum of its arguments,
// but transforms a zero sum into a non-existent entry.
type deletableSumValueMerger struct {
	sum int64
}

func newDeletableSumValueMerger(key, value []byte) (ValueMerger, error) {
	m := &deletableSumValueMerger{}
	return m, m.MergeNewer(value)
}

func (m *deletableSumValueMerger) parseAndCalculate(value []byte) error {
	v, err := strconv.ParseInt(string(value), 10, 64)
	if err == nil {
		m.sum += v
	}
	return err
}

func (m *deletableSumValueMerger) MergeNewer(value []byte) error {
	return m.parseAndCalculate(value)
}

func (m *deletableSumValueMerger) MergeOlder(value []byte) error {
	return m.parseAndCalculate(value)
}

func (m *deletableSumValueMerger) Finish(includesBase bool) ([]byte, io.Closer, error) {
	if m.sum == 0 {
		return nil, nil, nil
	}
	return []byte(strconv.FormatInt(m.sum, 10)), nil, nil
}

func (m *deletableSumValueMerger) DeletableFinish(
	includesBase bool,
) ([]byte, bool, io.Closer, error) {
	value, closer, err := m.Finish(includesBase)
	return value, len(value) == 0, closer, err
}

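// exampleDeletableMergeSketch illustrates the semantics above (a sketch, not
// exercised by the tests): merging "5" with "-5" sums to zero, so
// DeletableFinish reports the merged entry as deletable.
func exampleDeletableMergeSketch() (deletable bool, err error) {
	m, err := newDeletableSumValueMerger(nil /* key */, []byte("5"))
	if err != nil {
		return false, err
	}
	if err := m.MergeNewer([]byte("-5")); err != nil {
		return false, err
	}
	_, deletable, _, err = m.(*deletableSumValueMerger).DeletableFinish(true /* includesBase */)
	return deletable, err
}
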
func TestIterator(t *testing.T) {
	var merge Merge
	var keys []InternalKey
	var vals [][]byte

	newIter := func(seqNum uint64, opts IterOptions) *Iterator {
		if merge == nil {
			merge = DefaultMerger.Merge
		}
		wrappedMerge := func(key, value []byte) (ValueMerger, error) {
			if len(key) == 0 {
				t.Fatalf("an empty key is passed into Merge")
			}
			return merge(key, value)
		}
		it := &Iterator{
			opts:     opts,
			comparer: *testkeys.Comparer,
			merge:    wrappedMerge,
		}
		// NB: Use a mergingIter to filter entries newer than seqNum.
		iter := newMergingIter(nil /* logger */, &it.stats.InternalStats, it.cmp, it.split, &fakeIter{
			lower: opts.GetLowerBound(),
			upper: opts.GetUpperBound(),
			keys:  keys,
			vals:  vals,
		})
		iter.snapshot = seqNum
		// NB: This Iterator cannot be cloned since it is not constructed
		// with a readState. It suffices for this test.
		it.iter = invalidating.NewIter(iter)
		return it
	}

	datadriven.RunTest(t, "testdata/iterator", func(t *testing.T, d *datadriven.TestData) string {
		switch d.Cmd {
		case "define":
			merge = nil
			// NB: check len(arg.Vals) before indexing Vals[0] so that a bare
			// "merger" argument without a value cannot panic.
			if arg, ok := d.Arg("merger"); ok && len(arg.Vals) > 0 && arg.Vals[0] == "deletable" {
				merge = newDeletableSumValueMerger
			}
			keys = keys[:0]
			vals = vals[:0]
			for _, key := range strings.Split(d.Input, "\n") {
				j := strings.Index(key, ":")
				keys = append(keys, base.ParseInternalKey(key[:j]))
				vals = append(vals, []byte(key[j+1:]))
			}
			return ""

		case "iter":
			var seqNum uint64
			var opts IterOptions
			d.MaybeScanArgs(t, "seq", &seqNum)
			var lower, upper string
			if d.MaybeScanArgs(t, "lower", &lower) {
				opts.LowerBound = []byte(lower)
			}
			if d.MaybeScanArgs(t, "upper", &upper) {
				opts.UpperBound = []byte(upper)
			}

			iter := newIter(seqNum, opts)
			iterOutput := runIterCmd(d, iter, true)
			stats := iter.Stats()
			return fmt.Sprintf("%sstats: %s\n", iterOutput, stats.String())

		default:
			return fmt.Sprintf("unknown command: %s", d.Cmd)
		}
	})
}

type minSeqNumPropertyCollector struct {
	minSeqNum uint64
}

func (c *minSeqNumPropertyCollector) Add(key InternalKey, value []byte) error {
	if c.minSeqNum == 0 || c.minSeqNum > key.SeqNum() {
		c.minSeqNum = key.SeqNum()
	}
	return nil
}

func (c *minSeqNumPropertyCollector) Finish(userProps map[string]string) error {
	userProps["test.min-seq-num"] = fmt.Sprint(c.minSeqNum)
	return nil
}

func (c *minSeqNumPropertyCollector) Name() string {
	return "minSeqNumPropertyCollector"
}

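// Note: the "test.min-seq-num" property written by this collector is consumed
// by an IterOptions.TableFilter in TestIteratorTableFilter below, roughly:
//
//	opts.TableFilter = func(userProps map[string]string) bool {
//		minSeqNum, err := strconv.ParseUint(userProps["test.min-seq-num"], 10, 64)
//		return err != nil || minSeqNum < filterSeqNum
//	}
//
// so a table is visited only if its oldest key predates the filter sequence
// number (or the property is missing or unparsable).
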
func TestReadSampling(t *testing.T) {
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	var iter *Iterator
	defer func() {
		if iter != nil {
			require.NoError(t, iter.Close())
		}
	}()

	datadriven.RunTest(t, "testdata/iterator_read_sampling", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			if iter != nil {
				if err := iter.Close(); err != nil {
					return err.Error()
				}
			}
			if d != nil {
				if err := d.Close(); err != nil {
					return err.Error()
				}
			}

			opts := &Options{}
			opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
				func() TablePropertyCollector {
					return &minSeqNumPropertyCollector{}
				})

			var err error
			if d, err = runDBDefineCmd(td, opts); err != nil {
				return err.Error()
			}

			d.mu.Lock()
			// Disable the "dynamic base level" code for this test.
			// d.mu.versions.picker.forceBaseLevel1()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			return s

		case "set":
			if d == nil {
				return fmt.Sprintf("%s: db is not defined", td.Cmd)
			}

			var allowedSeeks int64
			td.ScanArgs(t, "allowed-seeks", &allowedSeeks)

			d.mu.Lock()
			for _, l := range d.mu.versions.currentVersion().Levels {
				l.Slice().Each(func(f *fileMetadata) {
					f.AllowedSeeks.Store(allowedSeeks)
				})
			}
			d.mu.Unlock()
			return ""

		case "show":
			if d == nil {
				return fmt.Sprintf("%s: db is not defined", td.Cmd)
			}

			var fileNum int64
			for _, arg := range td.CmdArgs {
				if len(arg.Vals) != 2 {
					return fmt.Sprintf("%s: %s=<value>", td.Cmd, arg.Key)
				}
				switch arg.Key {
				case "allowed-seeks":
					var err error
					fileNum, err = strconv.ParseInt(arg.Vals[0], 10, 64)
					if err != nil {
						return err.Error()
					}
				}
			}

			var foundAllowedSeeks int64 = -1
			d.mu.Lock()
			for _, l := range d.mu.versions.currentVersion().Levels {
				l.Slice().Each(func(f *fileMetadata) {
					if f.FileNum == base.FileNum(fileNum) {
						actualAllowedSeeks := f.AllowedSeeks.Load()
						foundAllowedSeeks = actualAllowedSeeks
					}
				})
			}
			d.mu.Unlock()

			if foundAllowedSeeks == -1 {
				return fmt.Sprintf("invalid file num: %d", fileNum)
			}
			return fmt.Sprintf("%d", foundAllowedSeeks)

		case "iter":
			if iter == nil || iter.iter == nil {
				// TODO(peter): runDBDefineCmd doesn't properly update the visible
				// sequence number. So we have to use a snapshot with a very large
				// sequence number, otherwise the DB appears empty.
				snap := Snapshot{
					db:     d,
					seqNum: InternalKeySeqNumMax,
				}
				iter, _ = snap.NewIter(nil)
				iter.readSampling.forceReadSampling = true
			}
			return runIterCmd(td, iter, false)

		case "read-compactions":
			if d == nil {
				return fmt.Sprintf("%s: db is not defined", td.Cmd)
			}

			d.mu.Lock()
			var sb strings.Builder
			if d.mu.compact.readCompactions.size == 0 {
				sb.WriteString("(none)")
			}
			for i := 0; i < d.mu.compact.readCompactions.size; i++ {
				rc := d.mu.compact.readCompactions.at(i)
				sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end)))
			}
			d.mu.Unlock()
			return sb.String()

		case "iter-read-compactions":
			if iter == nil {
				return fmt.Sprintf("%s: iter is not defined", td.Cmd)
			}

			var sb strings.Builder
			if iter.readSampling.pendingCompactions.size == 0 {
				sb.WriteString("(none)")
			}
			for i := 0; i < iter.readSampling.pendingCompactions.size; i++ {
				rc := iter.readSampling.pendingCompactions.at(i)
				sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end)))
			}
			return sb.String()

		case "close-iter":
			if iter != nil {
				if err := iter.Close(); err != nil {
					return err.Error()
				}
			}
			return ""

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}

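// Background for the commands above (a summary of the read-sampling
// mechanism, stated here for context): iterator reads probabilistically
// sample the files they touch and decrement each sampled file's AllowedSeeks
// budget; once a budget is exhausted, the file is queued as a read-triggered
// compaction, which the read-compactions and iter-read-compactions commands
// then print.
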
func TestIteratorTableFilter(t *testing.T) {
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	datadriven.RunTest(t, "testdata/iterator_table_filter", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			if d != nil {
				if err := d.Close(); err != nil {
					return err.Error()
				}
			}

			opts := &Options{}
			opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
				func() TablePropertyCollector {
					return &minSeqNumPropertyCollector{}
				})

			var err error
			if d, err = runDBDefineCmd(td, opts); err != nil {
				return err.Error()
			}

			d.mu.Lock()
			// Disable the "dynamic base level" code for this test.
			d.mu.versions.picker.forceBaseLevel1()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			return s

		case "iter":
			// We're using an iterator table filter to approximate what is done by
			// snapshots.
			iterOpts := &IterOptions{}
			var filterSeqNum uint64
			if td.MaybeScanArgs(t, "filter", &filterSeqNum) {
				iterOpts.TableFilter = func(userProps map[string]string) bool {
					minSeqNum, err := strconv.ParseUint(userProps["test.min-seq-num"], 10, 64)
					if err != nil {
						return true
					}
					return minSeqNum < filterSeqNum
				}
			}

			// TODO(peter): runDBDefineCmd doesn't properly update the visible
			// sequence number. So we have to use a snapshot with a very large
			// sequence number, otherwise the DB appears empty.
			snap := Snapshot{
				db:     d,
				seqNum: InternalKeySeqNumMax,
			}
			iter, _ := snap.NewIter(iterOpts)
			return runIterCmd(td, iter, true)

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}

func TestIteratorNextPrev(t *testing.T) {
	var mem vfs.FS
	var d *DB
	defer func() {
		require.NoError(t, d.Close())
	}()

	reset := func() {
		if d != nil {
			require.NoError(t, d.Close())
		}

		mem = vfs.NewMem()
		require.NoError(t, mem.MkdirAll("ext", 0755))
		opts := &Options{FS: mem}
		// Automatic compactions may compact away tombstones from L6, making
		// some testcases non-deterministic.
		opts.DisableAutomaticCompactions = true
		var err error
		d, err = Open("", opts)
		require.NoError(t, err)
	}
	reset()

	datadriven.RunTest(t, "testdata/iterator_next_prev", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "reset":
			reset()
			return ""

		case "build":
			if err := runBuildCmd(td, d, mem); err != nil {
				return err.Error()
			}
			return ""

		case "ingest":
			if err := runIngestCmd(td, d, mem); err != nil {
				return err.Error()
			}
			return runLSMCmd(td, d)

		case "iter":
			snap := Snapshot{
				db:     d,
				seqNum: InternalKeySeqNumMax,
			}
			td.MaybeScanArgs(t, "seq", &snap.seqNum)
			iter, _ := snap.NewIter(nil)
			return runIterCmd(td, iter, true)

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}

func TestIteratorStats(t *testing.T) {
	var mem vfs.FS
	var d *DB
	defer func() {
		require.NoError(t, d.Close())
	}()

	reset := func() {
		if d != nil {
			require.NoError(t, d.Close())
		}

		mem = vfs.NewMem()
		require.NoError(t, mem.MkdirAll("ext", 0755))
		opts := &Options{Comparer: testkeys.Comparer, FS: mem, FormatMajorVersion: internalFormatNewest}
		// Automatic compactions may make some testcases non-deterministic.
		opts.DisableAutomaticCompactions = true
		var err error
		d, err = Open("", opts)
		require.NoError(t, err)
	}
	reset()

	datadriven.RunTest(t, "testdata/iterator_stats", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "reset":
			reset()
			return ""

		case "build":
			if err := runBuildCmd(td, d, mem); err != nil {
				return err.Error()
			}
			return ""

		case "ingest":
			if err := runIngestCmd(td, d, mem); err != nil {
				return err.Error()
			}
			return runLSMCmd(td, d)

		case "iter":
			snap := Snapshot{
				db:     d,
				seqNum: InternalKeySeqNumMax,
			}
			td.MaybeScanArgs(t, "seq", &snap.seqNum)
			iter, _ := snap.NewIter(nil)
			return runIterCmd(td, iter, true)

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}

type iterSeekOptWrapper struct {
	internalIterator

	seekGEUsingNext, seekPrefixGEUsingNext *int
}

func (i *iterSeekOptWrapper) SeekGE(
	key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	if flags.TrySeekUsingNext() {
		*i.seekGEUsingNext++
	}
	return i.internalIterator.SeekGE(key, flags)
}

func (i *iterSeekOptWrapper) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	if flags.TrySeekUsingNext() {
		*i.seekPrefixGEUsingNext++
	}
	return i.internalIterator.SeekPrefixGE(prefix, key, flags)
}

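// Illustrative wiring for the wrapper above (a sketch; TestIteratorSeekOpt
// installs it via d.newIters instead): TrySeekUsingNext is set when the
// caller knows the new seek key is at or ahead of the current position,
// allowing the iterator to attempt a cheap forward scan instead of a full
// seek.
//
//	var nextCount, prefixNextCount int
//	w := &iterSeekOptWrapper{
//		internalIterator:      inner,
//		seekGEUsingNext:       &nextCount,
//		seekPrefixGEUsingNext: &prefixNextCount,
//	}
//	w.SeekGE(key, flags) // increments nextCount iff flags.TrySeekUsingNext()
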
func TestIteratorSeekOpt(t *testing.T) {
	var d *DB
	defer func() {
		require.NoError(t, d.Close())
	}()
	var iter *Iterator
	defer func() {
		if iter != nil {
			require.NoError(t, iter.Close())
		}
	}()
	var seekGEUsingNext, seekPrefixGEUsingNext int

	datadriven.RunTest(t, "testdata/iterator_seek_opt", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			if iter != nil {
				if err := iter.Close(); err != nil {
					return err.Error()
				}
			}
			if d != nil {
				if err := d.Close(); err != nil {
					return err.Error()
				}
			}
			seekGEUsingNext = 0
			seekPrefixGEUsingNext = 0

			opts := &Options{}
			opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
				func() TablePropertyCollector {
					return &minSeqNumPropertyCollector{}
				})

			var err error
			if d, err = runDBDefineCmd(td, opts); err != nil {
				return err.Error()
			}

			d.mu.Lock()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			oldNewIters := d.newIters
			d.newIters = func(
				ctx context.Context, file *manifest.FileMetadata, opts *IterOptions,
				internalOpts internalIterOpts) (internalIterator, keyspan.FragmentIterator, error) {
				iter, rangeIter, err := oldNewIters(ctx, file, opts, internalOpts)
				iterWrapped := &iterSeekOptWrapper{
					internalIterator:      iter,
					seekGEUsingNext:       &seekGEUsingNext,
					seekPrefixGEUsingNext: &seekPrefixGEUsingNext,
				}
				return iterWrapped, rangeIter, err
			}
			return s

		case "iter":
			if iter == nil || iter.iter == nil {
				// TODO(peter): runDBDefineCmd doesn't properly update the visible
				// sequence number. So we have to use a snapshot with a very large
				// sequence number, otherwise the DB appears empty.
				snap := Snapshot{
					db:     d,
					seqNum: InternalKeySeqNumMax,
				}
				iter, _ = snap.NewIter(nil)
				iter.readSampling.forceReadSampling = true
				iter.comparer.Split = func(a []byte) int { return len(a) }
				iter.forceEnableSeekOpt = true
				iter.merging.forceEnableSeekOpt = true
			}
			iterOutput := runIterCmd(td, iter, false)
			stats := iter.Stats()
			// InternalStats are non-deterministic since they depend on how data is
			// distributed across memtables and sstables in the DB.
			stats.InternalStats = InternalIteratorStats{}
			var builder strings.Builder
			fmt.Fprintf(&builder, "%sstats: %s\n", iterOutput, stats.String())
			fmt.Fprintf(&builder, "SeekGEs with trySeekUsingNext: %d\n", seekGEUsingNext)
			fmt.Fprintf(&builder, "SeekPrefixGEs with trySeekUsingNext: %d\n", seekPrefixGEUsingNext)
			return builder.String()

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}

type errorSeekIter struct {
	internalIterator
	// Fields controlling error injection for seeks.
	injectSeekErrorCounts []int
	seekCount             int
	err                   error
}

func (i *errorSeekIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
	if i.tryInjectError() {
		return nil, base.LazyValue{}
	}
	i.err = nil
	i.seekCount++
	return i.internalIterator.SeekGE(key, flags)
}

func (i *errorSeekIter) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	if i.tryInjectError() {
		return nil, base.LazyValue{}
	}
	i.err = nil
	i.seekCount++
	return i.internalIterator.SeekPrefixGE(prefix, key, flags)
}

func (i *errorSeekIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
	if i.tryInjectError() {
		return nil, base.LazyValue{}
	}
	i.err = nil
	i.seekCount++
	return i.internalIterator.SeekLT(key, flags)
}

func (i *errorSeekIter) tryInjectError() bool {
	if len(i.injectSeekErrorCounts) > 0 && i.injectSeekErrorCounts[0] == i.seekCount {
		i.seekCount++
		i.err = errors.Errorf("injecting error")
		i.injectSeekErrorCounts = i.injectSeekErrorCounts[1:]
		return true
	}
	return false
}

func (i *errorSeekIter) First() (*InternalKey, base.LazyValue) {
	i.err = nil
	return i.internalIterator.First()
}

func (i *errorSeekIter) Last() (*InternalKey, base.LazyValue) {
	i.err = nil
	return i.internalIterator.Last()
}

func (i *errorSeekIter) Next() (*InternalKey, base.LazyValue) {
	if i.err != nil {
		return nil, base.LazyValue{}
	}
	return i.internalIterator.Next()
}

func (i *errorSeekIter) Prev() (*InternalKey, base.LazyValue) {
	if i.err != nil {
		return nil, base.LazyValue{}
	}
	return i.internalIterator.Prev()
}

func (i *errorSeekIter) Error() error {
	if i.err != nil {
		return i.err
	}
	return i.internalIterator.Error()
}

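// A worked example of errorSeekIter's injection schedule: with
// injectSeekErrorCounts = []int{2}, the seeks with seekCount 0 and 1 pass
// through, the seek issued when seekCount == 2 returns an injected error,
// and subsequent Next/Prev calls short-circuit on the sticky error until a
// later successful seek clears it.
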
func TestIteratorSeekOptErrors(t *testing.T) {
	var keys []InternalKey
	var vals [][]byte

	var errorIter errorSeekIter
	newIter := func(opts IterOptions) *Iterator {
		iter := &fakeIter{
			lower: opts.GetLowerBound(),
			upper: opts.GetUpperBound(),
			keys:  keys,
			vals:  vals,
		}
		errorIter = errorSeekIter{internalIterator: invalidating.NewIter(iter)}
		// NB: This Iterator cannot be cloned since it is not constructed
		// with a readState. It suffices for this test.
		return &Iterator{
			opts:     opts,
			comparer: *testkeys.Comparer,
			merge:    DefaultMerger.Merge,
			iter:     &errorIter,
		}
	}

	datadriven.RunTest(t, "testdata/iterator_seek_opt_errors", func(t *testing.T, d *datadriven.TestData) string {
		switch d.Cmd {
		case "define":
			keys = keys[:0]
			vals = vals[:0]
			for _, key := range strings.Split(d.Input, "\n") {
				j := strings.Index(key, ":")
				keys = append(keys, base.ParseInternalKey(key[:j]))
				vals = append(vals, []byte(key[j+1:]))
			}
			return ""

		case "iter":
			var opts IterOptions
			var injectSeekGEErrorCounts []int
			for _, arg := range d.CmdArgs {
				if len(arg.Vals) < 1 {
					return fmt.Sprintf("%s: %s=<value>", d.Cmd, arg.Key)
				}
				switch arg.Key {
				case "lower":
					opts.LowerBound = []byte(arg.Vals[0])
				case "upper":
					opts.UpperBound = []byte(arg.Vals[0])
				case "seek-error":
					for i := 0; i < len(arg.Vals); i++ {
						n, err := strconv.Atoi(arg.Vals[i])
						if err != nil {
							return err.Error()
						}
						injectSeekGEErrorCounts = append(injectSeekGEErrorCounts, n)
					}
				default:
					return fmt.Sprintf("%s: unknown arg: %s", d.Cmd, arg.Key)
				}
			}

			iter := newIter(opts)
			errorIter.injectSeekErrorCounts = injectSeekGEErrorCounts
			return runIterCmd(d, iter, true)

		default:
			return fmt.Sprintf("unknown command: %s", d.Cmd)
		}
	})
}

type testBlockIntervalCollector struct {
	numLength     int
	offsetFromEnd int
	initialized   bool
	lower, upper  uint64
}

func (bi *testBlockIntervalCollector) Add(key InternalKey, value []byte) error {
	k := key.UserKey
	if len(k) < bi.numLength+bi.offsetFromEnd {
		return nil
	}
	n := len(k) - bi.offsetFromEnd - bi.numLength
	val, err := strconv.Atoi(string(k[n : n+bi.numLength]))
	if err != nil {
		return err
	}
	if val < 0 {
		panic("testBlockIntervalCollector expects values >= 0")
	}
	uval := uint64(val)
	if !bi.initialized {
		bi.lower, bi.upper = uval, uval+1
		bi.initialized = true
		return nil
	}
	if bi.lower > uval {
		bi.lower = uval
	}
	if uval >= bi.upper {
		bi.upper = uval + 1
	}
	return nil
}

func (bi *testBlockIntervalCollector) FinishDataBlock() (lower uint64, upper uint64, err error) {
	bi.initialized = false
	l, u := bi.lower, bi.upper
	bi.lower, bi.upper = 0, 0
	return l, u, nil
}

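// A worked example of the collector's semantics: with numLength=2 and
// offsetFromEnd=0, a data block containing keys "a05" and "a09" produces the
// interval [5, 10) from FinishDataBlock; the upper bound is exclusive, hence
// max+1.
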
func TestIteratorBlockIntervalFilter(t *testing.T) {
	var mem vfs.FS
	var d *DB
	defer func() {
		require.NoError(t, d.Close())
	}()

	type collector struct {
		id     uint16
		offset int
	}
	createDB := func(collectors []collector) {
		if d != nil {
			require.NoError(t, d.Close())
		}

		mem = vfs.NewMem()
		require.NoError(t, mem.MkdirAll("ext", 0755))

		var bpCollectors []func() BlockPropertyCollector
		for _, c := range collectors {
			coll := c
			bpCollectors = append(bpCollectors, func() BlockPropertyCollector {
				return sstable.NewBlockIntervalCollector(
					fmt.Sprintf("%d", coll.id),
					&testBlockIntervalCollector{numLength: 2, offsetFromEnd: coll.offset},
					nil, /* range key collector */
				)
			})
		}
		opts := &Options{
			FS:                      mem,
			FormatMajorVersion:      internalFormatNewest,
			BlockPropertyCollectors: bpCollectors,
		}
		lo := LevelOptions{BlockSize: 1, IndexBlockSize: 1}
		opts.Levels = append(opts.Levels, lo)

		// Automatic compactions may compact away tombstones from L6, making
		// some testcases non-deterministic.
		opts.DisableAutomaticCompactions = true
		var err error
		d, err = Open("", opts)
		require.NoError(t, err)
	}

	datadriven.RunTest(
		t, "testdata/iterator_block_interval_filter", func(t *testing.T, td *datadriven.TestData) string {
			switch td.Cmd {
			case "build":
				var collectors []collector
				for _, arg := range td.CmdArgs {
					switch arg.Key {
					case "id_offset":
						if len(arg.Vals) != 2 {
							return "id and offset not provided"
						}
						var id, offset int
						var err error
						if id, err = strconv.Atoi(arg.Vals[0]); err != nil {
							return err.Error()
						}
						if offset, err = strconv.Atoi(arg.Vals[1]); err != nil {
							return err.Error()
						}
						collectors = append(collectors, collector{id: uint16(id), offset: offset})
					default:
						return fmt.Sprintf("unknown key: %s", arg.Key)
					}
				}
				createDB(collectors)
				b := d.NewBatch()
				if err := runBatchDefineCmd(td, b); err != nil {
					return err.Error()
				}
				if err := b.Commit(nil); err != nil {
					return err.Error()
				}
				if err := d.Flush(); err != nil {
					return err.Error()
				}
				return runLSMCmd(td, d)

			case "iter":
				var opts IterOptions
				for _, arg := range td.CmdArgs {
					switch arg.Key {
					case "id_lower_upper":
						if len(arg.Vals) != 3 {
							return "id, lower, upper not provided"
						}
						var id, lower, upper int
						var err error
						if id, err = strconv.Atoi(arg.Vals[0]); err != nil {
							return err.Error()
						}
						if lower, err = strconv.Atoi(arg.Vals[1]); err != nil {
							return err.Error()
						}
						if upper, err = strconv.Atoi(arg.Vals[2]); err != nil {
							return err.Error()
						}
						opts.PointKeyFilters = append(opts.PointKeyFilters,
							sstable.NewBlockIntervalFilter(fmt.Sprintf("%d", id),
								uint64(lower), uint64(upper)))
					default:
						return fmt.Sprintf("unknown key: %s", arg.Key)
					}
				}
				rand.Shuffle(len(opts.PointKeyFilters), func(i, j int) {
					opts.PointKeyFilters[i], opts.PointKeyFilters[j] =
						opts.PointKeyFilters[j], opts.PointKeyFilters[i]
				})
				iter, _ := d.NewIter(&opts)
				return runIterCmd(td, iter, true)

			default:
				return fmt.Sprintf("unknown command: %s", td.Cmd)
			}
		})
}

var seed = flag.Uint64("seed", 0, "a pseudorandom number generator seed")

func randStr(fill []byte, rng *rand.Rand) {
	const letters = "abcdefghijklmnopqrstuvwxyz"
	const lettersLen = len(letters)
	for i := 0; i < len(fill); i++ {
		fill[i] = letters[rng.Intn(lettersLen)]
	}
}

func randValue(n int, rng *rand.Rand) []byte {
	buf := make([]byte, n)
	randStr(buf, rng)
	return buf
}

func randKey(n int, rng *rand.Rand) ([]byte, int) {
	keyPrefix := randValue(n, rng)
	suffix := rng.Intn(100)
	return append(keyPrefix, []byte(fmt.Sprintf("%02d", suffix))...), suffix
}

func TestIteratorRandomizedBlockIntervalFilter(t *testing.T) {
	mem := vfs.NewMem()
	opts := &Options{
		FS:                 mem,
		FormatMajorVersion: internalFormatNewest,
		BlockPropertyCollectors: []func() BlockPropertyCollector{
			func() BlockPropertyCollector {
				return sstable.NewBlockIntervalCollector(
					"0", &testBlockIntervalCollector{numLength: 2}, nil, /* range key collector */
				)
			},
		},
	}
	seed := *seed
	if seed == 0 {
		seed = uint64(time.Now().UnixNano())
		t.Logf("seed: %d", seed)
	}
	rng := rand.New(rand.NewSource(seed))
	opts.FlushSplitBytes = 1 << rng.Intn(8)             // 1B - 256B
	opts.L0CompactionThreshold = 1 << rng.Intn(2)       // 1-2
	opts.L0CompactionFileThreshold = 1 << rng.Intn(11)  // 1-1024
	opts.LBaseMaxBytes = 1 << rng.Intn(11)              // 1B - 1KB
	opts.MemTableSize = 2 << 10                         // 2KB
	var lopts LevelOptions
	lopts.BlockSize = 1 << rng.Intn(8)      // 1B - 256B
	lopts.IndexBlockSize = 1 << rng.Intn(8) // 1B - 256B
	opts.Levels = []LevelOptions{lopts}

	d, err := Open("", opts)
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()
	matchingKeyValues := make(map[string]string)
	lower := rng.Intn(100)
	upper := rng.Intn(100)
	if lower > upper {
		lower, upper = upper, lower
	}
	n := 2000
	for i := 0; i < n; i++ {
		key, suffix := randKey(20+rng.Intn(5), rng)
		value := randValue(50, rng)
		if lower <= suffix && suffix < upper {
			matchingKeyValues[string(key)] = string(value)
		}
		d.Set(key, value, nil)
	}

	var iterOpts IterOptions
	iterOpts.PointKeyFilters = []BlockPropertyFilter{
		sstable.NewBlockIntervalFilter("0",
			uint64(lower), uint64(upper)),
	}
	iter, _ := d.NewIter(&iterOpts)
	defer func() {
		require.NoError(t, iter.Close())
	}()
	iter.First()
	found := 0
	matchingCount := len(matchingKeyValues)
	for ; iter.Valid(); iter.Next() {
		found++
		key := string(iter.Key())
		value, ok := matchingKeyValues[key]
		if ok {
			require.Equal(t, value, string(iter.Value()))
			delete(matchingKeyValues, key)
		}
	}
	t.Logf("generated %d keys: %d matching, %d found", n, matchingCount, found)
	require.Equal(t, 0, len(matchingKeyValues))
}

func TestIteratorGuaranteedDurable(t *testing.T) {
	mem := vfs.NewMem()
	opts := &Options{FS: mem}
	d, err := Open("", opts)
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()
	iterOptions := IterOptions{OnlyReadGuaranteedDurable: true}
	failFunc := func(t *testing.T, reader Reader) {
		defer func() {
			if r := recover(); r == nil {
				require.Fail(t, "expected panic")
			}
			reader.Close()
		}()
		iter, _ := reader.NewIter(&iterOptions)
		defer iter.Close()
	}
	t.Run("snapshot", func(t *testing.T) {
		failFunc(t, d.NewSnapshot())
	})
	t.Run("batch", func(t *testing.T) {
		failFunc(t, d.NewIndexedBatch())
	})
	t.Run("db", func(t *testing.T) {
		d.Set([]byte("k"), []byte("v"), nil)
		foundKV := func(o *IterOptions) bool {
			iter, _ := d.NewIter(o)
			defer iter.Close()
			iter.SeekGE([]byte("k"))
			return iter.Valid()
		}
		require.True(t, foundKV(nil))
		require.False(t, foundKV(&iterOptions))
		require.NoError(t, d.Flush())
		require.True(t, foundKV(nil))
		require.True(t, foundKV(&iterOptions))
	})
}

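// Note on the semantics exercised above: OnlyReadGuaranteedDurable restricts
// an iterator to data in durable sstables. Snapshots and indexed batches
// necessarily read from memtables, so constructing such an iterator on them
// panics, while on the DB itself a key becomes visible to a durable-only
// iterator only after the memtable holding it is flushed.
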
func TestIteratorBoundsLifetimes(t *testing.T) {
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
	d := newPointTestkeysDatabase(t, testkeys.Alpha(2))
	defer func() { require.NoError(t, d.Close()) }()

	var buf bytes.Buffer
	iterators := map[string]*Iterator{}
	var labels []string
	printIters := func(w io.Writer) {
		labels = labels[:0]
		for label := range iterators {
			labels = append(labels, label)
		}
		sort.Strings(labels)
		for _, label := range labels {
			it := iterators[label]
			fmt.Fprintf(&buf, "%s: (", label)
			if it.opts.LowerBound == nil {
				fmt.Fprint(&buf, "<nil>, ")
			} else {
				fmt.Fprintf(&buf, "%q, ", it.opts.LowerBound)
			}
			if it.opts.UpperBound == nil {
				fmt.Fprint(&buf, "<nil>)")
			} else {
				fmt.Fprintf(&buf, "%q)", it.opts.UpperBound)
			}
			fmt.Fprintf(&buf, " boundsBufIdx=%d\n", it.boundsBufIdx)
		}
	}
	parseBounds := func(td *datadriven.TestData) (lower, upper []byte) {
		for _, arg := range td.CmdArgs {
			if arg.Key == "lower" {
				lower = []byte(arg.Vals[0])
			} else if arg.Key == "upper" {
				upper = []byte(arg.Vals[0])
			}
		}
		return lower, upper
	}
	trashBounds := func(bounds ...[]byte) {
		for _, bound := range bounds {
			rng.Read(bound[:])
		}
	}

	datadriven.RunTest(t, "testdata/iterator_bounds_lifetimes", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			var err error
			if d, err = runDBDefineCmd(td, d.opts); err != nil {
				return err.Error()
			}
			d.mu.Lock()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			return s
		case "new-iter":
			var label string
			td.ScanArgs(t, "label", &label)
			lower, upper := parseBounds(td)
			iterators[label], _ = d.NewIter(&IterOptions{
				LowerBound: lower,
				UpperBound: upper,
			})
			trashBounds(lower, upper)
			buf.Reset()
			printIters(&buf)
			return buf.String()
		case "clone":
			var from, to string
			td.ScanArgs(t, "from", &from)
			td.ScanArgs(t, "to", &to)
			var err error
			iterators[to], err = iterators[from].Clone(CloneOptions{})
			if err != nil {
				return err.Error()
			}
			buf.Reset()
			printIters(&buf)
			return buf.String()
		case "close":
			var label string
			td.ScanArgs(t, "label", &label)
			iterators[label].Close()
			delete(iterators, label)
			buf.Reset()
			printIters(&buf)
			return buf.String()
		case "iter":
			var label string
			td.ScanArgs(t, "label", &label)
			return runIterCmd(td, iterators[label], false /* closeIter */)
		case "set-bounds":
			var label string
			td.ScanArgs(t, "label", &label)
			lower, upper := parseBounds(td)
			iterators[label].SetBounds(lower, upper)
			trashBounds(lower, upper)
			buf.Reset()
			printIters(&buf)
			return buf.String()
		case "set-options":
			var label string
			var tableFilter bool
			td.ScanArgs(t, "label", &label)
			opts := iterators[label].opts
			for _, arg := range td.CmdArgs {
				if arg.Key == "table-filter" {
					tableFilter = true
				}
				if arg.Key == "key-types" {
					switch arg.Vals[0] {
					case "points-only":
						opts.KeyTypes = IterKeyTypePointsOnly
					case "ranges-only":
						opts.KeyTypes = IterKeyTypeRangesOnly
					case "both":
						opts.KeyTypes = IterKeyTypePointsAndRanges
					default:
						panic(fmt.Sprintf("unrecognized key type %q", arg.Vals[0]))
					}
				}
			}
			opts.LowerBound, opts.UpperBound = parseBounds(td)
			if tableFilter {
				opts.TableFilter = func(userProps map[string]string) bool { return false }
			}
			iterators[label].SetOptions(&opts)
			trashBounds(opts.LowerBound, opts.UpperBound)
			buf.Reset()
			printIters(&buf)
			return buf.String()
		default:
			return fmt.Sprintf("unrecognized command %q", td.Cmd)
		}
	})
}

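// The trashBounds helper above deliberately scribbles over the caller-owned
// bound slices after they have been handed to the iterator; printIters then
// reports the iterator's view of its bounds, verifying that the iterator
// copied the bounds rather than aliasing caller memory.
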
func TestIteratorStatsMerge(t *testing.T) {
	s := IteratorStats{
		ForwardSeekCount: [NumStatsKind]int{1, 2},
		ReverseSeekCount: [NumStatsKind]int{3, 4},
		ForwardStepCount: [NumStatsKind]int{5, 6},
		ReverseStepCount: [NumStatsKind]int{7, 8},
		InternalStats: InternalIteratorStats{
			BlockBytes:                     9,
			BlockBytesInCache:              10,
			BlockReadDuration:              3 * time.Millisecond,
			KeyBytes:                       11,
			ValueBytes:                     12,
			PointCount:                     13,
			PointsCoveredByRangeTombstones: 14,
		},
		RangeKeyStats: RangeKeyIteratorStats{
			Count:           15,
			ContainedPoints: 16,
			SkippedPoints:   17,
		},
	}
	s.InternalStats.SeparatedPointValue.Count = 1
	s.InternalStats.SeparatedPointValue.ValueBytes = 5
	s.InternalStats.SeparatedPointValue.ValueBytesFetched = 3
	s2 := IteratorStats{
		ForwardSeekCount: [NumStatsKind]int{1, 2},
		ReverseSeekCount: [NumStatsKind]int{3, 4},
		ForwardStepCount: [NumStatsKind]int{5, 6},
		ReverseStepCount: [NumStatsKind]int{7, 8},
		InternalStats: InternalIteratorStats{
			BlockBytes:                     9,
			BlockBytesInCache:              10,
			BlockReadDuration:              4 * time.Millisecond,
			KeyBytes:                       11,
			ValueBytes:                     12,
			PointCount:                     13,
			PointsCoveredByRangeTombstones: 14,
		},
		RangeKeyStats: RangeKeyIteratorStats{
			Count:           15,
			ContainedPoints: 16,
			SkippedPoints:   17,
		},
	}
	s2.InternalStats.SeparatedPointValue.Count = 2
	s2.InternalStats.SeparatedPointValue.ValueBytes = 10
	s2.InternalStats.SeparatedPointValue.ValueBytesFetched = 6
	s.Merge(s2)
	expected := IteratorStats{
		ForwardSeekCount: [NumStatsKind]int{2, 4},
		ReverseSeekCount: [NumStatsKind]int{6, 8},
		ForwardStepCount: [NumStatsKind]int{10, 12},
		ReverseStepCount: [NumStatsKind]int{14, 16},
		InternalStats: InternalIteratorStats{
			BlockBytes:                     18,
			BlockBytesInCache:              20,
			BlockReadDuration:              7 * time.Millisecond,
			KeyBytes:                       22,
			ValueBytes:                     24,
			PointCount:                     26,
			PointsCoveredByRangeTombstones: 28,
		},
		RangeKeyStats: RangeKeyIteratorStats{
			Count:           30,
			ContainedPoints: 32,
			SkippedPoints:   34,
		},
	}
	expected.InternalStats.SeparatedPointValue.Count = 3
	expected.InternalStats.SeparatedPointValue.ValueBytes = 15
	expected.InternalStats.SeparatedPointValue.ValueBytesFetched = 9
	require.Equal(t, expected, s)
}

// TestSetOptionsEquivalence tests equivalence between SetOptions to mutate an
// iterator and constructing a new iterator with NewIter. The long-lived
// iterator and the new iterator should surface identical iterator states.
func TestSetOptionsEquivalence(t *testing.T) {
	seed := uint64(time.Now().UnixNano())
	// Call a helper function with the seed so that the seed appears within
	// stack traces if there's a panic.
	testSetOptionsEquivalence(t, seed)
}

func testSetOptionsEquivalence(t *testing.T, seed uint64) {
	rng := rand.New(rand.NewSource(seed))
	ks := testkeys.Alpha(2)
	d := newTestkeysDatabase(t, ks, rng)
	defer func() { require.NoError(t, d.Close()) }()

	var o IterOptions
	generateNewOptions := func() {
		// TODO(jackson): Include test coverage for block property filters, etc.
		if rng.Intn(2) == 1 {
			o.KeyTypes = IterKeyType(rng.Intn(3))
		}
		if rng.Intn(2) == 1 {
			if rng.Intn(2) == 1 {
				o.LowerBound = nil
				if rng.Intn(2) == 1 {
					o.LowerBound = testkeys.KeyAt(ks, rng.Int63n(ks.Count()), rng.Int63n(ks.Count()))
				}
			}
			if rng.Intn(2) == 1 {
				o.UpperBound = nil
				if rng.Intn(2) == 1 {
					o.UpperBound = testkeys.KeyAt(ks, rng.Int63n(ks.Count()), rng.Int63n(ks.Count()))
				}
			}
			if testkeys.Comparer.Compare(o.LowerBound, o.UpperBound) > 0 {
				o.LowerBound, o.UpperBound = o.UpperBound, o.LowerBound
			}
		}
		o.RangeKeyMasking.Suffix = nil
		if o.KeyTypes == IterKeyTypePointsAndRanges && rng.Intn(2) == 1 {
			o.RangeKeyMasking.Suffix = testkeys.Suffix(rng.Int63n(ks.Count()))
		}
	}

	var longLivedIter, newIter *Iterator
	var history, longLivedBuf, newIterBuf bytes.Buffer
	defer func() {
		if r := recover(); r != nil {
			t.Log(history.String())
			panic(r)
		}
	}()
	defer func() {
		if longLivedIter != nil {
			longLivedIter.Close()
		}
		if newIter != nil {
			newIter.Close()
		}
	}()

	type positioningOp struct {
		desc string
		run  func(*Iterator) IterValidityState
	}
	positioningOps := []func() positioningOp{
		// SeekGE
		func() positioningOp {
			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
			return positioningOp{
				desc: fmt.Sprintf("SeekGE(%q)", k),
				run: func(it *Iterator) IterValidityState {
					return it.SeekGEWithLimit(k, nil)
				},
			}
		},
		// SeekLT
		func() positioningOp {
			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
			return positioningOp{
				desc: fmt.Sprintf("SeekLT(%q)", k),
				run: func(it *Iterator) IterValidityState {
					return it.SeekLTWithLimit(k, nil)
				},
			}
		},
		// SeekPrefixGE
		func() positioningOp {
			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
			return positioningOp{
				desc: fmt.Sprintf("SeekPrefixGE(%q)", k),
				run: func(it *Iterator) IterValidityState {
					if it.SeekPrefixGE(k) {
						return IterValid
					}
					return IterExhausted
				},
			}
		},
	}

	for i := 0; i < 10_000; i++ {
		// Generate new random options. The options in o will be mutated.
		generateNewOptions()
		fmt.Fprintf(&history, "new options: %s\n", iterOptionsString(&o))

		newIter, _ = d.NewIter(&o)
		if longLivedIter == nil {
			longLivedIter, _ = d.NewIter(&o)
		} else {
			longLivedIter.SetOptions(&o)
		}

		// Apply the same operation to both iterators.
		iterOp := positioningOps[rng.Intn(len(positioningOps))]()
		newIterValidity := iterOp.run(newIter)
		longLivedValidity := iterOp.run(longLivedIter)

		newIterBuf.Reset()
		longLivedBuf.Reset()
		printIterState(&newIterBuf, newIter, newIterValidity, true /* printValidityState */)
		printIterState(&longLivedBuf, longLivedIter, longLivedValidity, true /* printValidityState */)
		fmt.Fprintf(&history, "%s = %s\n", iterOp.desc, newIterBuf.String())

		if newIterBuf.String() != longLivedBuf.String() {
			t.Logf("history:\n%s\n", history.String())
			t.Logf("seed: %d\n", seed)
			t.Fatalf("expected %q, got %q", newIterBuf.String(), longLivedBuf.String())
		}
		_ = newIter.Close()

		newIter = nil
	}
	t.Logf("history:\n%s\n", history.String())
}

func iterOptionsString(o *IterOptions) string {
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "key-types=%s, lower=%q, upper=%q",
		o.KeyTypes, o.LowerBound, o.UpperBound)
	if o.TableFilter != nil {
		fmt.Fprintf(&buf, ", table-filter")
	}
	if o.OnlyReadGuaranteedDurable {
		fmt.Fprintf(&buf, ", only-durable")
	}
	if o.UseL6Filters {
		fmt.Fprintf(&buf, ", use-L6-filters")
	}
	for i, pkf := range o.PointKeyFilters {
		fmt.Fprintf(&buf, ", point-key-filter[%d]=%q", i, pkf.Name())
	}
	for i, rkf := range o.RangeKeyFilters {
		fmt.Fprintf(&buf, ", range-key-filter[%d]=%q", i, rkf.Name())
	}
	return buf.String()
}

func newTestkeysDatabase(t *testing.T, ks testkeys.Keyspace, rng *rand.Rand) *DB {
	dbOpts := &Options{
		Comparer:           testkeys.Comparer,
		FS:                 vfs.NewMem(),
		FormatMajorVersion: FormatRangeKeys,
		Logger:             panicLogger{},
	}
	d, err := Open("", dbOpts)
	require.NoError(t, err)

	// Randomize the order in which we write keys.
	order := rng.Perm(int(ks.Count()))
	b := d.NewBatch()
	keyBuf := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen)
	keyBuf2 := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen)
	for i := 0; i < len(order); i++ {
		const maxVersionsPerKey = 10
		keyIndex := order[i]
		for versions := rng.Intn(maxVersionsPerKey); versions > 0; versions-- {
			n := testkeys.WriteKeyAt(keyBuf, ks, int64(keyIndex), rng.Int63n(maxVersionsPerKey))
			b.Set(keyBuf[:n], keyBuf[:n], nil)
		}

		// Sometimes add a range key too.
		if rng.Intn(100) == 1 {
			startIdx := rng.Int63n(ks.Count())
			endIdx := rng.Int63n(ks.Count())
			startLen := testkeys.WriteKey(keyBuf, ks, startIdx)
			endLen := testkeys.WriteKey(keyBuf2, ks, endIdx)
			suffixInt := rng.Int63n(maxVersionsPerKey)
			require.NoError(t, b.RangeKeySet(
				keyBuf[:startLen],
				keyBuf2[:endLen],
				testkeys.Suffix(suffixInt),
				nil,
				nil))
		}

		// Randomize the flush points.
		if !b.Empty() && rng.Intn(10) == 1 {
			require.NoError(t, b.Commit(nil))
			require.NoError(t, d.Flush())
			b = d.NewBatch()
		}
	}
	if !b.Empty() {
		require.NoError(t, b.Commit(nil))
	}
	return d
}

func newPointTestkeysDatabase(t *testing.T, ks testkeys.Keyspace) *DB {
	dbOpts := &Options{
		Comparer:           testkeys.Comparer,
		FS:                 vfs.NewMem(),
		FormatMajorVersion: FormatRangeKeys,
	}
	d, err := Open("", dbOpts)
	require.NoError(t, err)

	b := d.NewBatch()
	keyBuf := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen)
	for i := int64(0); i < ks.Count(); i++ {
		n := testkeys.WriteKeyAt(keyBuf, ks, i, i)
		b.Set(keyBuf[:n], keyBuf[:n], nil)
	}
	require.NoError(t, b.Commit(nil))
	return d
}

func BenchmarkIteratorSeekGE(b *testing.B) {
	m, keys := buildMemTable(b)
	iter := &Iterator{
		comparer: *DefaultComparer,
		iter:     m.newIter(nil),
	}
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		key := keys[rng.Intn(len(keys))]
		iter.SeekGE(key)
	}
}

func BenchmarkIteratorNext(b *testing.B) {
	m, _ := buildMemTable(b)
	iter := &Iterator{
		comparer: *DefaultComparer,
		iter:     m.newIter(nil),
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if !iter.Valid() {
			iter.First()
		}
		iter.Next()
	}
}

func BenchmarkIteratorPrev(b *testing.B) {
	m, _ := buildMemTable(b)
	iter := &Iterator{
		comparer: *DefaultComparer,
		iter:     m.newIter(nil),
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if !iter.Valid() {
			iter.Last()
		}
		iter.Prev()
	}
}

type twoLevelBloomTombstoneState struct {
	keys        [][]byte
	readers     [8][][]*sstable.Reader
	levelSlices [8][]manifest.LevelSlice
	indexFunc   func(twoLevelIndex bool, bloom bool, withTombstone bool) int
}

func setupForTwoLevelBloomTombstone(b *testing.B, keyOffset int) twoLevelBloomTombstoneState {
	const blockSize = 32 << 10
	const restartInterval = 16
	const levelCount = 5

	var readers [8][][]*sstable.Reader
	var levelSlices [8][]manifest.LevelSlice
	var keys [][]byte
	indexFunc := func(twoLevelIndex bool, bloom bool, withTombstone bool) int {
		index := 0
		if twoLevelIndex {
			index = 4
		}
		if bloom {
			index += 2
		}
		if withTombstone {
			index++
		}
		return index
	}
	for _, twoLevelIndex := range []bool{false, true} {
		for _, bloom := range []bool{false, true} {
			for _, withTombstone := range []bool{false, true} {
				index := indexFunc(twoLevelIndex, bloom, withTombstone)
				levels := levelCount
				if withTombstone {
					levels = 1
				}
				readers[index], levelSlices[index], keys = buildLevelsForMergingIterSeqSeek(
					b, blockSize, restartInterval, levels, keyOffset, withTombstone, bloom, twoLevelIndex)
			}
		}
	}
	return twoLevelBloomTombstoneState{
		keys: keys, readers: readers, levelSlices: levelSlices, indexFunc: indexFunc}
}

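// indexFunc packs the three configuration booleans into a 3-bit index: a
// two-level index contributes 4, a bloom filter 2, and a tombstone 1. For
// example, indexFunc(true, false, true) == 5.
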
// BenchmarkIteratorSeqSeekPrefixGENotFound exercises the case of SeekPrefixGE
// specifying monotonic keys all of which precede actual keys present in L6 of
// the DB. Moreover, with-tombstone=true exercises the sub-case where those
// actual keys are deleted using a range tombstone that has not physically
// deleted those keys due to the presence of a snapshot that needs to see
// those keys. This sub-case needs to be efficient in (a) avoiding iteration
// over all those deleted keys, including repeated iteration, and (b) using
// the next optimization, since the seeks are monotonic.
func BenchmarkIteratorSeqSeekPrefixGENotFound(b *testing.B) {
	const keyOffset = 100000
	state := setupForTwoLevelBloomTombstone(b, keyOffset)
	readers := state.readers
	levelSlices := state.levelSlices
	indexFunc := state.indexFunc

	// We will not be seeking to the keys that were written but instead to
	// keys before the written keys. This is to validate that the optimization
	// to use Next still functions when mergingIter checks for the prefix
	// match, and that mergingIter can avoid iterating over all the keys
	// deleted by a range tombstone when there is no possibility of matching
	// the prefix.
	var keys [][]byte
	for i := 0; i < keyOffset; i++ {
		keys = append(keys, []byte(fmt.Sprintf("%08d", i)))
	}
	for _, skip := range []int{1, 2, 4} {
		for _, twoLevelIndex := range []bool{false, true} {
			for _, bloom := range []bool{false, true} {
				for _, withTombstone := range []bool{false, true} {
					b.Run(fmt.Sprintf("skip=%d/two-level=%t/bloom=%t/with-tombstone=%t",
						skip, twoLevelIndex, bloom, withTombstone),
						func(b *testing.B) {
							index := indexFunc(twoLevelIndex, bloom, withTombstone)
							readers := readers[index]
							levelSlices := levelSlices[index]
							m := buildMergingIter(readers, levelSlices)
							iter := Iterator{
								comparer: *testkeys.Comparer,
								merge:    DefaultMerger.Merge,
								iter:     m,
							}
							pos := 0
							b.ResetTimer()
							for i := 0; i < b.N; i++ {
								// When withTombstone=true, and prior to the
								// optimization to stop early due to a range
								// tombstone, the iteration would continue into the
								// next file, and not be able to use Next at the lower
								// level in the next SeekPrefixGE call. So we would
								// incur the cost of iterating over all the deleted
								// keys for every seek. Note that it is not possible
								// to do a noop optimization in Iterator for the
								// prefix case, unlike SeekGE/SeekLT, since we don't
								// know if the iterators inside mergingIter are all
								// appropriately positioned -- some may not be due to
								// bloom filters not matching.
								valid := iter.SeekPrefixGE(keys[pos])
								if valid {
									b.Fatalf("key should not be found")
								}
								pos += skip
								if pos >= keyOffset {
									pos = 0
								}
							}
							b.StopTimer()
							iter.Close()
						})
				}
			}
		}
	}
	for _, r := range readers {
		for i := range r {
			for j := range r[i] {
				r[i][j].Close()
			}
		}
	}
}

// This sub-case needs to be efficient in (a) avoiding iteration over all
// those deleted keys, including repeated iteration, and (b) using the next
// optimization, since the seeks are monotonic.
func BenchmarkIteratorSeqSeekPrefixGEFound(b *testing.B) {
	state := setupForTwoLevelBloomTombstone(b, 0)
	keys := state.keys
	readers := state.readers
	levelSlices := state.levelSlices
	indexFunc := state.indexFunc

	for _, skip := range []int{1, 2, 4} {
		for _, twoLevelIndex := range []bool{false, true} {
			for _, bloom := range []bool{false, true} {
				for _, withTombstone := range []bool{false, true} {
					b.Run(fmt.Sprintf("skip=%d/two-level=%t/bloom=%t/with-tombstone=%t",
						skip, twoLevelIndex, bloom, withTombstone),
						func(b *testing.B) {
							index := indexFunc(twoLevelIndex, bloom, withTombstone)
							readers := readers[index]
							levelSlices := levelSlices[index]
							m := buildMergingIter(readers, levelSlices)
							iter := Iterator{
								comparer: *testkeys.Comparer,
								merge:    DefaultMerger.Merge,
								iter:     m,
							}
							pos := 0
							b.ResetTimer()
							for i := 0; i < b.N; i++ {
								// When withTombstone=true, and prior to the
								// optimization to stop early due to a range
								// tombstone, the iteration would continue into the
								// next file, and not be able to use Next at the lower
								// level in the next SeekPrefixGE call. So we would
								// incur the cost of iterating over all the deleted
								// keys for every seek. Note that it is not possible
								// to do a noop optimization in Iterator for the
								// prefix case, unlike SeekGE/SeekLT, since we don't
								// know if the iterators inside mergingIter are all
								// appropriately positioned -- some may not be due to
								// bloom filters not matching.
								_ = iter.SeekPrefixGE(keys[pos])
								pos += skip
								if pos >= len(keys) {
									pos = 0
								}
							}
							b.StopTimer()
							iter.Close()
						})
				}
			}
		}
	}
	for _, r := range readers {
		for i := range r {
			for j := range r[i] {
				r[i][j].Close()
			}
		}
	}
}

// BenchmarkIteratorSeqSeekGEWithBounds is analogous to
// BenchmarkMergingIterSeqSeekGEWithBounds, except for using an Iterator,
// which causes it to exercise the end-to-end code path.
func BenchmarkIteratorSeqSeekGEWithBounds(b *testing.B) {
	const blockSize = 32 << 10
	const restartInterval = 16
	const levelCount = 5
	for _, twoLevelIndex := range []bool{false, true} {
		b.Run(fmt.Sprintf("two-level=%t", twoLevelIndex),
			func(b *testing.B) {
				readers, levelSlices, keys := buildLevelsForMergingIterSeqSeek(
					b, blockSize, restartInterval, levelCount, 0, /* keyOffset */
					false, false, twoLevelIndex)
				m := buildMergingIter(readers, levelSlices)
				iter := Iterator{
					comparer: *testkeys.Comparer,
					merge:    DefaultMerger.Merge,
					iter:     m,
				}
				keyCount := len(keys)
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					pos := i % (keyCount - 1)
					iter.SetBounds(keys[pos], keys[pos+1])
					// SeekGE will return keys[pos].
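					// Since the seek keys and bounds advance monotonically,
					// the iterator should be able to use the next
					// optimization rather than re-seeking from scratch.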
					valid := iter.SeekGE(keys[pos])
					for valid {
						valid = iter.Next()
					}
					if iter.Error() != nil {
						b.Fatal(iter.Error())
					}
				}
				iter.Close()
				for i := range readers {
					for j := range readers[i] {
						readers[i][j].Close()
					}
				}
			})
	}
}

func BenchmarkIteratorSeekGENoop(b *testing.B) {
	const blockSize = 32 << 10
	const restartInterval = 16
	const levelCount = 5
	const keyOffset = 10000
	readers, levelSlices, _ := buildLevelsForMergingIterSeqSeek(
		b, blockSize, restartInterval, levelCount, keyOffset, false, false, false)
	var keys [][]byte
	for i := 0; i < keyOffset; i++ {
		keys = append(keys, []byte(fmt.Sprintf("%08d", i)))
	}
	for _, withLimit := range []bool{false, true} {
		b.Run(fmt.Sprintf("withLimit=%t", withLimit), func(b *testing.B) {
			m := buildMergingIter(readers, levelSlices)
			iter := Iterator{
				comparer: *testkeys.Comparer,
				merge:    DefaultMerger.Merge,
				iter:     m,
			}
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				pos := i % (len(keys) - 1)
				if withLimit {
					if iter.SeekGEWithLimit(keys[pos], keys[pos+1]) != IterAtLimit {
						b.Fatal("should be at limit")
					}
				} else {
					if !iter.SeekGE(keys[pos]) {
						b.Fatal("should be valid")
					}
				}
			}
			iter.Close()
		})
	}
	for i := range readers {
		for j := range readers[i] {
			readers[i][j].Close()
		}
	}
}

func BenchmarkBlockPropertyFilter(b *testing.B) {
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
	for _, matchInterval := range []int{1, 10, 100, 1000} {
		b.Run(fmt.Sprintf("match-interval=%d", matchInterval), func(b *testing.B) {
			mem := vfs.NewMem()
			opts := &Options{
				FS:                 mem,
				FormatMajorVersion: FormatNewest,
				BlockPropertyCollectors: []func() BlockPropertyCollector{
					func() BlockPropertyCollector {
						return sstable.NewBlockIntervalCollector(
							"0", &testBlockIntervalCollector{numLength: 3}, nil, /* range key collector */
						)
					},
				},
			}
			d, err := Open("", opts)
			require.NoError(b, err)
			defer func() {
				require.NoError(b, d.Close())
			}()
			batch := d.NewBatch()
			const numKeys = 20 * 1000
			const valueSize = 1000
			for i := 0; i < numKeys; i++ {
				key := fmt.Sprintf("%06d%03d", i, i%matchInterval)
				value := randValue(valueSize, rng)
				require.NoError(b, batch.Set([]byte(key), value, nil))
			}
			require.NoError(b, batch.Commit(nil))
			require.NoError(b, d.Flush())
			require.NoError(b, d.Compact(nil, []byte{0xFF}, false))

			for _, filter := range []bool{false, true} {
				b.Run(fmt.Sprintf("filter=%t", filter), func(b *testing.B) {
					var iterOpts IterOptions
					if filter {
						iterOpts.PointKeyFilters = []BlockPropertyFilter{
							sstable.NewBlockIntervalFilter("0",
								uint64(0), uint64(1)),
						}
					}
					iter, _ := d.NewIter(&iterOpts)
					b.ResetTimer()
					for i := 0; i < b.N; i++ {
						valid := iter.First()
						for valid {
							valid = iter.Next()
						}
					}
					b.StopTimer()
					require.NoError(b, iter.Close())
				})
			}
		})
	}
}

func TestRangeKeyMaskingRandomized(t *testing.T) {
	seed := *seed
	if seed == 0 {
		seed = uint64(time.Now().UnixNano())
		t.Logf("seed: %d", seed)
	}
	rng := rand.New(rand.NewSource(seed))

	// Generate a keyspace with point keys, and range keys which will
	// mask the point keys.
	var timestamps []int64
	for i := 0; i <= 100; i++ {
		timestamps = append(timestamps, rng.Int63n(1000))
	}

	ks := testkeys.Alpha(5)
	numKeys := 1000 + rng.Intn(9000)
	keys := make([][]byte, numKeys)
	keyTimeStamps := make([]int64, numKeys) // ts associated with the keys.
	for i := 0; i < numKeys; i++ {
		keys[i] = make([]byte, 5+testkeys.MaxSuffixLen)
		keyTimeStamps[i] = timestamps[rng.Intn(len(timestamps))]
		n := testkeys.WriteKeyAt(keys[i], ks, rng.Int63n(ks.Count()), keyTimeStamps[i])
		keys[i] = keys[i][:n]
	}

	numRangeKeys := rng.Intn(20)
	type rkey struct {
		start  []byte
		end    []byte
		suffix []byte
	}
	rkeys := make([]rkey, numRangeKeys)
	pointKeyHidden := make([]bool, numKeys)
	for i := 0; i < numRangeKeys; i++ {
		rkeys[i].start = make([]byte, 5)
		rkeys[i].end = make([]byte, 5)

		testkeys.WriteKey(rkeys[i].start[:5], ks, rng.Int63n(ks.Count()))
		testkeys.WriteKey(rkeys[i].end[:5], ks, rng.Int63n(ks.Count()))

		for bytes.Equal(rkeys[i].start[:5], rkeys[i].end[:5]) {
			testkeys.WriteKey(rkeys[i].end[:5], ks, rng.Int63n(ks.Count()))
		}

		if bytes.Compare(rkeys[i].start[:5], rkeys[i].end[:5]) > 0 {
			rkeys[i].start, rkeys[i].end = rkeys[i].end, rkeys[i].start
		}

		rkeyTimestamp := timestamps[rng.Intn(len(timestamps))]
		rkeys[i].suffix = []byte("@" + strconv.FormatInt(rkeyTimestamp, 10))

		// Each time we create a range key, check if the range key masks any
		// point keys.
		for j, pkey := range keys {
			if pointKeyHidden[j] {
				continue
			}

			if keyTimeStamps[j] >= rkeyTimestamp {
				continue
			}

			if testkeys.Comparer.Compare(pkey, rkeys[i].start) >= 0 &&
				testkeys.Comparer.Compare(pkey, rkeys[i].end) < 0 {
				pointKeyHidden[j] = true
			}
		}
	}

	// Define a simple base testOpts, and a randomized testOpts. The results
	// of iteration will be compared.
	type testOpts struct {
		levelOpts []LevelOptions
		filter    func() BlockPropertyFilterMask
	}

	baseOpts := testOpts{
		levelOpts: make([]LevelOptions, 7),
	}
	for i := 0; i < len(baseOpts.levelOpts); i++ {
		baseOpts.levelOpts[i].TargetFileSize = 1
		baseOpts.levelOpts[i].BlockSize = 1
	}

	randomOpts := testOpts{
		levelOpts: []LevelOptions{
			{
				TargetFileSize: int64(1 + rng.Intn(2<<20)), // Vary the L0 file size.
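				// Vary the block size as well.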
				BlockSize: 1 + rng.Intn(32<<10),
			},
		},
	}
	if rng.Intn(2) == 0 {
		randomOpts.filter = func() BlockPropertyFilterMask {
			return sstable.NewTestKeysMaskingFilter()
		}
	}

	maxProcs := runtime.GOMAXPROCS(0)

	opts1 := &Options{
		FS:                       vfs.NewStrictMem(),
		Comparer:                 testkeys.Comparer,
		FormatMajorVersion:       FormatNewest,
		MaxConcurrentCompactions: func() int { return maxProcs/2 + 1 },
		BlockPropertyCollectors: []func() BlockPropertyCollector{
			sstable.NewTestKeysBlockPropertyCollector,
		},
	}
	opts1.Levels = baseOpts.levelOpts
	d1, err := Open("", opts1)
	require.NoError(t, err)

	opts2 := &Options{
		FS:                       vfs.NewStrictMem(),
		Comparer:                 testkeys.Comparer,
		FormatMajorVersion:       FormatNewest,
		MaxConcurrentCompactions: func() int { return maxProcs/2 + 1 },
		BlockPropertyCollectors: []func() BlockPropertyCollector{
			sstable.NewTestKeysBlockPropertyCollector,
		},
	}
	opts2.Levels = randomOpts.levelOpts
	d2, err := Open("", opts2)
	require.NoError(t, err)

	defer func() {
		if err := d1.Close(); err != nil {
			t.Fatal(err)
		}
		if err := d2.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	// Run the test.
	var batch1 *Batch
	var batch2 *Batch
	const keysPerBatch = 50
	for i := 0; i < numKeys; i++ {
		if i%keysPerBatch == 0 {
			if batch1 != nil {
				require.NoError(t, batch1.Commit(nil))
				require.NoError(t, batch2.Commit(nil))
			}
			batch1 = d1.NewBatch()
			batch2 = d2.NewBatch()
		}
		require.NoError(t, batch1.Set(keys[i], []byte{1}, nil))
		require.NoError(t, batch2.Set(keys[i], []byte{1}, nil))
	}
	// Commit the final batches, which the loop above leaves uncommitted.
	require.NoError(t, batch1.Commit(nil))
	require.NoError(t, batch2.Commit(nil))

	for _, rkey := range rkeys {
		require.NoError(t, d1.RangeKeySet(rkey.start, rkey.end, rkey.suffix, nil, nil))
		require.NoError(t, d2.RangeKeySet(rkey.start, rkey.end, rkey.suffix, nil, nil))
	}

	// Scan the keyspace.
	iter1Opts := IterOptions{
		KeyTypes: IterKeyTypePointsAndRanges,
		RangeKeyMasking: RangeKeyMasking{
			Suffix: []byte("@1000"),
			Filter: baseOpts.filter,
		},
	}

	iter2Opts := IterOptions{
		KeyTypes: IterKeyTypePointsAndRanges,
		RangeKeyMasking: RangeKeyMasking{
			Suffix: []byte("@1000"),
			Filter: randomOpts.filter,
		},
	}

	iter1, _ := d1.NewIter(&iter1Opts)
	iter2, _ := d2.NewIter(&iter2Opts)
	defer func() {
		if err := iter1.Close(); err != nil {
			t.Fatal(err)
		}
		if err := iter2.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	for valid1, valid2 := iter1.First(), iter2.First(); valid1 || valid2; valid1, valid2 = iter1.Next(), iter2.Next() {
		if valid1 != valid2 {
			t.Fatalf("iteration didn't produce identical results")
		}

		// Confirm exposed range key state is identical.
		hasP1, hasR1 := iter1.HasPointAndRange()
		hasP2, hasR2 := iter2.HasPointAndRange()
		if hasP1 != hasP2 || hasR1 != hasR2 {
			t.Fatalf("iteration didn't produce identical results")
		}
		if hasP1 && !bytes.Equal(iter1.Key(), iter2.Key()) {
			t.Fatalf("iteration didn't produce identical point keys: %s, %s", iter1.Key(), iter2.Key())
		}
		if hasR1 {
			// Confirm that the range key is the same.
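			// RangeBounds returns the start (inclusive) and end (exclusive)
			// bounds of the range key covering the current iterator
			// position.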
			b1, e1 := iter1.RangeBounds()
			b2, e2 := iter2.RangeBounds()
			if !bytes.Equal(b1, b2) || !bytes.Equal(e1, e2) {
				t.Fatalf(
					"iteration didn't produce identical range keys: [%s, %s], [%s, %s]",
					b1, e1, b2, e2,
				)
			}
		}

		// Confirm that the returned point key wasn't hidden.
		for j, pkey := range keys {
			if bytes.Equal(iter1.Key(), pkey) && pointKeyHidden[j] {
				t.Fatalf("hidden point key was exposed %s %d", pkey, keyTimeStamps[j])
			}
		}
	}
}

// BenchmarkIterator_RangeKeyMasking benchmarks a scan through a keyspace with
// 10,000 random suffixed point keys, and three range keys covering most of the
// keyspace. It varies the suffix of the range keys in subbenchmarks to exercise
// varying amounts of masking. This benchmark does configure a block-property
// filter, allowing for skipping blocks wholly contained within a range key and
// consisting of points all with a suffix lower than the range key's.
func BenchmarkIterator_RangeKeyMasking(b *testing.B) {
	const (
		prefixLen    = 20
		valueSize    = 1024
		batches      = 200
		keysPerBatch = 50
	)
	var alloc bytealloc.A
	rng := rand.New(rand.NewSource(uint64(1658872515083979000)))
	keyBuf := make([]byte, prefixLen+testkeys.MaxSuffixLen)
	valBuf := make([]byte, valueSize)

	mem := vfs.NewStrictMem()
	maxProcs := runtime.GOMAXPROCS(0)
	opts := &Options{
		FS:                       mem,
		Comparer:                 testkeys.Comparer,
		FormatMajorVersion:       FormatNewest,
		MaxConcurrentCompactions: func() int { return maxProcs/2 + 1 },
		BlockPropertyCollectors: []func() BlockPropertyCollector{
			sstable.NewTestKeysBlockPropertyCollector,
		},
	}
	d, err := Open("", opts)
	require.NoError(b, err)

	keys := make([][]byte, keysPerBatch*batches)
	for bi := 0; bi < batches; bi++ {
		batch := d.NewBatch()
		for k := 0; k < keysPerBatch; k++ {
			randStr(keyBuf[:prefixLen], rng)
			suffix := rng.Int63n(100)
			suffixLen := testkeys.WriteSuffix(keyBuf[prefixLen:], suffix)
			randStr(valBuf[:], rng)

			var key []byte
			alloc, key = alloc.Copy(keyBuf[:prefixLen+suffixLen])
			keys[bi*keysPerBatch+k] = key
			require.NoError(b, batch.Set(key, valBuf[:], nil))
		}
		require.NoError(b, batch.Commit(nil))
	}

	// Wait for compactions to complete before starting benchmarks. We don't
	// want to benchmark while compactions are running.
	d.mu.Lock()
	for d.mu.compact.compactingCount > 0 {
		d.mu.compact.cond.Wait()
	}
	d.mu.Unlock()
	b.Log(d.Metrics().String())
	require.NoError(b, d.Close())
	// Set ignore syncs to true so that each subbenchmark may mutate state and
	// then revert back to the original state.
	mem.SetIgnoreSyncs(true)

	// TODO(jackson): Benchmark lazy-combined iteration versus not.
	// TODO(jackson): Benchmark seeks.
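	// Point keys carry suffixes in [0, 100), and a range key masks only the
	// points beneath it with suffixes below its own, so the subbenchmarks
	// below sweep from light masking (@10) to nearly total masking (@100).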
	for _, rkSuffix := range []string{"@10", "@50", "@75", "@100"} {
		b.Run(fmt.Sprintf("range-keys-suffixes=%s", rkSuffix), func(b *testing.B) {
			d, err := Open("", opts)
			require.NoError(b, err)
			// Populate 3 range keys, covering most of the keyspace, at the
			// given suffix.
			require.NoError(b, d.RangeKeySet([]byte("b"), []byte("e"), []byte(rkSuffix), nil, nil))
			require.NoError(b, d.RangeKeySet([]byte("f"), []byte("p"), []byte(rkSuffix), nil, nil))
			require.NoError(b, d.RangeKeySet([]byte("q"), []byte("z"), []byte(rkSuffix), nil, nil))
			require.NoError(b, d.Flush())

			iterOpts := IterOptions{
				KeyTypes: IterKeyTypePointsAndRanges,
				RangeKeyMasking: RangeKeyMasking{
					Suffix: []byte("@100"),
					Filter: func() BlockPropertyFilterMask {
						return sstable.NewTestKeysMaskingFilter()
					},
				},
			}
			b.Run("forward", func(b *testing.B) {
				b.Run("seekprefix", func(b *testing.B) {
					b.ResetTimer()
					for i := 0; i < b.N; i++ {
						iter, _ := d.NewIter(&iterOpts)
						count := 0
						for j := 0; j < len(keys); j++ {
							if !iter.SeekPrefixGE(keys[j]) {
								b.Errorf("unable to find %q\n", keys[j])
							}
							if hasPoint, _ := iter.HasPointAndRange(); hasPoint {
								count++
							}
						}
						if err := iter.Close(); err != nil {
							b.Fatal(err)
						}
					}
				})
				b.Run("next", func(b *testing.B) {
					b.ResetTimer()
					for i := 0; i < b.N; i++ {
						iter, _ := d.NewIter(&iterOpts)
						count := 0
						for valid := iter.First(); valid; valid = iter.Next() {
							if hasPoint, _ := iter.HasPointAndRange(); hasPoint {
								count++
							}
						}
						if err := iter.Close(); err != nil {
							b.Fatal(err)
						}
					}
				})
			})
			b.Run("backward", func(b *testing.B) {
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					iter, _ := d.NewIter(&iterOpts)
					count := 0
					for valid := iter.Last(); valid; valid = iter.Prev() {
						if hasPoint, _ := iter.HasPointAndRange(); hasPoint {
							count++
						}
					}
					if err := iter.Close(); err != nil {
						b.Fatal(err)
					}
				}
			})

			// Reset the benchmark state at the end of each run to remove the
			// range keys we wrote.
			b.StopTimer()
			require.NoError(b, d.Close())
			mem.ResetToSyncedState()
		})
	}
}

func BenchmarkIteratorScan(b *testing.B) {
	const maxPrefixLen = 8
	keyBuf := make([]byte, maxPrefixLen+testkeys.MaxSuffixLen)
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

	for _, keyCount := range []int64{100, 1000, 10000} {
		for _, readAmp := range []int{1, 3, 7, 10} {
			func() {
				opts := &Options{
					FS:                 vfs.NewMem(),
					FormatMajorVersion: FormatNewest,
				}
				opts.DisableAutomaticCompactions = true
				d, err := Open("", opts)
				require.NoError(b, err)
				defer func() { require.NoError(b, d.Close()) }()

				// Take the very large keyspace consisting of alphabetic
				// characters of lengths up to `maxPrefixLen` and reduce it
				// down to `keyCount` keys by taking every
				// (keys.Count() / keyCount)-th key.
				keys := testkeys.Alpha(maxPrefixLen)
				keys = keys.EveryN(keys.Count() / keyCount)
				if keys.Count() < keyCount {
					b.Fatalf("expected %d keys, found %d", keyCount, keys.Count())
				}

				// Portion the keys into `readAmp` overlapping key sets.
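				// With automatic compactions disabled, each flushed key set
				// below becomes its own L0 sublevel, so the resulting read
				// amplification equals `readAmp`.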
				for _, ks := range testkeys.Divvy(keys, int64(readAmp)) {
					batch := d.NewBatch()
					for i := int64(0); i < ks.Count(); i++ {
						n := testkeys.WriteKeyAt(keyBuf[:], ks, i, rng.Int63n(100))
						batch.Set(keyBuf[:n], keyBuf[:n], nil)
					}
					require.NoError(b, batch.Commit(nil))
					require.NoError(b, d.Flush())
				}
				// Each level is a sublevel.
				m := d.Metrics()
				require.Equal(b, readAmp, m.ReadAmp())

				for _, keyTypes := range []IterKeyType{IterKeyTypePointsOnly, IterKeyTypePointsAndRanges} {
					iterOpts := IterOptions{KeyTypes: keyTypes}
					b.Run(fmt.Sprintf("keys=%d,r-amp=%d,key-types=%s", keyCount, readAmp, keyTypes), func(b *testing.B) {
						for i := 0; i < b.N; i++ {
							b.StartTimer()
							iter, _ := d.NewIter(&iterOpts)
							valid := iter.First()
							for valid {
								valid = iter.Next()
							}
							b.StopTimer()
							require.NoError(b, iter.Close())
						}
					})
				}
			}()
		}
	}
}

func BenchmarkIteratorScanNextPrefix(b *testing.B) {
	setupBench := func(
		b *testing.B, maxKeysPerLevel, versCount, readAmp int, enableValueBlocks bool) *DB {
		keyBuf := make([]byte, readAmp+testkeys.MaxSuffixLen)
		opts := &Options{
			FS:                 vfs.NewMem(),
			Comparer:           testkeys.Comparer,
			FormatMajorVersion: FormatNewest,
		}
		opts.DisableAutomaticCompactions = true
		opts.Experimental.EnableValueBlocks = func() bool { return enableValueBlocks }
		d, err := Open("", opts)
		require.NoError(b, err)

		// Create `readAmp` levels. Prefixes in the top of the LSM are length 1.
		// Prefixes in the bottom of the LSM are length `readAmp`. E.g.:
		//
		//   a   b   c  ...
		//   aa  ab  ac ...
		//   aaa aab aac ...
		//
		for l := readAmp; l > 0; l-- {
			ks := testkeys.Alpha(l)
			if step := ks.Count() / int64(maxKeysPerLevel); step > 1 {
				ks = ks.EveryN(step)
			}
			if ks.Count() > int64(maxKeysPerLevel) {
				ks = ks.Slice(0, int64(maxKeysPerLevel))
			}

			batch := d.NewBatch()
			for i := int64(0); i < ks.Count(); i++ {
				for v := 0; v < versCount; v++ {
					n := testkeys.WriteKeyAt(keyBuf[:], ks, i, int64(versCount-v+1))
					batch.Set(keyBuf[:n], keyBuf[:n], nil)
				}
			}
			require.NoError(b, batch.Commit(nil))
			require.NoError(b, d.Flush())
		}

		// Each level is a sublevel.
		m := d.Metrics()
		require.Equal(b, readAmp, m.ReadAmp())
		return d
	}

	for _, keysPerLevel := range []int{10, 100, 1000} {
		b.Run(fmt.Sprintf("keysPerLevel=%d", keysPerLevel), func(b *testing.B) {
			for _, versionCount := range []int{1, 2, 10, 100} {
				b.Run(fmt.Sprintf("versions=%d", versionCount), func(b *testing.B) {
					for _, readAmp := range []int{1, 3, 7, 10} {
						b.Run(fmt.Sprintf("ramp=%d", readAmp), func(b *testing.B) {
							for _, enableValueBlocks := range []bool{false, true} {
								b.Run(fmt.Sprintf("value-blocks=%t", enableValueBlocks), func(b *testing.B) {
									d := setupBench(b, keysPerLevel, versionCount, readAmp, enableValueBlocks)
									defer func() { require.NoError(b, d.Close()) }()
									for _, keyTypes := range []IterKeyType{
										IterKeyTypePointsOnly, IterKeyTypePointsAndRanges} {
										b.Run(fmt.Sprintf("key-types=%s", keyTypes), func(b *testing.B) {
											iterOpts := IterOptions{KeyTypes: keyTypes}
											iter, _ := d.NewIter(&iterOpts)
											var valid bool
											b.ResetTimer()
											for i := 0; i < b.N; i++ {
												if !valid {
													valid = iter.First()
													if !valid {
														b.Fatalf("iter must be valid")
													}
												} else {
													valid = iter.NextPrefix()
												}
											}
											b.StopTimer()
											require.NoError(b, iter.Close())
										})
									}
								})
							}
						})
					}
				})
			}
		})
	}
}

func BenchmarkCombinedIteratorSeek(b *testing.B) {
	for _, withRangeKey := range []bool{false, true} {
		b.Run(fmt.Sprintf("range-key=%t", withRangeKey), func(b *testing.B) {
			rng := rand.New(rand.NewSource(uint64(1658872515083979000)))
			ks := testkeys.Alpha(1)
			opts := &Options{
				FS:                 vfs.NewMem(),
				Comparer:           testkeys.Comparer,
				FormatMajorVersion: FormatNewest,
			}
			d, err := Open("", opts)
			require.NoError(b, err)
			defer func() { require.NoError(b, d.Close()) }()

			keys := make([][]byte, ks.Count())
			for i := int64(0); i < ks.Count(); i++ {
				keys[i] = testkeys.Key(ks, i)
				var val [40]byte
				rng.Read(val[:])
				require.NoError(b, d.Set(keys[i], val[:], nil))
			}
			if withRangeKey {
				require.NoError(b, d.RangeKeySet([]byte("a"), []byte{'z', 0x00}, []byte("@5"), nil, nil))
			}

			batch := d.NewIndexedBatch()
			defer batch.Close()

			for _, useBatch := range []bool{false, true} {
				b.Run(fmt.Sprintf("batch=%t", useBatch), func(b *testing.B) {
					for i := 0; i < b.N; i++ {
						iterOpts := IterOptions{KeyTypes: IterKeyTypePointsAndRanges}
						var it *Iterator
						if useBatch {
							it, _ = batch.NewIter(&iterOpts)
						} else {
							it, _ = d.NewIter(&iterOpts)
						}
						for j := 0; j < len(keys); j++ {
							if !it.SeekGE(keys[j]) {
								b.Errorf("key %q missing", keys[j])
							}
						}
						require.NoError(b, it.Close())
					}
				})
			}
		})
	}
}

// BenchmarkCombinedIteratorSeek_Bounded benchmarks a bounded iterator that
// performs repeated seeks over 5% of the middle of a keyspace covered by a
// range key that's fragmented across hundreds of files. The iterator's bounds
// should prevent the range key from being defragmented beyond those bounds.
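// Without bounds, each seek could require visiting a range key's neighboring
// fragments across hundreds of files to reconstruct its full extent.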
func BenchmarkCombinedIteratorSeek_Bounded(b *testing.B) {
	d, keys := buildFragmentedRangeKey(b, uint64(1658872515083979000))

	var lower = len(keys) / 2
	var upper = len(keys)/2 + len(keys)/20 // 5%
	iterOpts := IterOptions{
		KeyTypes:   IterKeyTypePointsAndRanges,
		LowerBound: keys[lower],
		UpperBound: keys[upper],
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		it, _ := d.NewIter(&iterOpts)
		for j := lower; j < upper; j++ {
			if !it.SeekGE(keys[j]) {
				b.Errorf("key %q missing", keys[j])
			}
		}
		require.NoError(b, it.Close())
	}
}

// BenchmarkCombinedIteratorSeekPrefix benchmarks an iterator that performs
// repeated prefix seeks over 5% of the middle of a keyspace covered by a
// range key that's fragmented across hundreds of files. Seeking with a
// prefix should avoid defragmenting the range key beyond the bounds of the
// sought prefixes.
func BenchmarkCombinedIteratorSeekPrefix(b *testing.B) {
	d, keys := buildFragmentedRangeKey(b, uint64(1658872515083979000))

	var lower = len(keys) / 2
	var upper = len(keys)/2 + len(keys)/20 // 5%
	iterOpts := IterOptions{
		KeyTypes: IterKeyTypePointsAndRanges,
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		it, _ := d.NewIter(&iterOpts)
		for j := lower; j < upper; j++ {
			if !it.SeekPrefixGE(keys[j]) {
				b.Errorf("key %q missing", keys[j])
			}
		}
		require.NoError(b, it.Close())
	}
}

func buildFragmentedRangeKey(b testing.TB, seed uint64) (d *DB, keys [][]byte) {
	rng := rand.New(rand.NewSource(seed))
	ks := testkeys.Alpha(2)
	opts := &Options{
		FS:                        vfs.NewMem(),
		Comparer:                  testkeys.Comparer,
		FormatMajorVersion:        FormatNewest,
		L0CompactionFileThreshold: 1,
	}
	opts.EnsureDefaults()
	for l := 0; l < len(opts.Levels); l++ {
		opts.Levels[l].TargetFileSize = 1
	}
	var err error
	d, err = Open("", opts)
	require.NoError(b, err)

	keys = make([][]byte, ks.Count())
	for i := int64(0); i < ks.Count(); i++ {
		keys[i] = testkeys.Key(ks, i)
	}
	for i := 0; i < len(keys); i++ {
		var val [40]byte
		rng.Read(val[:])
		require.NoError(b, d.Set(keys[i], val[:], nil))
		if i < len(keys)-1 {
			require.NoError(b, d.RangeKeySet(keys[i], keys[i+1], []byte("@5"), nil, nil))
		}
		require.NoError(b, d.Flush())
	}

	d.mu.Lock()
	for d.mu.compact.compactingCount > 0 {
		d.mu.compact.cond.Wait()
	}
	v := d.mu.versions.currentVersion()
	d.mu.Unlock()
	require.GreaterOrEqualf(b, v.Levels[numLevels-1].Len(),
		700, "expect many (≥700) L6 files but found %d", v.Levels[numLevels-1].Len())
	return d, keys
}

// BenchmarkSeekPrefixTombstones benchmarks a SeekPrefixGE into the beginning
// of a series of sstables containing exclusively range tombstones. Previously,
// such a seek would next through all the tombstone files until it arrived at a
// point key or exhausted the level's files. The SeekPrefixGE should not next
// beyond the files that contain the prefix.
//
// See cockroachdb/cockroach#89327.
func BenchmarkSeekPrefixTombstones(b *testing.B) {
	o := (&Options{
		FS:                 vfs.NewMem(),
		Comparer:           testkeys.Comparer,
		FormatMajorVersion: FormatNewest,
	}).EnsureDefaults()
	wOpts := o.MakeWriterOptions(numLevels-1, FormatNewest.MaxTableFormat())
	d, err := Open("", o)
	require.NoError(b, err)
	defer func() { require.NoError(b, d.Close()) }()

	// Keep a snapshot open for the duration of the test to prevent
	// elision-only compactions from removing the ingested files containing
	// exclusively elidable tombstones.
	defer d.NewSnapshot().Close()

	ks := testkeys.Alpha(2)
	for i := int64(0); i < ks.Count()-1; i++ {
		func() {
			filename := fmt.Sprintf("ext%2d", i)
			f, err := o.FS.Create(filename)
			require.NoError(b, err)
			w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), wOpts)
			require.NoError(b, w.DeleteRange(testkeys.Key(ks, i), testkeys.Key(ks, i+1)))
			require.NoError(b, w.Close())
			require.NoError(b, d.Ingest([]string{filename}))
		}()
	}

	d.mu.Lock()
	require.Equal(b, int64(ks.Count()-1), d.mu.versions.metrics.Levels[numLevels-1].NumFiles)
	d.mu.Unlock()

	seekKey := testkeys.Key(ks, 1)
	iter, _ := d.NewIter(nil)
	defer iter.Close()
	b.ResetTimer()
	defer b.StopTimer()
	for i := 0; i < b.N; i++ {
		iter.SeekPrefixGE(seekKey)
	}
}