github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/iterator_test.go

// Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package pebble

import (
	"bytes"
	"context"
	"flag"
	"fmt"
	"io"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"testing"
	"time"

	"github.com/cockroachdb/datadriven"
	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/bytealloc"
	"github.com/cockroachdb/pebble/internal/invalidating"
	"github.com/cockroachdb/pebble/internal/keyspan"
	"github.com/cockroachdb/pebble/internal/manifest"
	"github.com/cockroachdb/pebble/internal/testkeys"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
	"github.com/cockroachdb/pebble/sstable"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/rand"
)

var testKeyValuePairs = []string{
	"10:10",
	"11:11",
	"12:12",
	"13:13",
	"14:14",
	"15:15",
	"16:16",
	"17:17",
	"18:18",
	"19:19",
}

type fakeIter struct {
	lower    []byte
	upper    []byte
	keys     []InternalKey
	vals     [][]byte
	index    int
	valid    bool
	closeErr error
}

// fakeIter implements the base.InternalIterator interface.
var _ base.InternalIterator = (*fakeIter)(nil)

func fakeIkey(s string) InternalKey {
	j := strings.Index(s, ":")
	seqNum, err := strconv.Atoi(s[j+1:])
	if err != nil {
		panic(err)
	}
	return base.MakeInternalKey([]byte(s[:j]), uint64(seqNum), InternalKeyKindSet)
}

func newFakeIterator(closeErr error, keys ...string) *fakeIter {
	ikeys := make([]InternalKey, len(keys))
	for i, k := range keys {
		ikeys[i] = fakeIkey(k)
	}
	return &fakeIter{
		keys:     ikeys,
		index:    0,
		valid:    len(ikeys) > 0,
		closeErr: closeErr,
	}
}

func (f *fakeIter) String() string {
	return "fake"
}

func (f *fakeIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
	f.valid = false
	for f.index = 0; f.index < len(f.keys); f.index++ {
		if DefaultComparer.Compare(key, f.key().UserKey) <= 0 {
			if f.upper != nil && DefaultComparer.Compare(f.upper, f.key().UserKey) <= 0 {
				return nil, base.LazyValue{}
			}
			f.valid = true
			return f.Key(), f.Value()
		}
	}
	return nil, base.LazyValue{}
}

func (f *fakeIter) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	return f.SeekGE(key, flags)
}

func (f *fakeIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
	f.valid = false
	for f.index = len(f.keys) - 1; f.index >= 0; f.index-- {
		if DefaultComparer.Compare(key, f.key().UserKey) > 0 {
			if f.lower != nil && DefaultComparer.Compare(f.lower, f.key().UserKey) > 0 {
				return nil, base.LazyValue{}
			}
			f.valid = true
			return f.Key(), f.Value()
		}
	}
	return nil, base.LazyValue{}
}

func (f *fakeIter) First() (*InternalKey, base.LazyValue) {
	f.valid = false
	f.index = -1
	if key, _ := f.Next(); key == nil {
		return nil, base.LazyValue{}
	}
	if f.upper != nil && DefaultComparer.Compare(f.upper, f.key().UserKey) <= 0 {
		return nil, base.LazyValue{}
	}
	f.valid = true
	return f.Key(), f.Value()
}

func (f *fakeIter) Last() (*InternalKey, base.LazyValue) {
	f.valid = false
	f.index = len(f.keys)
	if key, _ := f.Prev(); key == nil {
		return nil, base.LazyValue{}
	}
	if f.lower != nil && DefaultComparer.Compare(f.lower, f.key().UserKey) > 0 {
		return nil, base.LazyValue{}
	}
	f.valid = true
	return f.Key(), f.Value()
}

func (f *fakeIter) Next() (*InternalKey, base.LazyValue) {
	f.valid = false
	if f.index == len(f.keys) {
		return nil, base.LazyValue{}
	}
	f.index++
	if f.index == len(f.keys) {
		return nil, base.LazyValue{}
	}
	if f.upper != nil && DefaultComparer.Compare(f.upper, f.key().UserKey) <= 0 {
		return nil, base.LazyValue{}
	}
	f.valid = true
	return f.Key(), f.Value()
}

func (f *fakeIter) Prev() (*InternalKey, base.LazyValue) {
	f.valid = false
	if f.index < 0 {
		return nil, base.LazyValue{}
	}
	f.index--
	if f.index < 0 {
		return nil, base.LazyValue{}
	}
	if f.lower != nil && DefaultComparer.Compare(f.lower, f.key().UserKey) > 0 {
		return nil, base.LazyValue{}
	}
	f.valid = true
	return f.Key(), f.Value()
}

func (f *fakeIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
	return f.SeekGE(succKey, base.SeekGEFlagsNone)
}

// key returns the current key the iterator is positioned at, regardless of
// the value of f.valid.
func (f *fakeIter) key() *InternalKey {
	return &f.keys[f.index]
}

func (f *fakeIter) Key() *InternalKey {
	if f.valid {
		return &f.keys[f.index]
	}
	// It is invalid to call Key() when Valid() returns false. Rather than
	// returning nil here, which would technically be more correct, return a
	// non-nil key, which is the behavior of some InternalIterator
	// implementations. This provides better testing of users of
	// InternalIterators.
	if f.index < 0 {
		return &f.keys[0]
	}
	return &f.keys[len(f.keys)-1]
}

func (f *fakeIter) Value() base.LazyValue {
	if f.index >= 0 && f.index < len(f.vals) {
		return base.MakeInPlaceValue(f.vals[f.index])
	}
	return base.LazyValue{}
}

func (f *fakeIter) Valid() bool {
	return f.index >= 0 && f.index < len(f.keys) && f.valid
}

func (f *fakeIter) Error() error {
	return f.closeErr
}

func (f *fakeIter) Close() error {
	return f.closeErr
}

func (f *fakeIter) SetBounds(lower, upper []byte) {
	f.lower = lower
	f.upper = upper
}
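
// TestFakeIterSketch is an illustrative sketch added for exposition (not
// part of the upstream suite): newFakeIterator takes "<userkey>:<seqnum>"
// strings and yields the corresponding internal keys in the order provided,
// so callers must supply keys pre-sorted.
func TestFakeIterSketch(t *testing.T) {
	it := newFakeIterator(nil /* closeErr */, "a:1", "b:2")
	key, _ := it.First()
	require.Equal(t, "a", string(key.UserKey))
	require.EqualValues(t, 1, key.SeqNum())
	key, _ = it.Next()
	require.Equal(t, "b", string(key.UserKey))
	// Stepping past the last key exhausts the iterator.
	key, _ = it.Next()
	require.Nil(t, key)
	require.NoError(t, it.Close())
}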

// testIterator tests creating a combined iterator from a number of
// sub-iterators. newFunc is a constructor function. splitFunc returns a
// random split of the testKeyValuePairs slice such that walking a combined
// iterator over those splits should recover the original key/value pairs in
// order.
func testIterator(
	t *testing.T,
	newFunc func(...internalIterator) internalIterator,
	splitFunc func(r *rand.Rand) [][]string,
) {
	// Test pre-determined sub-iterators. The sub-iterators are designed
	// so that the combined key/value pair order is the same whether the
	// combined iterator is concatenating or merging.
	testCases := []struct {
		desc  string
		iters []internalIterator
		want  string
	}{
		{
			"one sub-iterator",
			[]internalIterator{
				newFakeIterator(nil, "e:1", "w:2"),
			},
			"<e:1><w:2>.",
		},
		{
			"two sub-iterators",
			[]internalIterator{
				newFakeIterator(nil, "a0:0"),
				newFakeIterator(nil, "b1:1", "b2:2"),
			},
			"<a0:0><b1:1><b2:2>.",
		},
		{
			"empty sub-iterators",
			[]internalIterator{
				newFakeIterator(nil),
				newFakeIterator(nil),
				newFakeIterator(nil),
			},
			".",
		},
		{
			"sub-iterator errors",
			[]internalIterator{
				newFakeIterator(nil, "a0:0", "a1:1"),
				newFakeIterator(errors.New("the sky is falling"), "b2:2", "b3:3", "b4:4"),
				newFakeIterator(errors.New("run for your lives"), "c5:5", "c6:6"),
			},
			"<a0:0><a1:1><b2:2><b3:3><b4:4>err=the sky is falling",
		},
	}
	for _, tc := range testCases {
		var b bytes.Buffer
		iter := invalidating.NewIter(newFunc(tc.iters...))
		for key, _ := iter.First(); key != nil; key, _ = iter.Next() {
			fmt.Fprintf(&b, "<%s:%d>", key.UserKey, key.SeqNum())
		}
		if err := iter.Close(); err != nil {
			fmt.Fprintf(&b, "err=%v", err)
		} else {
			b.WriteByte('.')
		}
		if got := b.String(); got != tc.want {
			t.Errorf("%s:\ngot %q\nwant %q", tc.desc, got, tc.want)
		}
	}

	// Test randomly generated sub-iterators.
	r := rand.New(rand.NewSource(0))
	for i, nBad := 0, 0; i < 1000; i++ {
		bad := false

		splits := splitFunc(r)
		iters := make([]internalIterator, len(splits))
		for i, split := range splits {
			iters[i] = newFakeIterator(nil, split...)
		}
		iter := newInternalIterAdapter(invalidating.NewIter(newFunc(iters...)))
		iter.First()

		j := 0
		for ; iter.Valid() && j < len(testKeyValuePairs); j++ {
			got := fmt.Sprintf("%s:%d", iter.Key().UserKey, iter.Key().SeqNum())
			want := testKeyValuePairs[j]
			if got != want {
				bad = true
				t.Errorf("random splits: i=%d, j=%d: got %q, want %q", i, j, got, want)
			}
			iter.Next()
		}
		if iter.Valid() {
			bad = true
			t.Errorf("random splits: i=%d, j=%d: iter was not exhausted", i, j)
		}
		if j != len(testKeyValuePairs) {
			bad = true
			t.Errorf("random splits: i=%d, j=%d: want j=%d", i, j, len(testKeyValuePairs))
			return
		}
		if err := iter.Close(); err != nil {
			bad = true
			t.Errorf("random splits: i=%d, j=%d: %v", i, j, err)
		}

		if bad {
			nBad++
			if nBad == 10 {
				t.Fatal("random splits: too many errors; stopping")
			}
		}
	}
}

// deletableSumValueMerger computes the sum of its arguments, but transforms
// a zero sum into a non-existent entry.
type deletableSumValueMerger struct {
	sum int64
}

func newDeletableSumValueMerger(key, value []byte) (ValueMerger, error) {
	m := &deletableSumValueMerger{}
	return m, m.MergeNewer(value)
}

func (m *deletableSumValueMerger) parseAndCalculate(value []byte) error {
	v, err := strconv.ParseInt(string(value), 10, 64)
	if err == nil {
		m.sum += v
	}
	return err
}

func (m *deletableSumValueMerger) MergeNewer(value []byte) error {
	return m.parseAndCalculate(value)
}

func (m *deletableSumValueMerger) MergeOlder(value []byte) error {
	return m.parseAndCalculate(value)
}

func (m *deletableSumValueMerger) Finish(includesBase bool) ([]byte, io.Closer, error) {
	if m.sum == 0 {
		return nil, nil, nil
	}
	return []byte(strconv.FormatInt(m.sum, 10)), nil, nil
}

func (m *deletableSumValueMerger) DeletableFinish(
	includesBase bool,
) ([]byte, bool, io.Closer, error) {
	value, closer, err := m.Finish(includesBase)
	return value, len(value) == 0, closer, err
}
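
// TestDeletableSumValueMergerSketch is an illustrative sketch added for
// exposition (not part of the upstream suite): merging "5" with "-5" sums to
// zero, which DeletableFinish reports as a deletable (non-existent) entry.
func TestDeletableSumValueMergerSketch(t *testing.T) {
	m, err := newDeletableSumValueMerger([]byte("k"), []byte("5"))
	require.NoError(t, err)
	require.NoError(t, m.MergeOlder([]byte("-5")))
	value, isDelete, closer, err := m.(*deletableSumValueMerger).DeletableFinish(true /* includesBase */)
	require.NoError(t, err)
	require.Nil(t, closer)
	require.True(t, isDelete)
	require.Empty(t, value)
}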

func TestIterator(t *testing.T) {
	var merge Merge
	var keys []InternalKey
	var vals [][]byte

	newIter := func(seqNum uint64, opts IterOptions) *Iterator {
		if merge == nil {
			merge = DefaultMerger.Merge
		}
		wrappedMerge := func(key, value []byte) (ValueMerger, error) {
			if len(key) == 0 {
				t.Fatalf("an empty key is passed into Merge")
			}
			return merge(key, value)
		}
		it := &Iterator{
			opts:     opts,
			comparer: *testkeys.Comparer,
			merge:    wrappedMerge,
		}
		// NB: Use a mergingIter to filter entries newer than seqNum.
		iter := newMergingIter(nil /* logger */, &it.stats.InternalStats, it.cmp, it.split, &fakeIter{
			lower: opts.GetLowerBound(),
			upper: opts.GetUpperBound(),
			keys:  keys,
			vals:  vals,
		})
		iter.snapshot = seqNum
		// NB: This Iterator cannot be cloned since it is not constructed
		// with a readState. It suffices for this test.
		it.iter = invalidating.NewIter(iter)
		return it
	}

	datadriven.RunTest(t, "testdata/iterator", func(t *testing.T, d *datadriven.TestData) string {
		switch d.Cmd {
		case "define":
			merge = nil
			if arg, ok := d.Arg("merger"); ok && len(arg.Vals[0]) > 0 && arg.Vals[0] == "deletable" {
				merge = newDeletableSumValueMerger
			}
			keys = keys[:0]
			vals = vals[:0]
			for _, key := range strings.Split(d.Input, "\n") {
				j := strings.Index(key, ":")
				keys = append(keys, base.ParseInternalKey(key[:j]))
				vals = append(vals, []byte(key[j+1:]))
			}
			return ""

		case "iter":
			var seqNum uint64
			var opts IterOptions
			d.MaybeScanArgs(t, "seq", &seqNum)
			var lower, upper string
			if d.MaybeScanArgs(t, "lower", &lower) {
				opts.LowerBound = []byte(lower)
			}
			if d.MaybeScanArgs(t, "upper", &upper) {
				opts.UpperBound = []byte(upper)
			}

			iter := newIter(seqNum, opts)
			iterOutput := runIterCmd(d, iter, true)
			stats := iter.Stats()
			return fmt.Sprintf("%sstats: %s\n", iterOutput, stats.String())

		default:
			return fmt.Sprintf("unknown command: %s", d.Cmd)
		}
	})
}

type minSeqNumPropertyCollector struct {
	minSeqNum uint64
}

func (c *minSeqNumPropertyCollector) Add(key InternalKey, value []byte) error {
	if c.minSeqNum == 0 || c.minSeqNum > key.SeqNum() {
		c.minSeqNum = key.SeqNum()
	}
	return nil
}

func (c *minSeqNumPropertyCollector) Finish(userProps map[string]string) error {
	userProps["test.min-seq-num"] = fmt.Sprint(c.minSeqNum)
	return nil
}

func (c *minSeqNumPropertyCollector) Name() string {
	return "minSeqNumPropertyCollector"
}

func TestReadSampling(t *testing.T) {
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	var iter *Iterator
	defer func() {
		if iter != nil {
			require.NoError(t, iter.Close())
		}
	}()

	datadriven.RunTest(t, "testdata/iterator_read_sampling", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			if iter != nil {
				if err := iter.Close(); err != nil {
					return err.Error()
				}
			}
			if d != nil {
				if err := d.Close(); err != nil {
					return err.Error()
				}
			}

			opts := &Options{}
			opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
				func() TablePropertyCollector {
					return &minSeqNumPropertyCollector{}
				})

			var err error
			if d, err = runDBDefineCmd(td, opts); err != nil {
				return err.Error()
			}

			d.mu.Lock()
			// Disable the "dynamic base level" code for this test.
			// d.mu.versions.picker.forceBaseLevel1()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			return s

		case "set":
			if d == nil {
				return fmt.Sprintf("%s: db is not defined", td.Cmd)
			}

			var allowedSeeks int64
			td.ScanArgs(t, "allowed-seeks", &allowedSeeks)

			d.mu.Lock()
			for _, l := range d.mu.versions.currentVersion().Levels {
				l.Slice().Each(func(f *fileMetadata) {
					f.AllowedSeeks.Store(allowedSeeks)
				})
			}
			d.mu.Unlock()
			return ""

		case "show":
			if d == nil {
				return fmt.Sprintf("%s: db is not defined", td.Cmd)
			}

			var fileNum int64
			for _, arg := range td.CmdArgs {
				if len(arg.Vals) != 2 {
					return fmt.Sprintf("%s: %s=<value>", td.Cmd, arg.Key)
				}
				switch arg.Key {
				case "allowed-seeks":
					var err error
					fileNum, err = strconv.ParseInt(arg.Vals[0], 10, 64)
					if err != nil {
						return err.Error()
					}
				}
			}

			var foundAllowedSeeks int64 = -1
			d.mu.Lock()
			for _, l := range d.mu.versions.currentVersion().Levels {
				l.Slice().Each(func(f *fileMetadata) {
					if f.FileNum == base.FileNum(fileNum) {
						actualAllowedSeeks := f.AllowedSeeks.Load()
						foundAllowedSeeks = actualAllowedSeeks
					}
				})
			}
			d.mu.Unlock()

			if foundAllowedSeeks == -1 {
				return fmt.Sprintf("invalid file num: %d", fileNum)
			}
			return fmt.Sprintf("%d", foundAllowedSeeks)

		case "iter":
			if iter == nil || iter.iter == nil {
				// TODO(peter): runDBDefineCmd doesn't properly update the visible
				// sequence number. So we have to use a snapshot with a very large
				// sequence number, otherwise the DB appears empty.
				snap := Snapshot{
					db:     d,
					seqNum: InternalKeySeqNumMax,
				}
				iter, _ = snap.NewIter(nil)
				iter.readSampling.forceReadSampling = true
			}
			return runIterCmd(td, iter, false)

		case "read-compactions":
			if d == nil {
				return fmt.Sprintf("%s: db is not defined", td.Cmd)
			}

			d.mu.Lock()
			var sb strings.Builder
			if d.mu.compact.readCompactions.size == 0 {
				sb.WriteString("(none)")
			}
			for i := 0; i < d.mu.compact.readCompactions.size; i++ {
				rc := d.mu.compact.readCompactions.at(i)
				sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end)))
			}
			d.mu.Unlock()
			return sb.String()

		case "iter-read-compactions":
			if iter == nil {
				return fmt.Sprintf("%s: iter is not defined", td.Cmd)
			}

			var sb strings.Builder
			if iter.readSampling.pendingCompactions.size == 0 {
				sb.WriteString("(none)")
			}
			for i := 0; i < iter.readSampling.pendingCompactions.size; i++ {
				rc := iter.readSampling.pendingCompactions.at(i)
				sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end)))
			}
			return sb.String()

		case "close-iter":
			if iter != nil {
				if err := iter.Close(); err != nil {
					return err.Error()
				}
			}
			return ""

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}

func TestIteratorTableFilter(t *testing.T) {
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	datadriven.RunTest(t, "testdata/iterator_table_filter", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			if d != nil {
				if err := d.Close(); err != nil {
					return err.Error()
				}
			}

			opts := &Options{}
			opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
				func() TablePropertyCollector {
					return &minSeqNumPropertyCollector{}
				})

			var err error
			if d, err = runDBDefineCmd(td, opts); err != nil {
				return err.Error()
			}

			d.mu.Lock()
			// Disable the "dynamic base level" code for this test.
			d.mu.versions.picker.forceBaseLevel1()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			return s

		case "iter":
			// We're using an iterator table filter to approximate what is done by
			// snapshots.
			iterOpts := &IterOptions{}
			var filterSeqNum uint64
			if td.MaybeScanArgs(t, "filter", &filterSeqNum) {
				iterOpts.TableFilter = func(userProps map[string]string) bool {
					minSeqNum, err := strconv.ParseUint(userProps["test.min-seq-num"], 10, 64)
					if err != nil {
						return true
					}
					return minSeqNum < filterSeqNum
				}
			}

			// TODO(peter): runDBDefineCmd doesn't properly update the visible
			// sequence number. So we have to use a snapshot with a very large
			// sequence number, otherwise the DB appears empty.
			snap := Snapshot{
				db:     d,
				seqNum: InternalKeySeqNumMax,
			}
			iter, _ := snap.NewIter(iterOpts)
			return runIterCmd(td, iter, true)

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}

func TestIteratorNextPrev(t *testing.T) {
	var mem vfs.FS
	var d *DB
	defer func() {
		require.NoError(t, d.Close())
	}()

	reset := func() {
		if d != nil {
			require.NoError(t, d.Close())
		}

		mem = vfs.NewMem()
		require.NoError(t, mem.MkdirAll("ext", 0755))
		opts := &Options{FS: mem}
		// Automatic compactions may compact away tombstones from L6, making
		// some testcases non-deterministic.
		opts.DisableAutomaticCompactions = true
		var err error
		d, err = Open("", opts)
		require.NoError(t, err)
	}
	reset()

	datadriven.RunTest(t, "testdata/iterator_next_prev", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "reset":
			reset()
			return ""

		case "build":
			if err := runBuildCmd(td, d, mem); err != nil {
				return err.Error()
			}
			return ""

		case "ingest":
			if err := runIngestCmd(td, d, mem); err != nil {
				return err.Error()
			}
			return runLSMCmd(td, d)

		case "iter":
			snap := Snapshot{
				db:     d,
				seqNum: InternalKeySeqNumMax,
			}
			td.MaybeScanArgs(t, "seq", &snap.seqNum)
			iter, _ := snap.NewIter(nil)
			return runIterCmd(td, iter, true)

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}

func TestIteratorStats(t *testing.T) {
	var mem vfs.FS
	var d *DB
	defer func() {
		require.NoError(t, d.Close())
	}()

	reset := func() {
		if d != nil {
			require.NoError(t, d.Close())
		}

		mem = vfs.NewMem()
		require.NoError(t, mem.MkdirAll("ext", 0755))
		opts := &Options{Comparer: testkeys.Comparer, FS: mem, FormatMajorVersion: internalFormatNewest}
		// Automatic compactions may make some testcases non-deterministic.
		opts.DisableAutomaticCompactions = true
		var err error
		d, err = Open("", opts)
		require.NoError(t, err)
	}
	reset()

	datadriven.RunTest(t, "testdata/iterator_stats", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "reset":
			reset()
			return ""

		case "build":
			if err := runBuildCmd(td, d, mem); err != nil {
				return err.Error()
			}
			return ""

		case "ingest":
			if err := runIngestCmd(td, d, mem); err != nil {
				return err.Error()
			}
			return runLSMCmd(td, d)

		case "iter":
			snap := Snapshot{
				db:     d,
				seqNum: InternalKeySeqNumMax,
			}
			td.MaybeScanArgs(t, "seq", &snap.seqNum)
			iter, _ := snap.NewIter(nil)
			return runIterCmd(td, iter, true)

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}

type iterSeekOptWrapper struct {
	internalIterator

	seekGEUsingNext, seekPrefixGEUsingNext *int
}

func (i *iterSeekOptWrapper) SeekGE(
	key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	if flags.TrySeekUsingNext() {
		*i.seekGEUsingNext++
	}
	return i.internalIterator.SeekGE(key, flags)
}

func (i *iterSeekOptWrapper) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	if flags.TrySeekUsingNext() {
		*i.seekPrefixGEUsingNext++
	}
	return i.internalIterator.SeekPrefixGE(prefix, key, flags)
}
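
// Sketch added for exposition, assuming the SeekGEFlags helpers defined in
// internal/base: the TrySeekUsingNext flag is how callers tell an internal
// iterator that the new seek key is at or ahead of the current position,
// letting the iterator step forward with Next instead of re-seeking from
// scratch. iterSeekOptWrapper above counts how often Pebble sets this flag
// on the sstable iterators it constructs.
func trySeekUsingNextFlagSketch() base.SeekGEFlags {
	flags := base.SeekGEFlagsNone.EnableTrySeekUsingNext()
	if !flags.TrySeekUsingNext() {
		panic("TrySeekUsingNext should report the flag as set")
	}
	return flags
}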

func TestIteratorSeekOpt(t *testing.T) {
	var d *DB
	defer func() {
		require.NoError(t, d.Close())
	}()
	var iter *Iterator
	defer func() {
		if iter != nil {
			require.NoError(t, iter.Close())
		}
	}()
	var seekGEUsingNext, seekPrefixGEUsingNext int

	datadriven.RunTest(t, "testdata/iterator_seek_opt", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			if iter != nil {
				if err := iter.Close(); err != nil {
					return err.Error()
				}
			}
			if d != nil {
				if err := d.Close(); err != nil {
					return err.Error()
				}
			}
			seekGEUsingNext = 0
			seekPrefixGEUsingNext = 0

			opts := &Options{}
			opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
				func() TablePropertyCollector {
					return &minSeqNumPropertyCollector{}
				})

			var err error
			if d, err = runDBDefineCmd(td, opts); err != nil {
				return err.Error()
			}

			d.mu.Lock()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			oldNewIters := d.newIters
			d.newIters = func(
				ctx context.Context, file *manifest.FileMetadata, opts *IterOptions,
				internalOpts internalIterOpts) (internalIterator, keyspan.FragmentIterator, error) {
				iter, rangeIter, err := oldNewIters(ctx, file, opts, internalOpts)
				iterWrapped := &iterSeekOptWrapper{
					internalIterator:      iter,
					seekGEUsingNext:       &seekGEUsingNext,
					seekPrefixGEUsingNext: &seekPrefixGEUsingNext,
				}
				return iterWrapped, rangeIter, err
			}
			return s

		case "iter":
			if iter == nil || iter.iter == nil {
				// TODO(peter): runDBDefineCmd doesn't properly update the visible
				// sequence number. So we have to use a snapshot with a very large
				// sequence number, otherwise the DB appears empty.
				snap := Snapshot{
					db:     d,
					seqNum: InternalKeySeqNumMax,
				}
				iter, _ = snap.NewIter(nil)
				iter.readSampling.forceReadSampling = true
				iter.comparer.Split = func(a []byte) int { return len(a) }
				iter.forceEnableSeekOpt = true
				iter.merging.forceEnableSeekOpt = true
			}
			iterOutput := runIterCmd(td, iter, false)
			stats := iter.Stats()
			// InternalStats are non-deterministic since they depend on how data is
			// distributed across memtables and sstables in the DB.
			stats.InternalStats = InternalIteratorStats{}
			var builder strings.Builder
			fmt.Fprintf(&builder, "%sstats: %s\n", iterOutput, stats.String())
			fmt.Fprintf(&builder, "SeekGEs with trySeekUsingNext: %d\n", seekGEUsingNext)
			fmt.Fprintf(&builder, "SeekPrefixGEs with trySeekUsingNext: %d\n", seekPrefixGEUsingNext)
			return builder.String()

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}

type errorSeekIter struct {
	internalIterator
	// Fields controlling error injection for seeks.
	injectSeekErrorCounts []int
	seekCount             int
	err                   error
}

func (i *errorSeekIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
	if i.tryInjectError() {
		return nil, base.LazyValue{}
	}
	i.err = nil
	i.seekCount++
	return i.internalIterator.SeekGE(key, flags)
}

func (i *errorSeekIter) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	if i.tryInjectError() {
		return nil, base.LazyValue{}
	}
	i.err = nil
	i.seekCount++
	return i.internalIterator.SeekPrefixGE(prefix, key, flags)
}

func (i *errorSeekIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
	if i.tryInjectError() {
		return nil, base.LazyValue{}
	}
	i.err = nil
	i.seekCount++
	return i.internalIterator.SeekLT(key, flags)
}

func (i *errorSeekIter) tryInjectError() bool {
	if len(i.injectSeekErrorCounts) > 0 && i.injectSeekErrorCounts[0] == i.seekCount {
		i.seekCount++
		i.err = errors.Errorf("injecting error")
		i.injectSeekErrorCounts = i.injectSeekErrorCounts[1:]
		return true
	}
	return false
}

func (i *errorSeekIter) First() (*InternalKey, base.LazyValue) {
	i.err = nil
	return i.internalIterator.First()
}

func (i *errorSeekIter) Last() (*InternalKey, base.LazyValue) {
	i.err = nil
	return i.internalIterator.Last()
}

func (i *errorSeekIter) Next() (*InternalKey, base.LazyValue) {
	if i.err != nil {
		return nil, base.LazyValue{}
	}
	return i.internalIterator.Next()
}

func (i *errorSeekIter) Prev() (*InternalKey, base.LazyValue) {
	if i.err != nil {
		return nil, base.LazyValue{}
	}
	return i.internalIterator.Prev()
}

func (i *errorSeekIter) Error() error {
	if i.err != nil {
		return i.err
	}
	return i.internalIterator.Error()
}

func TestIteratorSeekOptErrors(t *testing.T) {
	var keys []InternalKey
	var vals [][]byte

	var errorIter errorSeekIter
	newIter := func(opts IterOptions) *Iterator {
		iter := &fakeIter{
			lower: opts.GetLowerBound(),
			upper: opts.GetUpperBound(),
			keys:  keys,
			vals:  vals,
		}
		errorIter = errorSeekIter{internalIterator: invalidating.NewIter(iter)}
		// NB: This Iterator cannot be cloned since it is not constructed
		// with a readState. It suffices for this test.
		return &Iterator{
			opts:     opts,
			comparer: *testkeys.Comparer,
			merge:    DefaultMerger.Merge,
			iter:     &errorIter,
		}
	}

	datadriven.RunTest(t, "testdata/iterator_seek_opt_errors", func(t *testing.T, d *datadriven.TestData) string {
		switch d.Cmd {
		case "define":
			keys = keys[:0]
			vals = vals[:0]
			for _, key := range strings.Split(d.Input, "\n") {
				j := strings.Index(key, ":")
				keys = append(keys, base.ParseInternalKey(key[:j]))
				vals = append(vals, []byte(key[j+1:]))
			}
			return ""

		case "iter":
			var opts IterOptions
			var injectSeekGEErrorCounts []int
			for _, arg := range d.CmdArgs {
				if len(arg.Vals) < 1 {
					return fmt.Sprintf("%s: %s=<value>", d.Cmd, arg.Key)
				}
				switch arg.Key {
				case "lower":
					opts.LowerBound = []byte(arg.Vals[0])
				case "upper":
					opts.UpperBound = []byte(arg.Vals[0])
				case "seek-error":
					for i := 0; i < len(arg.Vals); i++ {
						n, err := strconv.Atoi(arg.Vals[i])
						if err != nil {
							return err.Error()
						}
						injectSeekGEErrorCounts = append(injectSeekGEErrorCounts, n)
					}
				default:
					return fmt.Sprintf("%s: unknown arg: %s", d.Cmd, arg.Key)
				}
			}

			iter := newIter(opts)
			errorIter.injectSeekErrorCounts = injectSeekGEErrorCounts
			return runIterCmd(d, iter, true)

		default:
			return fmt.Sprintf("unknown command: %s", d.Cmd)
		}
	})
}

type testBlockIntervalCollector struct {
	numLength     int
	offsetFromEnd int
	initialized   bool
	lower, upper  uint64
}

func (bi *testBlockIntervalCollector) Add(key InternalKey, value []byte) error {
	k := key.UserKey
	if len(k) < bi.numLength+bi.offsetFromEnd {
		return nil
	}
	n := len(k) - bi.offsetFromEnd - bi.numLength
	val, err := strconv.Atoi(string(k[n : n+bi.numLength]))
	if err != nil {
		return err
	}
	if val < 0 {
		panic("testBlockIntervalCollector expects values >= 0")
	}
	uval := uint64(val)
	if !bi.initialized {
		bi.lower, bi.upper = uval, uval+1
		bi.initialized = true
		return nil
	}
	if bi.lower > uval {
		bi.lower = uval
	}
	if uval >= bi.upper {
		bi.upper = uval + 1
	}
	return nil
}

func (bi *testBlockIntervalCollector) FinishDataBlock() (lower uint64, upper uint64, err error) {
	bi.initialized = false
	l, u := bi.lower, bi.upper
	bi.lower, bi.upper = 0, 0
	return l, u, nil
}
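
// TestBlockIntervalCollectorSketch is an illustrative sketch added for
// exposition (not part of the upstream suite): with numLength=2 and
// offsetFromEnd=0, the collector parses the trailing two digits of each user
// key, so keys "a07" and "a03" produce the half-open interval [3, 8) for the
// block.
func TestBlockIntervalCollectorSketch(t *testing.T) {
	bi := testBlockIntervalCollector{numLength: 2}
	require.NoError(t, bi.Add(base.MakeInternalKey([]byte("a07"), 1, InternalKeyKindSet), nil))
	require.NoError(t, bi.Add(base.MakeInternalKey([]byte("a03"), 2, InternalKeyKindSet), nil))
	lower, upper, err := bi.FinishDataBlock()
	require.NoError(t, err)
	require.EqualValues(t, 3, lower)
	require.EqualValues(t, 8, upper)
}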

func TestIteratorBlockIntervalFilter(t *testing.T) {
	var mem vfs.FS
	var d *DB
	defer func() {
		require.NoError(t, d.Close())
	}()

	type collector struct {
		id     uint16
		offset int
	}
	createDB := func(collectors []collector) {
		if d != nil {
			require.NoError(t, d.Close())
		}

		mem = vfs.NewMem()
		require.NoError(t, mem.MkdirAll("ext", 0755))

		var bpCollectors []func() BlockPropertyCollector
		for _, c := range collectors {
			coll := c
			bpCollectors = append(bpCollectors, func() BlockPropertyCollector {
				return sstable.NewBlockIntervalCollector(
					fmt.Sprintf("%d", coll.id),
					&testBlockIntervalCollector{numLength: 2, offsetFromEnd: coll.offset},
					nil, /* range key collector */
				)
			})
		}
		opts := &Options{
			FS:                      mem,
			FormatMajorVersion:      internalFormatNewest,
			BlockPropertyCollectors: bpCollectors,
		}
		lo := LevelOptions{BlockSize: 1, IndexBlockSize: 1}
		opts.Levels = append(opts.Levels, lo)

		// Automatic compactions may compact away tombstones from L6, making
		// some testcases non-deterministic.
		opts.DisableAutomaticCompactions = true
		var err error
		d, err = Open("", opts)
		require.NoError(t, err)
	}

	datadriven.RunTest(
		t, "testdata/iterator_block_interval_filter", func(t *testing.T, td *datadriven.TestData) string {
			switch td.Cmd {
			case "build":
				var collectors []collector
				for _, arg := range td.CmdArgs {
					switch arg.Key {
					case "id_offset":
						if len(arg.Vals) != 2 {
							return "id and offset not provided"
						}
						var id, offset int
						var err error
						if id, err = strconv.Atoi(arg.Vals[0]); err != nil {
							return err.Error()
						}
						if offset, err = strconv.Atoi(arg.Vals[1]); err != nil {
							return err.Error()
						}
						collectors = append(collectors, collector{id: uint16(id), offset: offset})
					default:
						return fmt.Sprintf("unknown key: %s", arg.Key)
					}
				}
				createDB(collectors)
				b := d.NewBatch()
				if err := runBatchDefineCmd(td, b); err != nil {
					return err.Error()
				}
				if err := b.Commit(nil); err != nil {
					return err.Error()
				}
				if err := d.Flush(); err != nil {
					return err.Error()
				}
				return runLSMCmd(td, d)

			case "iter":
				var opts IterOptions
				for _, arg := range td.CmdArgs {
					switch arg.Key {
					case "id_lower_upper":
						if len(arg.Vals) != 3 {
							return "id, lower, upper not provided"
						}
						var id, lower, upper int
						var err error
						if id, err = strconv.Atoi(arg.Vals[0]); err != nil {
							return err.Error()
						}
						if lower, err = strconv.Atoi(arg.Vals[1]); err != nil {
							return err.Error()
						}
						if upper, err = strconv.Atoi(arg.Vals[2]); err != nil {
							return err.Error()
						}
						opts.PointKeyFilters = append(opts.PointKeyFilters,
							sstable.NewBlockIntervalFilter(fmt.Sprintf("%d", id),
								uint64(lower), uint64(upper)))
					default:
						return fmt.Sprintf("unknown key: %s", arg.Key)
					}
				}
				rand.Shuffle(len(opts.PointKeyFilters), func(i, j int) {
					opts.PointKeyFilters[i], opts.PointKeyFilters[j] =
						opts.PointKeyFilters[j], opts.PointKeyFilters[i]
				})
				iter, _ := d.NewIter(&opts)
				return runIterCmd(td, iter, true)

			default:
				return fmt.Sprintf("unknown command: %s", td.Cmd)
			}
		})
}

var seed = flag.Uint64("seed", 0, "a pseudorandom number generator seed")

func randStr(fill []byte, rng *rand.Rand) {
	const letters = "abcdefghijklmnopqrstuvwxyz"
	const lettersLen = len(letters)
	for i := 0; i < len(fill); i++ {
		fill[i] = letters[rng.Intn(lettersLen)]
	}
}

func randValue(n int, rng *rand.Rand) []byte {
	buf := make([]byte, n)
	randStr(buf, rng)
	return buf
}

func randKey(n int, rng *rand.Rand) ([]byte, int) {
	keyPrefix := randValue(n, rng)
	suffix := rng.Intn(100)
	return append(keyPrefix, []byte(fmt.Sprintf("%02d", suffix))...), suffix
}
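
// TestRandKeySketch is an illustrative sketch added for exposition (not part
// of the upstream suite): randKey appends a two-digit suffix in [0,100) to a
// random lowercase prefix, which is exactly the key shape that
// testBlockIntervalCollector{numLength: 2} parses in the randomized test
// below.
func TestRandKeySketch(t *testing.T) {
	rng := rand.New(rand.NewSource(1))
	key, suffix := randKey(3, rng)
	require.Len(t, key, 5) // 3-byte random prefix + 2-digit suffix.
	require.Equal(t, fmt.Sprintf("%02d", suffix), string(key[3:]))
}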

func TestIteratorRandomizedBlockIntervalFilter(t *testing.T) {
	mem := vfs.NewMem()
	opts := &Options{
		FS:                 mem,
		FormatMajorVersion: internalFormatNewest,
		BlockPropertyCollectors: []func() BlockPropertyCollector{
			func() BlockPropertyCollector {
				return sstable.NewBlockIntervalCollector(
					"0", &testBlockIntervalCollector{numLength: 2}, nil, /* range key collector */
				)
			},
		},
	}
	seed := *seed
	if seed == 0 {
		seed = uint64(time.Now().UnixNano())
		t.Logf("seed: %d", seed)
	}
	rng := rand.New(rand.NewSource(seed))
	opts.FlushSplitBytes = 1 << rng.Intn(8)             // 1B - 256B
	opts.L0CompactionThreshold = 1 << rng.Intn(2)       // 1-2
	opts.L0CompactionFileThreshold = 1 << rng.Intn(11)  // 1-1024
	opts.LBaseMaxBytes = 1 << rng.Intn(11)              // 1B - 1KB
	opts.MemTableSize = 2 << 10                         // 2KB
	var lopts LevelOptions
	lopts.BlockSize = 1 << rng.Intn(8)      // 1B - 256B
	lopts.IndexBlockSize = 1 << rng.Intn(8) // 1B - 256B
	opts.Levels = []LevelOptions{lopts}

	d, err := Open("", opts)
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()
	matchingKeyValues := make(map[string]string)
	lower := rng.Intn(100)
	upper := rng.Intn(100)
	if lower > upper {
		lower, upper = upper, lower
	}
	n := 2000
	for i := 0; i < n; i++ {
		key, suffix := randKey(20+rng.Intn(5), rng)
		value := randValue(50, rng)
		if lower <= suffix && suffix < upper {
			matchingKeyValues[string(key)] = string(value)
		}
		d.Set(key, value, nil)
	}

	var iterOpts IterOptions
	iterOpts.PointKeyFilters = []BlockPropertyFilter{
		sstable.NewBlockIntervalFilter("0",
			uint64(lower), uint64(upper)),
	}
	iter, _ := d.NewIter(&iterOpts)
	defer func() {
		require.NoError(t, iter.Close())
	}()
	iter.First()
	found := 0
	matchingCount := len(matchingKeyValues)
	for ; iter.Valid(); iter.Next() {
		found++
		key := string(iter.Key())
		value, ok := matchingKeyValues[key]
		if ok {
			require.Equal(t, value, string(iter.Value()))
			delete(matchingKeyValues, key)
		}
	}
	t.Logf("generated %d keys: %d matching, %d found", n, matchingCount, found)
	require.Equal(t, 0, len(matchingKeyValues))
}

func TestIteratorGuaranteedDurable(t *testing.T) {
	mem := vfs.NewMem()
	opts := &Options{FS: mem}
	d, err := Open("", opts)
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()
	iterOptions := IterOptions{OnlyReadGuaranteedDurable: true}
	failFunc := func(t *testing.T, reader Reader) {
		defer func() {
			if r := recover(); r == nil {
				require.Fail(t, "expected panic")
			}
			reader.Close()
		}()
		iter, _ := reader.NewIter(&iterOptions)
		defer iter.Close()
	}
	t.Run("snapshot", func(t *testing.T) {
		failFunc(t, d.NewSnapshot())
	})
	t.Run("batch", func(t *testing.T) {
		failFunc(t, d.NewIndexedBatch())
	})
	t.Run("db", func(t *testing.T) {
		d.Set([]byte("k"), []byte("v"), nil)
		foundKV := func(o *IterOptions) bool {
			iter, _ := d.NewIter(o)
			defer iter.Close()
			iter.SeekGE([]byte("k"))
			return iter.Valid()
		}
		require.True(t, foundKV(nil))
		require.False(t, foundKV(&iterOptions))
		require.NoError(t, d.Flush())
		require.True(t, foundKV(nil))
		require.True(t, foundKV(&iterOptions))
	})
}
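
// Note added for exposition: the test below relies on Pebble copying bound
// slices. NewIter, SetBounds, and SetOptions are expected to retain their own
// copies of the bounds they are given, so trashBounds deliberately scribbles
// random bytes over the caller's slices after each call, and the printed
// bounds must still show the original values.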

func TestIteratorBoundsLifetimes(t *testing.T) {
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
	d := newPointTestkeysDatabase(t, testkeys.Alpha(2))
	defer func() { require.NoError(t, d.Close()) }()

	var buf bytes.Buffer
	iterators := map[string]*Iterator{}
	var labels []string
	// NB: the w parameter is unused; printIters always writes to the
	// captured buf.
	printIters := func(w io.Writer) {
		labels = labels[:0]
		for label := range iterators {
			labels = append(labels, label)
		}
		sort.Strings(labels)
		for _, label := range labels {
			it := iterators[label]
			fmt.Fprintf(&buf, "%s: (", label)
			if it.opts.LowerBound == nil {
				fmt.Fprint(&buf, "<nil>, ")
			} else {
				fmt.Fprintf(&buf, "%q, ", it.opts.LowerBound)
			}
			if it.opts.UpperBound == nil {
				fmt.Fprint(&buf, "<nil>)")
			} else {
				fmt.Fprintf(&buf, "%q)", it.opts.UpperBound)
			}
			fmt.Fprintf(&buf, " boundsBufIdx=%d\n", it.boundsBufIdx)
		}
	}
	parseBounds := func(td *datadriven.TestData) (lower, upper []byte) {
		for _, arg := range td.CmdArgs {
			if arg.Key == "lower" {
				lower = []byte(arg.Vals[0])
			} else if arg.Key == "upper" {
				upper = []byte(arg.Vals[0])
			}
		}
		return lower, upper
	}
	trashBounds := func(bounds ...[]byte) {
		for _, bound := range bounds {
			rng.Read(bound[:])
		}
	}

	datadriven.RunTest(t, "testdata/iterator_bounds_lifetimes", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			var err error
			if d, err = runDBDefineCmd(td, d.opts); err != nil {
				return err.Error()
			}
			d.mu.Lock()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			return s
		case "new-iter":
			var label string
			td.ScanArgs(t, "label", &label)
			lower, upper := parseBounds(td)
			iterators[label], _ = d.NewIter(&IterOptions{
				LowerBound: lower,
				UpperBound: upper,
			})
			trashBounds(lower, upper)
			buf.Reset()
			printIters(&buf)
			return buf.String()
		case "clone":
			var from, to string
			td.ScanArgs(t, "from", &from)
			td.ScanArgs(t, "to", &to)
			var err error
			iterators[to], err = iterators[from].Clone(CloneOptions{})
			if err != nil {
				return err.Error()
			}
			buf.Reset()
			printIters(&buf)
			return buf.String()
		case "close":
			var label string
			td.ScanArgs(t, "label", &label)
			iterators[label].Close()
			delete(iterators, label)
			buf.Reset()
			printIters(&buf)
			return buf.String()
		case "iter":
			var label string
			td.ScanArgs(t, "label", &label)
			return runIterCmd(td, iterators[label], false /* closeIter */)
		case "set-bounds":
			var label string
			td.ScanArgs(t, "label", &label)
			lower, upper := parseBounds(td)
			iterators[label].SetBounds(lower, upper)
			trashBounds(lower, upper)
			buf.Reset()
			printIters(&buf)
			return buf.String()
		case "set-options":
			var label string
			var tableFilter bool
			td.ScanArgs(t, "label", &label)
			opts := iterators[label].opts
			for _, arg := range td.CmdArgs {
				if arg.Key == "table-filter" {
					tableFilter = true
				}
				if arg.Key == "key-types" {
					switch arg.Vals[0] {
					case "points-only":
						opts.KeyTypes = IterKeyTypePointsOnly
					case "ranges-only":
						opts.KeyTypes = IterKeyTypeRangesOnly
					case "both":
						opts.KeyTypes = IterKeyTypePointsAndRanges
					default:
						panic(fmt.Sprintf("unrecognized key type %q", arg.Vals[0]))
					}
				}
			}
			opts.LowerBound, opts.UpperBound = parseBounds(td)
			if tableFilter {
				opts.TableFilter = func(userProps map[string]string) bool { return false }
			}
			iterators[label].SetOptions(&opts)
			trashBounds(opts.LowerBound, opts.UpperBound)
			buf.Reset()
			printIters(&buf)
			return buf.String()
		default:
			return fmt.Sprintf("unrecognized command %q", td.Cmd)
		}
	})
}

func TestIteratorStatsMerge(t *testing.T) {
	s := IteratorStats{
		ForwardSeekCount: [NumStatsKind]int{1, 2},
		ReverseSeekCount: [NumStatsKind]int{3, 4},
		ForwardStepCount: [NumStatsKind]int{5, 6},
		ReverseStepCount: [NumStatsKind]int{7, 8},
		InternalStats: InternalIteratorStats{
			BlockBytes:                     9,
			BlockBytesInCache:              10,
			BlockReadDuration:              3 * time.Millisecond,
			KeyBytes:                       11,
			ValueBytes:                     12,
			PointCount:                     13,
			PointsCoveredByRangeTombstones: 14,
		},
		RangeKeyStats: RangeKeyIteratorStats{
			Count:           15,
			ContainedPoints: 16,
			SkippedPoints:   17,
		},
	}
	s.InternalStats.SeparatedPointValue.Count = 1
	s.InternalStats.SeparatedPointValue.ValueBytes = 5
	s.InternalStats.SeparatedPointValue.ValueBytesFetched = 3
	s2 := IteratorStats{
		ForwardSeekCount: [NumStatsKind]int{1, 2},
		ReverseSeekCount: [NumStatsKind]int{3, 4},
		ForwardStepCount: [NumStatsKind]int{5, 6},
		ReverseStepCount: [NumStatsKind]int{7, 8},
		InternalStats: InternalIteratorStats{
			BlockBytes:                     9,
			BlockBytesInCache:              10,
			BlockReadDuration:              4 * time.Millisecond,
			KeyBytes:                       11,
			ValueBytes:                     12,
			PointCount:                     13,
			PointsCoveredByRangeTombstones: 14,
		},
		RangeKeyStats: RangeKeyIteratorStats{
			Count:           15,
			ContainedPoints: 16,
			SkippedPoints:   17,
		},
	}
	s2.InternalStats.SeparatedPointValue.Count = 2
	s2.InternalStats.SeparatedPointValue.ValueBytes = 10
	s2.InternalStats.SeparatedPointValue.ValueBytesFetched = 6
	s.Merge(s2)
	expected := IteratorStats{
		ForwardSeekCount: [NumStatsKind]int{2, 4},
		ReverseSeekCount: [NumStatsKind]int{6, 8},
		ForwardStepCount: [NumStatsKind]int{10, 12},
		ReverseStepCount: [NumStatsKind]int{14, 16},
		InternalStats: InternalIteratorStats{
			BlockBytes:                     18,
			BlockBytesInCache:              20,
			BlockReadDuration:              7 * time.Millisecond,
			KeyBytes:                       22,
			ValueBytes:                     24,
			PointCount:                     26,
			PointsCoveredByRangeTombstones: 28,
		},
		RangeKeyStats: RangeKeyIteratorStats{
			Count:           30,
			ContainedPoints: 32,
			SkippedPoints:   34,
		},
	}
	expected.InternalStats.SeparatedPointValue.Count = 3
	expected.InternalStats.SeparatedPointValue.ValueBytes = 15
	expected.InternalStats.SeparatedPointValue.ValueBytesFetched = 9
	require.Equal(t, expected, s)
}

// TestSetOptionsEquivalence tests equivalence between SetOptions to mutate an
// iterator and constructing a new iterator with NewIter. The long-lived
// iterator and the new iterator should surface identical iterator states.
func TestSetOptionsEquivalence(t *testing.T) {
	seed := uint64(time.Now().UnixNano())
	// Call a helper function with the seed so that the seed appears within
	// stack traces if there's a panic.
	testSetOptionsEquivalence(t, seed)
}

func testSetOptionsEquivalence(t *testing.T, seed uint64) {
	rng := rand.New(rand.NewSource(seed))
	ks := testkeys.Alpha(2)
	d := newTestkeysDatabase(t, ks, rng)
	defer func() { require.NoError(t, d.Close()) }()

	var o IterOptions
	generateNewOptions := func() {
		// TODO(jackson): Include test coverage for block property filters, etc.
		if rng.Intn(2) == 1 {
			o.KeyTypes = IterKeyType(rng.Intn(3))
		}
		if rng.Intn(2) == 1 {
			if rng.Intn(2) == 1 {
				o.LowerBound = nil
				if rng.Intn(2) == 1 {
					o.LowerBound = testkeys.KeyAt(ks, rng.Int63n(ks.Count()), rng.Int63n(ks.Count()))
				}
			}
			if rng.Intn(2) == 1 {
				o.UpperBound = nil
				if rng.Intn(2) == 1 {
					o.UpperBound = testkeys.KeyAt(ks, rng.Int63n(ks.Count()), rng.Int63n(ks.Count()))
				}
			}
			if testkeys.Comparer.Compare(o.LowerBound, o.UpperBound) > 0 {
				o.LowerBound, o.UpperBound = o.UpperBound, o.LowerBound
			}
		}
		o.RangeKeyMasking.Suffix = nil
		if o.KeyTypes == IterKeyTypePointsAndRanges && rng.Intn(2) == 1 {
			o.RangeKeyMasking.Suffix = testkeys.Suffix(rng.Int63n(ks.Count()))
		}
	}

	var longLivedIter, newIter *Iterator
	var history, longLivedBuf, newIterBuf bytes.Buffer
	defer func() {
		if r := recover(); r != nil {
			t.Log(history.String())
			panic(r)
		}
	}()
	defer func() {
		if longLivedIter != nil {
			longLivedIter.Close()
		}
		if newIter != nil {
			newIter.Close()
		}
	}()

	type positioningOp struct {
		desc string
		run  func(*Iterator) IterValidityState
	}
	positioningOps := []func() positioningOp{
		// SeekGE
		func() positioningOp {
			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
			return positioningOp{
				desc: fmt.Sprintf("SeekGE(%q)", k),
				run: func(it *Iterator) IterValidityState {
					return it.SeekGEWithLimit(k, nil)
				},
			}
		},
		// SeekLT
		func() positioningOp {
			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
			return positioningOp{
				desc: fmt.Sprintf("SeekLT(%q)", k),
				run: func(it *Iterator) IterValidityState {
					return it.SeekLTWithLimit(k, nil)
				},
			}
		},
		// SeekPrefixGE
		func() positioningOp {
			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
			return positioningOp{
				desc: fmt.Sprintf("SeekPrefixGE(%q)", k),
				run: func(it *Iterator) IterValidityState {
					if it.SeekPrefixGE(k) {
						return IterValid
					}
					return IterExhausted
				},
			}
		},
	}

	for i := 0; i < 10_000; i++ {
		// Generate new random options. The options in o will be mutated.
		generateNewOptions()
		fmt.Fprintf(&history, "new options: %s\n", iterOptionsString(&o))

		newIter, _ = d.NewIter(&o)
		if longLivedIter == nil {
			longLivedIter, _ = d.NewIter(&o)
		} else {
			longLivedIter.SetOptions(&o)
		}

		// Apply the same operation to both iterators.
		iterOp := positioningOps[rng.Intn(len(positioningOps))]()
		newIterValidity := iterOp.run(newIter)
		longLivedValidity := iterOp.run(longLivedIter)

		newIterBuf.Reset()
		longLivedBuf.Reset()
		printIterState(&newIterBuf, newIter, newIterValidity, true /* printValidityState */)
		printIterState(&longLivedBuf, longLivedIter, longLivedValidity, true /* printValidityState */)
		fmt.Fprintf(&history, "%s = %s\n", iterOp.desc, newIterBuf.String())

		if newIterBuf.String() != longLivedBuf.String() {
			t.Logf("history:\n%s\n", history.String())
			t.Logf("seed: %d\n", seed)
			t.Fatalf("expected %q, got %q", newIterBuf.String(), longLivedBuf.String())
		}
		_ = newIter.Close()

		newIter = nil
	}
	t.Logf("history:\n%s\n", history.String())
}

func iterOptionsString(o *IterOptions) string {
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "key-types=%s, lower=%q, upper=%q",
		o.KeyTypes, o.LowerBound, o.UpperBound)
	if o.TableFilter != nil {
		fmt.Fprintf(&buf, ", table-filter")
	}
	if o.OnlyReadGuaranteedDurable {
		fmt.Fprintf(&buf, ", only-durable")
	}
	if o.UseL6Filters {
		fmt.Fprintf(&buf, ", use-L6-filters")
	}
	for i, pkf := range o.PointKeyFilters {
		fmt.Fprintf(&buf, ", point-key-filter[%d]=%q", i, pkf.Name())
	}
	for i, rkf := range o.RangeKeyFilters {
		fmt.Fprintf(&buf, ", range-key-filter[%d]=%q", i, rkf.Name())
	}
	return buf.String()
}

func newTestkeysDatabase(t *testing.T, ks testkeys.Keyspace, rng *rand.Rand) *DB {
	dbOpts := &Options{
		Comparer:           testkeys.Comparer,
		FS:                 vfs.NewMem(),
		FormatMajorVersion: FormatRangeKeys,
		Logger:             panicLogger{},
	}
	d, err := Open("", dbOpts)
	require.NoError(t, err)

	// Randomize the order in which we write keys.
	order := rng.Perm(int(ks.Count()))
	b := d.NewBatch()
	keyBuf := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen)
	keyBuf2 := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen)
	for i := 0; i < len(order); i++ {
		const maxVersionsPerKey = 10
		keyIndex := order[i]
		for versions := rng.Intn(maxVersionsPerKey); versions > 0; versions-- {
			n := testkeys.WriteKeyAt(keyBuf, ks, int64(keyIndex), rng.Int63n(maxVersionsPerKey))
			b.Set(keyBuf[:n], keyBuf[:n], nil)
		}

		// Sometimes add a range key too.
		if rng.Intn(100) == 1 {
			startIdx := rng.Int63n(ks.Count())
			endIdx := rng.Int63n(ks.Count())
			startLen := testkeys.WriteKey(keyBuf, ks, startIdx)
			endLen := testkeys.WriteKey(keyBuf2, ks, endIdx)
			suffixInt := rng.Int63n(maxVersionsPerKey)
			require.NoError(t, b.RangeKeySet(
				keyBuf[:startLen],
				keyBuf2[:endLen],
				testkeys.Suffix(suffixInt),
				nil,
				nil))
		}

		// Randomize the flush points.
		if !b.Empty() && rng.Intn(10) == 1 {
			require.NoError(t, b.Commit(nil))
			require.NoError(t, d.Flush())
			b = d.NewBatch()
		}
	}
	if !b.Empty() {
		require.NoError(t, b.Commit(nil))
	}
	return d
}

func newPointTestkeysDatabase(t *testing.T, ks testkeys.Keyspace) *DB {
	dbOpts := &Options{
		Comparer:           testkeys.Comparer,
		FS:                 vfs.NewMem(),
		FormatMajorVersion: FormatRangeKeys,
	}
	d, err := Open("", dbOpts)
	require.NoError(t, err)

	b := d.NewBatch()
	keyBuf := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen)
	for i := int64(0); i < ks.Count(); i++ {
		n := testkeys.WriteKeyAt(keyBuf, ks, i, i)
		b.Set(keyBuf[:n], keyBuf[:n], nil)
	}
	require.NoError(t, b.Commit(nil))
	return d
}

func BenchmarkIteratorSeekGE(b *testing.B) {
	m, keys := buildMemTable(b)
	iter := &Iterator{
		comparer: *DefaultComparer,
		iter:     m.newIter(nil),
	}
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		key := keys[rng.Intn(len(keys))]
		iter.SeekGE(key)
	}
}

func BenchmarkIteratorNext(b *testing.B) {
	m, _ := buildMemTable(b)
	iter := &Iterator{
		comparer: *DefaultComparer,
		iter:     m.newIter(nil),
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if !iter.Valid() {
			iter.First()
		}
		iter.Next()
	}
}

func BenchmarkIteratorPrev(b *testing.B) {
	m, _ := buildMemTable(b)
	iter := &Iterator{
		comparer: *DefaultComparer,
		iter:     m.newIter(nil),
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if !iter.Valid() {
			iter.Last()
		}
		iter.Prev()
	}
}

type twoLevelBloomTombstoneState struct {
	keys        [][]byte
	readers     [8][][]*sstable.Reader
	levelSlices [8][]manifest.LevelSlice
	indexFunc   func(twoLevelIndex bool, bloom bool, withTombstone bool) int
}

func setupForTwoLevelBloomTombstone(b *testing.B, keyOffset int) twoLevelBloomTombstoneState {
	const blockSize = 32 << 10
	const restartInterval = 16
	const levelCount = 5

	var readers [8][][]*sstable.Reader
	var levelSlices [8][]manifest.LevelSlice
	var keys [][]byte
	indexFunc := func(twoLevelIndex bool, bloom bool, withTombstone bool) int {
		index := 0
		if twoLevelIndex {
			index = 4
		}
		if bloom {
			index += 2
		}
		if withTombstone {
			index++
		}
		return index
	}
	for _, twoLevelIndex := range []bool{false, true} {
		for _, bloom := range []bool{false, true} {
			for _, withTombstone := range []bool{false, true} {
				index := indexFunc(twoLevelIndex, bloom, withTombstone)
				levels := levelCount
				if withTombstone {
					levels = 1
				}
				readers[index], levelSlices[index], keys = buildLevelsForMergingIterSeqSeek(
					b, blockSize, restartInterval, levels, keyOffset, withTombstone, bloom, twoLevelIndex)
			}
		}
	}
	return twoLevelBloomTombstoneState{
		keys: keys, readers: readers, levelSlices: levelSlices, indexFunc: indexFunc}
}
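
// Note added for exposition: indexFunc packs the three benchmark dimensions
// into a 3-bit index (two-level=4, bloom=2, with-tombstone=1); for example,
// two-level=true, bloom=false, with-tombstone=true maps to 4+0+1 = 5. This
// is why the readers and levelSlices arrays above have exactly 8 slots.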

// BenchmarkIteratorSeqSeekPrefixGENotFound exercises the case of SeekPrefixGE
// specifying monotonic keys all of which precede actual keys present in L6 of
// the DB. Moreover, with-tombstone=true exercises the sub-case where those
// actual keys are deleted using a range tombstone that has not physically
// deleted those keys due to the presence of a snapshot that needs to see
// those keys. This sub-case needs to be efficient in (a) avoiding iteration
// over all those deleted keys, including repeated iteration, and (b) using
// the next optimization, since the seeks are monotonic.
func BenchmarkIteratorSeqSeekPrefixGENotFound(b *testing.B) {
	const keyOffset = 100000
	state := setupForTwoLevelBloomTombstone(b, keyOffset)
	readers := state.readers
	levelSlices := state.levelSlices
	indexFunc := state.indexFunc

	// We will not be seeking to the keys that were written but instead to
	// keys before the written keys. This is to validate that the optimization
	// to use Next still functions when mergingIter checks for the prefix
	// match, and that mergingIter can avoid iterating over all the keys
	// deleted by a range tombstone when there is no possibility of matching
	// the prefix.
	var keys [][]byte
	for i := 0; i < keyOffset; i++ {
		keys = append(keys, []byte(fmt.Sprintf("%08d", i)))
	}
	for _, skip := range []int{1, 2, 4} {
		for _, twoLevelIndex := range []bool{false, true} {
			for _, bloom := range []bool{false, true} {
				for _, withTombstone := range []bool{false, true} {
					b.Run(fmt.Sprintf("skip=%d/two-level=%t/bloom=%t/with-tombstone=%t",
						skip, twoLevelIndex, bloom, withTombstone),
						func(b *testing.B) {
							index := indexFunc(twoLevelIndex, bloom, withTombstone)
							readers := readers[index]
							levelSlices := levelSlices[index]
							m := buildMergingIter(readers, levelSlices)
							iter := Iterator{
								comparer: *testkeys.Comparer,
								merge:    DefaultMerger.Merge,
								iter:     m,
							}
							pos := 0
							b.ResetTimer()
							for i := 0; i < b.N; i++ {
								// When withTombstone=true, and prior to the
								// optimization to stop early due to a range
								// tombstone, the iteration would continue into the
								// next file, and not be able to use Next at the lower
								// level in the next SeekPrefixGE call. So we would
								// incur the cost of iterating over all the deleted
								// keys for every seek. Note that it is not possible
								// to do a noop optimization in Iterator for the
								// prefix case, unlike SeekGE/SeekLT, since we don't
								// know if the iterators inside mergingIter are all
								// appropriately positioned -- some may not be due to
								// bloom filters not matching.
								valid := iter.SeekPrefixGE(keys[pos])
								if valid {
									b.Fatalf("key should not be found")
								}
								pos += skip
								if pos >= keyOffset {
									pos = 0
								}
							}
							b.StopTimer()
							iter.Close()
						})
				}
			}
		}
	}
	for _, r := range readers {
		for i := range r {
			for j := range r[i] {
				r[i][j].Close()
			}
		}
	}
}

// BenchmarkIteratorSeqSeekPrefixGEFound exercises the case of SeekPrefixGE
// specifying monotonic keys that are present in L6 of the DB. Moreover,
// with-tombstone=true exercises the sub-case where those actual keys are
// deleted using a range tombstone that has not physically deleted those keys
// due to the presence of a snapshot that needs to see those keys.
This 1998 // sub-case needs to be efficient in (a) avoiding iteration over all those 1999 // deleted keys, including repeated iteration, (b) using the next 2000 // optimization, since the seeks are monotonic. 2001 func BenchmarkIteratorSeqSeekPrefixGEFound(b *testing.B) { 2002 state := setupForTwoLevelBloomTombstone(b, 0) 2003 keys := state.keys 2004 readers := state.readers 2005 levelSlices := state.levelSlices 2006 indexFunc := state.indexFunc 2007 2008 for _, skip := range []int{1, 2, 4} { 2009 for _, twoLevelIndex := range []bool{false, true} { 2010 for _, bloom := range []bool{false, true} { 2011 for _, withTombstone := range []bool{false, true} { 2012 b.Run(fmt.Sprintf("skip=%d/two-level=%t/bloom=%t/with-tombstone=%t", 2013 skip, twoLevelIndex, bloom, withTombstone), 2014 func(b *testing.B) { 2015 index := indexFunc(twoLevelIndex, bloom, withTombstone) 2016 readers := readers[index] 2017 levelSlices := levelSlices[index] 2018 m := buildMergingIter(readers, levelSlices) 2019 iter := Iterator{ 2020 comparer: *testkeys.Comparer, 2021 merge: DefaultMerger.Merge, 2022 iter: m, 2023 } 2024 pos := 0 2025 b.ResetTimer() 2026 for i := 0; i < b.N; i++ { 2027 // When withTombstone=true, and prior to the 2028 // optimization to stop early due to a range 2029 // tombstone, the iteration would continue into the 2030 // next file, and not be able to use Next at the lower 2031 // level in the next SeekPrefixGE call. So we would 2032 // incur the cost of iterating over all the deleted 2033 // keys for every seek. Note that it is not possible 2034 // to do a noop optimization in Iterator for the 2035 // prefix case, unlike SeekGE/SeekLT, since we don't 2036 // know if the iterators inside mergingIter are all 2037 // appropriately positioned -- some may not be due to 2038 // bloom filters not matching. 2039 _ = iter.SeekPrefixGE(keys[pos]) 2040 pos += skip 2041 if pos >= len(keys) { 2042 pos = 0 2043 } 2044 } 2045 b.StopTimer() 2046 iter.Close() 2047 }) 2048 } 2049 } 2050 } 2051 } 2052 for _, r := range readers { 2053 for i := range r { 2054 for j := range r[i] { 2055 r[i][j].Close() 2056 } 2057 } 2058 } 2059 } 2060 2061 // BenchmarkIteratorSeqSeekGEWithBounds is analogous to 2062 // BenchmarkMergingIterSeqSeekGEWithBounds, except for using an Iterator, 2063 // which causes it to exercise the end-to-end code path. 2064 func BenchmarkIteratorSeqSeekGEWithBounds(b *testing.B) { 2065 const blockSize = 32 << 10 2066 const restartInterval = 16 2067 const levelCount = 5 2068 for _, twoLevelIndex := range []bool{false, true} { 2069 b.Run(fmt.Sprintf("two-level=%t", twoLevelIndex), 2070 func(b *testing.B) { 2071 readers, levelSlices, keys := buildLevelsForMergingIterSeqSeek( 2072 b, blockSize, restartInterval, levelCount, 0, /* keyOffset */ 2073 false, false, twoLevelIndex) 2074 m := buildMergingIter(readers, levelSlices) 2075 iter := Iterator{ 2076 comparer: *testkeys.Comparer, 2077 merge: DefaultMerger.Merge, 2078 iter: m, 2079 } 2080 keyCount := len(keys) 2081 b.ResetTimer() 2082 for i := 0; i < b.N; i++ { 2083 pos := i % (keyCount - 1) 2084 iter.SetBounds(keys[pos], keys[pos+1]) 2085 // SeekGE will return keys[pos]. 
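// (The bounds set just above are [keys[pos], keys[pos+1]), so the seek
// lands exactly on the lower bound and a single Next reaches the exclusive
// upper bound.)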
2086 valid := iter.SeekGE(keys[pos])
2087 for valid {
2088 valid = iter.Next()
2089 }
2090 if iter.Error() != nil {
2091 b.Fatal(iter.Error())
2092 }
2093 }
2094 iter.Close()
2095 for i := range readers {
2096 for j := range readers[i] {
2097 readers[i][j].Close()
2098 }
2099 }
2100 })
2101 }
2102 }
2103
2104 func BenchmarkIteratorSeekGENoop(b *testing.B) {
2105 const blockSize = 32 << 10
2106 const restartInterval = 16
2107 const levelCount = 5
2108 const keyOffset = 10000
2109 readers, levelSlices, _ := buildLevelsForMergingIterSeqSeek(
2110 b, blockSize, restartInterval, levelCount, keyOffset, false, false, false)
2111 var keys [][]byte
2112 for i := 0; i < keyOffset; i++ {
2113 keys = append(keys, []byte(fmt.Sprintf("%08d", i)))
2114 }
2115 for _, withLimit := range []bool{false, true} {
2116 b.Run(fmt.Sprintf("withLimit=%t", withLimit), func(b *testing.B) {
2117 m := buildMergingIter(readers, levelSlices)
2118 iter := Iterator{
2119 comparer: *testkeys.Comparer,
2120 merge: DefaultMerger.Merge,
2121 iter: m,
2122 }
2123 b.ResetTimer()
2124 for i := 0; i < b.N; i++ {
2125 pos := i % (len(keys) - 1)
2126 if withLimit {
2127 if iter.SeekGEWithLimit(keys[pos], keys[pos+1]) != IterAtLimit {
2128 b.Fatal("should be at limit")
2129 }
2130 } else {
2131 if !iter.SeekGE(keys[pos]) {
2132 b.Fatal("should be valid")
2133 }
2134 }
2135 }
2136 iter.Close()
2137 })
2138 }
2139 for i := range readers {
2140 for j := range readers[i] {
2141 readers[i][j].Close()
2142 }
2143 }
2144 }
2145
2146 func BenchmarkBlockPropertyFilter(b *testing.B) {
2147 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
2148 for _, matchInterval := range []int{1, 10, 100, 1000} {
2149 b.Run(fmt.Sprintf("match-interval=%d", matchInterval), func(b *testing.B) {
2150 mem := vfs.NewMem()
2151 opts := &Options{
2152 FS: mem,
2153 FormatMajorVersion: FormatNewest,
2154 BlockPropertyCollectors: []func() BlockPropertyCollector{
2155 func() BlockPropertyCollector {
2156 return sstable.NewBlockIntervalCollector(
2157 "0", &testBlockIntervalCollector{numLength: 3}, nil, /* range key collector */
2158 )
2159 },
2160 },
2161 }
2162 d, err := Open("", opts)
2163 require.NoError(b, err)
2164 defer func() {
2165 require.NoError(b, d.Close())
2166 }()
2167 batch := d.NewBatch()
2168 const numKeys = 20 * 1000
2169 const valueSize = 1000
2170 for i := 0; i < numKeys; i++ {
2171 key := fmt.Sprintf("%06d%03d", i, i%matchInterval)
2172 value := randValue(valueSize, rng)
2173 require.NoError(b, batch.Set([]byte(key), value, nil))
2174 }
2175 require.NoError(b, batch.Commit(nil))
2176 require.NoError(b, d.Flush())
2177 require.NoError(b, d.Compact(nil, []byte{0xFF}, false))
2178
2179 for _, filter := range []bool{false, true} {
2180 b.Run(fmt.Sprintf("filter=%t", filter), func(b *testing.B) {
2181 var iterOpts IterOptions
2182 if filter {
2183 iterOpts.PointKeyFilters = []BlockPropertyFilter{
2184 sstable.NewBlockIntervalFilter("0",
2185 uint64(0), uint64(1)),
2186 }
2187 }
2188 iter, _ := d.NewIter(&iterOpts)
2189 b.ResetTimer()
2190 for i := 0; i < b.N; i++ {
2191 valid := iter.First()
2192 for valid {
2193 valid = iter.Next()
2194 }
2195 }
2196 b.StopTimer()
2197 require.NoError(b, iter.Close())
2198 })
2199 }
2200 })
2201 }
2202 }
2203
2204 func TestRangeKeyMaskingRandomized(t *testing.T) {
2205 seed := *seed
2206 if seed == 0 {
2207 seed = uint64(time.Now().UnixNano())
2208 t.Logf("seed: %d", seed)
2209 }
2210 rng := rand.New(rand.NewSource(seed))
2211
2212 // Generate a keyspace with point keys, and range keys which will
2213 // mask the point keys. 2214 var timestamps []int64 2215 for i := 0; i <= 100; i++ { 2216 timestamps = append(timestamps, rng.Int63n(1000)) 2217 } 2218 2219 ks := testkeys.Alpha(5) 2220 numKeys := 1000 + rng.Intn(9000) 2221 keys := make([][]byte, numKeys) 2222 keyTimeStamps := make([]int64, numKeys) // ts associated with the keys. 2223 for i := 0; i < numKeys; i++ { 2224 keys[i] = make([]byte, 5+testkeys.MaxSuffixLen) 2225 keyTimeStamps[i] = timestamps[rng.Intn(len(timestamps))] 2226 n := testkeys.WriteKeyAt(keys[i], ks, rng.Int63n(ks.Count()), keyTimeStamps[i]) 2227 keys[i] = keys[i][:n] 2228 } 2229 2230 numRangeKeys := rng.Intn(20) 2231 type rkey struct { 2232 start []byte 2233 end []byte 2234 suffix []byte 2235 } 2236 rkeys := make([]rkey, numRangeKeys) 2237 pointKeyHidden := make([]bool, numKeys) 2238 for i := 0; i < numRangeKeys; i++ { 2239 rkeys[i].start = make([]byte, 5) 2240 rkeys[i].end = make([]byte, 5) 2241 2242 testkeys.WriteKey(rkeys[i].start[:5], ks, rng.Int63n(ks.Count())) 2243 testkeys.WriteKey(rkeys[i].end[:5], ks, rng.Int63n(ks.Count())) 2244 2245 for bytes.Equal(rkeys[i].start[:5], rkeys[i].end[:5]) { 2246 testkeys.WriteKey(rkeys[i].end[:5], ks, rng.Int63n(ks.Count())) 2247 } 2248 2249 if bytes.Compare(rkeys[i].start[:5], rkeys[i].end[:5]) > 0 { 2250 rkeys[i].start, rkeys[i].end = rkeys[i].end, rkeys[i].start 2251 } 2252 2253 rkeyTimestamp := timestamps[rng.Intn(len(timestamps))] 2254 rkeys[i].suffix = []byte("@" + strconv.FormatInt(rkeyTimestamp, 10)) 2255 2256 // Each time we create a range key, check if the range key masks any 2257 // point keys. 2258 for j, pkey := range keys { 2259 if pointKeyHidden[j] { 2260 continue 2261 } 2262 2263 if keyTimeStamps[j] >= rkeyTimestamp { 2264 continue 2265 } 2266 2267 if testkeys.Comparer.Compare(pkey, rkeys[i].start) >= 0 && 2268 testkeys.Comparer.Compare(pkey, rkeys[i].end) < 0 { 2269 pointKeyHidden[j] = true 2270 } 2271 } 2272 } 2273 2274 // Define a simple base testOpts, and a randomized testOpts. The results 2275 // of iteration will be compared. 2276 type testOpts struct { 2277 levelOpts []LevelOptions 2278 filter func() BlockPropertyFilterMask 2279 } 2280 2281 baseOpts := testOpts{ 2282 levelOpts: make([]LevelOptions, 7), 2283 } 2284 for i := 0; i < len(baseOpts.levelOpts); i++ { 2285 baseOpts.levelOpts[i].TargetFileSize = 1 2286 baseOpts.levelOpts[i].BlockSize = 1 2287 } 2288 2289 randomOpts := testOpts{ 2290 levelOpts: []LevelOptions{ 2291 { 2292 TargetFileSize: int64(1 + rng.Intn(2<<20)), // Vary the L0 file size. 
2293 BlockSize: 1 + rng.Intn(32<<10),
2294 },
2295 },
2296 }
2297 if rng.Intn(2) == 0 {
2298 randomOpts.filter = func() BlockPropertyFilterMask {
2299 return sstable.NewTestKeysMaskingFilter()
2300 }
2301 }
2302
2303 maxProcs := runtime.GOMAXPROCS(0)
2304
2305 opts1 := &Options{
2306 FS: vfs.NewStrictMem(),
2307 Comparer: testkeys.Comparer,
2308 FormatMajorVersion: FormatNewest,
2309 MaxConcurrentCompactions: func() int { return maxProcs/2 + 1 },
2310 BlockPropertyCollectors: []func() BlockPropertyCollector{
2311 sstable.NewTestKeysBlockPropertyCollector,
2312 },
2313 }
2314 opts1.Levels = baseOpts.levelOpts
2315 d1, err := Open("", opts1)
2316 require.NoError(t, err)
2317
2318 opts2 := &Options{
2319 FS: vfs.NewStrictMem(),
2320 Comparer: testkeys.Comparer,
2321 FormatMajorVersion: FormatNewest,
2322 MaxConcurrentCompactions: func() int { return maxProcs/2 + 1 },
2323 BlockPropertyCollectors: []func() BlockPropertyCollector{
2324 sstable.NewTestKeysBlockPropertyCollector,
2325 },
2326 }
2327 opts2.Levels = randomOpts.levelOpts
2328 d2, err := Open("", opts2)
2329 require.NoError(t, err)
2330
2331 defer func() {
2332 if err := d1.Close(); err != nil {
2333 t.Fatal(err)
2334 }
2335 if err := d2.Close(); err != nil {
2336 t.Fatal(err)
2337 }
2338 }()
2339
2340 // Run the test: write the point keys to both databases in batches.
2341 var batch1 *Batch
2342 var batch2 *Batch
2343 const keysPerBatch = 50
2344 for i := 0; i < numKeys; i++ {
2345 if i%keysPerBatch == 0 {
2346 if batch1 != nil {
2347 require.NoError(t, batch1.Commit(nil))
2348 require.NoError(t, batch2.Commit(nil))
2349 }
2350 batch1 = d1.NewBatch()
2351 batch2 = d2.NewBatch()
2352 }
2353 require.NoError(t, batch1.Set(keys[i], []byte{1}, nil))
2354 require.NoError(t, batch2.Set(keys[i], []byte{1}, nil))
2355 }
// Commit the final, partially-filled batches.
require.NoError(t, batch1.Commit(nil))
require.NoError(t, batch2.Commit(nil))
2356
2357 for _, rkey := range rkeys {
2358 require.NoError(t, d1.RangeKeySet(rkey.start, rkey.end, rkey.suffix, nil, nil))
2359 require.NoError(t, d2.RangeKeySet(rkey.start, rkey.end, rkey.suffix, nil, nil))
2360 }
2361
2362 // Scan the keyspace.
2363 iter1Opts := IterOptions{
2364 KeyTypes: IterKeyTypePointsAndRanges,
2365 RangeKeyMasking: RangeKeyMasking{
2366 Suffix: []byte("@1000"),
2367 Filter: baseOpts.filter,
2368 },
2369 }
2370
2371 iter2Opts := IterOptions{
2372 KeyTypes: IterKeyTypePointsAndRanges,
2373 RangeKeyMasking: RangeKeyMasking{
2374 Suffix: []byte("@1000"),
2375 Filter: randomOpts.filter,
2376 },
2377 }
2378
2379 iter1, _ := d1.NewIter(&iter1Opts)
2380 iter2, _ := d2.NewIter(&iter2Opts)
2381 defer func() {
2382 if err := iter1.Close(); err != nil {
2383 t.Fatal(err)
2384 }
2385 if err := iter2.Close(); err != nil {
2386 t.Fatal(err)
2387 }
2388 }()
2389
2390 for valid1, valid2 := iter1.First(), iter2.First(); valid1 || valid2; valid1, valid2 = iter1.Next(), iter2.Next() {
2391 if valid1 != valid2 {
2392 t.Fatalf("iteration didn't produce identical results")
2393 }
2394
2395 // Confirm exposed range key state is identical.
2396 hasP1, hasR1 := iter1.HasPointAndRange()
2397 hasP2, hasR2 := iter2.HasPointAndRange()
2398 if hasP1 != hasP2 || hasR1 != hasR2 {
2399 t.Fatalf("iteration didn't produce identical results")
2400 }
2401 if hasP1 && !bytes.Equal(iter1.Key(), iter2.Key()) {
2402 t.Fatalf("iteration didn't produce identical point keys: %s, %s", iter1.Key(), iter2.Key())
2403 }
2404 if hasR1 {
2405 // Confirm that the range key is the same.
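// RangeBounds returns the [start, end) bounds of the range key overlapping
// the iterator's current position, so matching bounds imply both iterators
// surfaced the same defragmented range key.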
2406 b1, e1 := iter1.RangeBounds()
2407 b2, e2 := iter2.RangeBounds()
2408 if !bytes.Equal(b1, b2) || !bytes.Equal(e1, e2) {
2409 t.Fatalf(
2410 "iteration didn't produce identical range keys: [%s, %s], [%s, %s]",
2411 b1, e1, b2, e2,
2412 )
2413 }
2414
2415 }
2416
2417 // Confirm that the returned point key wasn't hidden.
2418 for j, pkey := range keys {
2419 if bytes.Equal(iter1.Key(), pkey) && pointKeyHidden[j] {
2420 t.Fatalf("hidden point key was exposed: %s (ts=%d)", pkey, keyTimeStamps[j])
2421 }
2422 }
2423 }
2424 }
2425
2426 // BenchmarkIterator_RangeKeyMasking benchmarks a scan through a keyspace with
2427 // 10,000 random suffixed point keys, and three range keys covering most of the
2428 // keyspace. It varies the suffix of the range keys in subbenchmarks to exercise
2429 // varying amounts of masking. The benchmark also configures a block-property
2430 // filter, allowing it to skip blocks that are wholly contained within a range
2431 // key and consist solely of points with suffixes lower than the range key's.
2432 func BenchmarkIterator_RangeKeyMasking(b *testing.B) {
2433 const (
2434 prefixLen = 20
2435 valueSize = 1024
2436 batches = 200
2437 keysPerBatch = 50
2438 )
2439 var alloc bytealloc.A
2440 rng := rand.New(rand.NewSource(uint64(1658872515083979000)))
2441 keyBuf := make([]byte, prefixLen+testkeys.MaxSuffixLen)
2442 valBuf := make([]byte, valueSize)
2443
2444 mem := vfs.NewStrictMem()
2445 maxProcs := runtime.GOMAXPROCS(0)
2446 opts := &Options{
2447 FS: mem,
2448 Comparer: testkeys.Comparer,
2449 FormatMajorVersion: FormatNewest,
2450 MaxConcurrentCompactions: func() int { return maxProcs/2 + 1 },
2451 BlockPropertyCollectors: []func() BlockPropertyCollector{
2452 sstable.NewTestKeysBlockPropertyCollector,
2453 },
2454 }
2455 d, err := Open("", opts)
2456 require.NoError(b, err)
2457
2458 keys := make([][]byte, keysPerBatch*batches)
2459 for bi := 0; bi < batches; bi++ {
2460 batch := d.NewBatch()
2461 for k := 0; k < keysPerBatch; k++ {
2462 randStr(keyBuf[:prefixLen], rng)
2463 suffix := rng.Int63n(100)
2464 suffixLen := testkeys.WriteSuffix(keyBuf[prefixLen:], suffix)
2465 randStr(valBuf[:], rng)
2466
2467 var key []byte
2468 alloc, key = alloc.Copy(keyBuf[:prefixLen+suffixLen])
2469 keys[bi*keysPerBatch+k] = key
2470 require.NoError(b, batch.Set(key, valBuf[:], nil))
2471 }
2472 require.NoError(b, batch.Commit(nil))
2473 }
2474
2475 // Wait for compactions to complete before starting benchmarks. We don't
2476 // want to benchmark while compactions are running.
2477 d.mu.Lock()
2478 for d.mu.compact.compactingCount > 0 {
2479 d.mu.compact.cond.Wait()
2480 }
2481 d.mu.Unlock()
2482 b.Log(d.Metrics().String())
2483 require.NoError(b, d.Close())
2484 // Set ignore syncs to true so that each subbenchmark may mutate state and
2485 // then revert back to the original state.
2486 mem.SetIgnoreSyncs(true)
2487
2488 // TODO(jackson): Benchmark lazy-combined iteration versus not.
2489 // TODO(jackson): Benchmark seeks.
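// A note on the masking arithmetic exercised below: RangeKeyMasking.Suffix
// is fixed at "@100", so each of the range keys written at "@10" through
// "@100" acts as a mask, hiding any point key that lies within its bounds
// and carries an older (smaller-timestamp) suffix. Point suffixes are drawn
// from @0 through @99, so the "@10" variant masks roughly 10% of the point
// keys while "@100" masks nearly all of them.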
2490 for _, rkSuffix := range []string{"@10", "@50", "@75", "@100"} {
2491 b.Run(fmt.Sprintf("range-keys-suffixes=%s", rkSuffix), func(b *testing.B) {
2492 d, err := Open("", opts)
2493 require.NoError(b, err)
2494 require.NoError(b, d.RangeKeySet([]byte("b"), []byte("e"), []byte(rkSuffix), nil, nil))
2495 require.NoError(b, d.RangeKeySet([]byte("f"), []byte("p"), []byte(rkSuffix), nil, nil))
2496 require.NoError(b, d.RangeKeySet([]byte("q"), []byte("z"), []byte(rkSuffix), nil, nil))
2497 require.NoError(b, d.Flush())
2498
2499 // The three range keys set above cover most of the keyspace at the
2500 // given suffix.
2501
2502 iterOpts := IterOptions{
2503 KeyTypes: IterKeyTypePointsAndRanges,
2504 RangeKeyMasking: RangeKeyMasking{
2505 Suffix: []byte("@100"),
2506 Filter: func() BlockPropertyFilterMask {
2507 return sstable.NewTestKeysMaskingFilter()
2508 },
2509 },
2510 }
2511 b.Run("forward", func(b *testing.B) {
2512 b.Run("seekprefix", func(b *testing.B) {
2513 b.ResetTimer()
2514 for i := 0; i < b.N; i++ {
2515 iter, _ := d.NewIter(&iterOpts)
2516 count := 0
2517 for j := 0; j < len(keys); j++ {
2518 if !iter.SeekPrefixGE(keys[j]) {
2519 b.Errorf("unable to find %q", keys[j])
2520 }
2521 if hasPoint, _ := iter.HasPointAndRange(); hasPoint {
2522 count++
2523 }
2524 }
2525 if err := iter.Close(); err != nil {
2526 b.Fatal(err)
2527 }
2528 }
2529 })
2530 b.Run("next", func(b *testing.B) {
2531 b.ResetTimer()
2532 for i := 0; i < b.N; i++ {
2533 iter, _ := d.NewIter(&iterOpts)
2534 count := 0
2535 for valid := iter.First(); valid; valid = iter.Next() {
2536 if hasPoint, _ := iter.HasPointAndRange(); hasPoint {
2537 count++
2538 }
2539 }
2540 if err := iter.Close(); err != nil {
2541 b.Fatal(err)
2542 }
2543 }
2544 })
2545 })
2546 b.Run("backward", func(b *testing.B) {
2547 b.ResetTimer()
2548 for i := 0; i < b.N; i++ {
2549 iter, _ := d.NewIter(&iterOpts)
2550 count := 0
2551 for valid := iter.Last(); valid; valid = iter.Prev() {
2552 if hasPoint, _ := iter.HasPointAndRange(); hasPoint {
2553 count++
2554 }
2555 }
2556 if err := iter.Close(); err != nil {
2557 b.Fatal(err)
2558 }
2559 }
2560 })
2561
2562 // Reset the benchmark state at the end of each run to remove the
2563 // range keys we wrote.
2564 b.StopTimer()
2565 require.NoError(b, d.Close())
2566 mem.ResetToSyncedState()
2567 })
2568 }
2569
2570 }
2571
2572 func BenchmarkIteratorScan(b *testing.B) {
2573 const maxPrefixLen = 8
2574 keyBuf := make([]byte, maxPrefixLen+testkeys.MaxSuffixLen)
2575 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
2576
2577 for _, keyCount := range []int64{100, 1000, 10000} {
2578 for _, readAmp := range []int{1, 3, 7, 10} {
2579 func() {
2580 opts := &Options{
2581 FS: vfs.NewMem(),
2582 FormatMajorVersion: FormatNewest,
2583 }
2584 opts.DisableAutomaticCompactions = true
2585 d, err := Open("", opts)
2586 require.NoError(b, err)
2587 defer func() { require.NoError(b, d.Close()) }()
2588
2589 // Take the very large keyspace consisting of alphabetic
2590 // characters of lengths up to `maxPrefixLen` and reduce it down
2591 // to `keyCount` keys by keeping one of every `keys.Count() / keyCount` keys.
2592 keys := testkeys.Alpha(maxPrefixLen)
2593 keys = keys.EveryN(keys.Count() / keyCount)
2594 if keys.Count() < keyCount {
2595 b.Fatalf("expected %d keys, found %d", keyCount, keys.Count())
2596 }
2597
2598 // Portion the keys into `readAmp` overlapping key sets.
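// (testkeys.Divvy deals the keys out to the portions in round-robin
// fashion, so every portion spans the whole keyspace; flushing each portion
// separately therefore produces overlapping sstables, each adding one unit
// of read amplification.)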
2599 for _, ks := range testkeys.Divvy(keys, int64(readAmp)) { 2600 batch := d.NewBatch() 2601 for i := int64(0); i < ks.Count(); i++ { 2602 n := testkeys.WriteKeyAt(keyBuf[:], ks, i, rng.Int63n(100)) 2603 batch.Set(keyBuf[:n], keyBuf[:n], nil) 2604 } 2605 require.NoError(b, batch.Commit(nil)) 2606 require.NoError(b, d.Flush()) 2607 } 2608 // Each level is a sublevel. 2609 m := d.Metrics() 2610 require.Equal(b, readAmp, m.ReadAmp()) 2611 2612 for _, keyTypes := range []IterKeyType{IterKeyTypePointsOnly, IterKeyTypePointsAndRanges} { 2613 iterOpts := IterOptions{KeyTypes: keyTypes} 2614 b.Run(fmt.Sprintf("keys=%d,r-amp=%d,key-types=%s", keyCount, readAmp, keyTypes), func(b *testing.B) { 2615 for i := 0; i < b.N; i++ { 2616 b.StartTimer() 2617 iter, _ := d.NewIter(&iterOpts) 2618 valid := iter.First() 2619 for valid { 2620 valid = iter.Next() 2621 } 2622 b.StopTimer() 2623 require.NoError(b, iter.Close()) 2624 } 2625 }) 2626 } 2627 }() 2628 } 2629 } 2630 } 2631 2632 func BenchmarkIteratorScanNextPrefix(b *testing.B) { 2633 setupBench := func( 2634 b *testing.B, maxKeysPerLevel, versCount, readAmp int, enableValueBlocks bool) *DB { 2635 keyBuf := make([]byte, readAmp+testkeys.MaxSuffixLen) 2636 opts := &Options{ 2637 FS: vfs.NewMem(), 2638 Comparer: testkeys.Comparer, 2639 FormatMajorVersion: FormatNewest, 2640 } 2641 opts.DisableAutomaticCompactions = true 2642 opts.Experimental.EnableValueBlocks = func() bool { return enableValueBlocks } 2643 d, err := Open("", opts) 2644 require.NoError(b, err) 2645 2646 // Create `readAmp` levels. Prefixes in the top of the LSM are length 1. 2647 // Prefixes in the bottom of the LSM are length `readAmp`. Eg,: 2648 // 2649 // a b c... 2650 // aa ab ac... 2651 // aaa aab aac... 2652 // 2653 for l := readAmp; l > 0; l-- { 2654 ks := testkeys.Alpha(l) 2655 if step := ks.Count() / int64(maxKeysPerLevel); step > 1 { 2656 ks = ks.EveryN(step) 2657 } 2658 if ks.Count() > int64(maxKeysPerLevel) { 2659 ks = ks.Slice(0, int64(maxKeysPerLevel)) 2660 } 2661 2662 batch := d.NewBatch() 2663 for i := int64(0); i < ks.Count(); i++ { 2664 for v := 0; v < versCount; v++ { 2665 n := testkeys.WriteKeyAt(keyBuf[:], ks, i, int64(versCount-v+1)) 2666 batch.Set(keyBuf[:n], keyBuf[:n], nil) 2667 } 2668 } 2669 require.NoError(b, batch.Commit(nil)) 2670 require.NoError(b, d.Flush()) 2671 } 2672 2673 // Each level is a sublevel. 
2674 m := d.Metrics() 2675 require.Equal(b, readAmp, m.ReadAmp()) 2676 return d 2677 } 2678 2679 for _, keysPerLevel := range []int{10, 100, 1000} { 2680 b.Run(fmt.Sprintf("keysPerLevel=%d", keysPerLevel), func(b *testing.B) { 2681 for _, versionCount := range []int{1, 2, 10, 100} { 2682 b.Run(fmt.Sprintf("versions=%d", versionCount), func(b *testing.B) { 2683 for _, readAmp := range []int{1, 3, 7, 10} { 2684 b.Run(fmt.Sprintf("ramp=%d", readAmp), func(b *testing.B) { 2685 for _, enableValueBlocks := range []bool{false, true} { 2686 b.Run(fmt.Sprintf("value-blocks=%t", enableValueBlocks), func(b *testing.B) { 2687 d := setupBench(b, keysPerLevel, versionCount, readAmp, enableValueBlocks) 2688 defer func() { require.NoError(b, d.Close()) }() 2689 for _, keyTypes := range []IterKeyType{ 2690 IterKeyTypePointsOnly, IterKeyTypePointsAndRanges} { 2691 b.Run(fmt.Sprintf("key-types=%s", keyTypes), func(b *testing.B) { 2692 iterOpts := IterOptions{KeyTypes: keyTypes} 2693 iter, _ := d.NewIter(&iterOpts) 2694 var valid bool 2695 b.ResetTimer() 2696 for i := 0; i < b.N; i++ { 2697 if !valid { 2698 valid = iter.First() 2699 if !valid { 2700 b.Fatalf("iter must be valid") 2701 } 2702 } else { 2703 valid = iter.NextPrefix() 2704 } 2705 } 2706 b.StopTimer() 2707 require.NoError(b, iter.Close()) 2708 }) 2709 } 2710 }) 2711 } 2712 }) 2713 } 2714 }) 2715 } 2716 }) 2717 } 2718 } 2719 2720 func BenchmarkCombinedIteratorSeek(b *testing.B) { 2721 for _, withRangeKey := range []bool{false, true} { 2722 b.Run(fmt.Sprintf("range-key=%t", withRangeKey), func(b *testing.B) { 2723 rng := rand.New(rand.NewSource(uint64(1658872515083979000))) 2724 ks := testkeys.Alpha(1) 2725 opts := &Options{ 2726 FS: vfs.NewMem(), 2727 Comparer: testkeys.Comparer, 2728 FormatMajorVersion: FormatNewest, 2729 } 2730 d, err := Open("", opts) 2731 require.NoError(b, err) 2732 defer func() { require.NoError(b, d.Close()) }() 2733 2734 keys := make([][]byte, ks.Count()) 2735 for i := int64(0); i < ks.Count(); i++ { 2736 keys[i] = testkeys.Key(ks, i) 2737 var val [40]byte 2738 rng.Read(val[:]) 2739 require.NoError(b, d.Set(keys[i], val[:], nil)) 2740 } 2741 if withRangeKey { 2742 require.NoError(b, d.RangeKeySet([]byte("a"), []byte{'z', 0x00}, []byte("@5"), nil, nil)) 2743 } 2744 2745 batch := d.NewIndexedBatch() 2746 defer batch.Close() 2747 2748 for _, useBatch := range []bool{false, true} { 2749 b.Run(fmt.Sprintf("batch=%t", useBatch), func(b *testing.B) { 2750 for i := 0; i < b.N; i++ { 2751 iterOpts := IterOptions{KeyTypes: IterKeyTypePointsAndRanges} 2752 var it *Iterator 2753 if useBatch { 2754 it, _ = batch.NewIter(&iterOpts) 2755 } else { 2756 it, _ = d.NewIter(&iterOpts) 2757 } 2758 for j := 0; j < len(keys); j++ { 2759 if !it.SeekGE(keys[j]) { 2760 b.Errorf("key %q missing", keys[j]) 2761 } 2762 } 2763 require.NoError(b, it.Close()) 2764 } 2765 }) 2766 } 2767 }) 2768 } 2769 } 2770 2771 // BenchmarkCombinedIteratorSeek_Bounded benchmarks a bounded iterator that 2772 // performs repeated seeks over 5% of the middle of a keyspace covered by a 2773 // range key that's fragmented across hundreds of files. The iterator bounds 2774 // should prevent defragmenting beyond the iterator's bounds. 
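// (buildFragmentedRangeKey, defined below, flushes after every key and
// configures a target file size of 1, which is what fragments the range key
// across hundreds of single-fragment L6 files.)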
2775 func BenchmarkCombinedIteratorSeek_Bounded(b *testing.B) { 2776 d, keys := buildFragmentedRangeKey(b, uint64(1658872515083979000)) 2777 2778 var lower = len(keys) / 2 2779 var upper = len(keys)/2 + len(keys)/20 // 5% 2780 iterOpts := IterOptions{ 2781 KeyTypes: IterKeyTypePointsAndRanges, 2782 LowerBound: keys[lower], 2783 UpperBound: keys[upper], 2784 } 2785 b.ResetTimer() 2786 for i := 0; i < b.N; i++ { 2787 it, _ := d.NewIter(&iterOpts) 2788 for j := lower; j < upper; j++ { 2789 if !it.SeekGE(keys[j]) { 2790 b.Errorf("key %q missing", keys[j]) 2791 } 2792 } 2793 require.NoError(b, it.Close()) 2794 } 2795 } 2796 2797 // BenchmarkCombinedIteratorSeekPrefix benchmarks an iterator that 2798 // performs repeated prefix seeks over 5% of the middle of a keyspace covered by a 2799 // range key that's fragmented across hundreds of files. The seek prefix should 2800 // avoid defragmenting beyond the seek prefixes. 2801 func BenchmarkCombinedIteratorSeekPrefix(b *testing.B) { 2802 d, keys := buildFragmentedRangeKey(b, uint64(1658872515083979000)) 2803 2804 var lower = len(keys) / 2 2805 var upper = len(keys)/2 + len(keys)/20 // 5% 2806 iterOpts := IterOptions{ 2807 KeyTypes: IterKeyTypePointsAndRanges, 2808 } 2809 b.ResetTimer() 2810 for i := 0; i < b.N; i++ { 2811 it, _ := d.NewIter(&iterOpts) 2812 for j := lower; j < upper; j++ { 2813 if !it.SeekPrefixGE(keys[j]) { 2814 b.Errorf("key %q missing", keys[j]) 2815 } 2816 } 2817 require.NoError(b, it.Close()) 2818 } 2819 } 2820 2821 func buildFragmentedRangeKey(b testing.TB, seed uint64) (d *DB, keys [][]byte) { 2822 rng := rand.New(rand.NewSource(seed)) 2823 ks := testkeys.Alpha(2) 2824 opts := &Options{ 2825 FS: vfs.NewMem(), 2826 Comparer: testkeys.Comparer, 2827 FormatMajorVersion: FormatNewest, 2828 L0CompactionFileThreshold: 1, 2829 } 2830 opts.EnsureDefaults() 2831 for l := 0; l < len(opts.Levels); l++ { 2832 opts.Levels[l].TargetFileSize = 1 2833 } 2834 var err error 2835 d, err = Open("", opts) 2836 require.NoError(b, err) 2837 2838 keys = make([][]byte, ks.Count()) 2839 for i := int64(0); i < ks.Count(); i++ { 2840 keys[i] = testkeys.Key(ks, i) 2841 } 2842 for i := 0; i < len(keys); i++ { 2843 var val [40]byte 2844 rng.Read(val[:]) 2845 require.NoError(b, d.Set(keys[i], val[:], nil)) 2846 if i < len(keys)-1 { 2847 require.NoError(b, d.RangeKeySet(keys[i], keys[i+1], []byte("@5"), nil, nil)) 2848 } 2849 require.NoError(b, d.Flush()) 2850 } 2851 2852 d.mu.Lock() 2853 for d.mu.compact.compactingCount > 0 { 2854 d.mu.compact.cond.Wait() 2855 } 2856 v := d.mu.versions.currentVersion() 2857 d.mu.Unlock() 2858 require.GreaterOrEqualf(b, v.Levels[numLevels-1].Len(), 2859 700, "expect many (≥700) L6 files but found %d", v.Levels[numLevels-1].Len()) 2860 return d, keys 2861 } 2862 2863 // BenchmarkSeekPrefixTombstones benchmarks a SeekPrefixGE into the beginning of 2864 // a series of sstables containing exclusively range tombstones. Previously, 2865 // such a seek would next through all the tombstone files until it arrived at a 2866 // point key or exhausted the level's files. The SeekPrefixGE should not next 2867 // beyond the files that contain the prefix. 2868 // 2869 // See cockroachdb/cockroach#89327. 
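//
// Each ingested sstable holds a single range tombstone spanning two adjacent
// keys of testkeys.Alpha(2), so the bottommost level ends up shaped roughly
// like:
//
//	L6: [DEL a, aa) [DEL aa, ab) [DEL ab, ac) ... (ks.Count()-1 files)
//
// with no point keys at all.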
2870 func BenchmarkSeekPrefixTombstones(b *testing.B) {
2871 o := (&Options{
2872 FS: vfs.NewMem(),
2873 Comparer: testkeys.Comparer,
2874 FormatMajorVersion: FormatNewest,
2875 }).EnsureDefaults()
2876 wOpts := o.MakeWriterOptions(numLevels-1, FormatNewest.MaxTableFormat())
2877 d, err := Open("", o)
2878 require.NoError(b, err)
2879 defer func() { require.NoError(b, d.Close()) }()
2880
2881 // Keep a snapshot open for the duration of the test to prevent elision-only
2882 // compactions from removing the ingested files containing exclusively
2883 // elidable tombstones.
2884 defer d.NewSnapshot().Close()
2885
2886 ks := testkeys.Alpha(2)
2887 for i := int64(0); i < ks.Count()-1; i++ {
2888 func() {
2889 filename := fmt.Sprintf("ext%02d", i)
2890 f, err := o.FS.Create(filename)
2891 require.NoError(b, err)
2892 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), wOpts)
2893 require.NoError(b, w.DeleteRange(testkeys.Key(ks, i), testkeys.Key(ks, i+1)))
2894 require.NoError(b, w.Close())
2895 require.NoError(b, d.Ingest([]string{filename}))
2896 }()
2897 }
2898
2899 d.mu.Lock()
2900 require.Equal(b, int64(ks.Count()-1), d.mu.versions.metrics.Levels[numLevels-1].NumFiles)
2901 d.mu.Unlock()
2902
2903 seekKey := testkeys.Key(ks, 1)
2904 iter, _ := d.NewIter(nil)
2905 defer iter.Close()
2906 b.ResetTimer()
2907 defer b.StopTimer()
2908 for i := 0; i < b.N; i++ {
2909 iter.SeekPrefixGE(seekKey)
2910 }
2911 }
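
// The sequential-seek benchmarks above rely on an internal optimization:
// when successive seeks use monotonically increasing keys, the iterator can
// often satisfy the next seek by stepping forward from its current position
// rather than re-seeking from scratch. The sketch below illustrates that
// access pattern at the public API level. It is an illustrative addition
// rather than one of the original benchmarks, and exampleMonotonicSeeks is
// a hypothetical name; the optimization itself is applied transparently
// inside the iterator.
func exampleMonotonicSeeks(t *testing.T) {
	d, err := Open("", &Options{FS: vfs.NewMem()})
	require.NoError(t, err)
	defer func() { require.NoError(t, d.Close()) }()

	// Write a few point keys so the seeks below have something to find.
	for _, k := range []string{"a", "b", "c", "d"} {
		require.NoError(t, d.Set([]byte(k), nil, nil))
	}

	iter, err := d.NewIter(nil)
	require.NoError(t, err)
	defer iter.Close()

	// Seeking in strictly ascending key order is the pattern that lets the
	// iterator reuse its current position (the try-seek-using-next
	// optimization); no special flag is required at this level.
	for _, k := range []string{"a", "b", "c", "d"} {
		if iter.SeekGE([]byte(k)) {
			_ = iter.Key()
		}
	}
}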