github.com/cockroachdb/pebble@v1.1.2/iterator_test.go

     1  // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"flag"
    11  	"fmt"
    12  	"io"
    13  	"runtime"
    14  	"sort"
    15  	"strconv"
    16  	"strings"
    17  	"testing"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/datadriven"
    21  	"github.com/cockroachdb/errors"
    22  	"github.com/cockroachdb/pebble/internal/base"
    23  	"github.com/cockroachdb/pebble/internal/bytealloc"
    24  	"github.com/cockroachdb/pebble/internal/invalidating"
    25  	"github.com/cockroachdb/pebble/internal/keyspan"
    26  	"github.com/cockroachdb/pebble/internal/manifest"
    27  	"github.com/cockroachdb/pebble/internal/testkeys"
    28  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    29  	"github.com/cockroachdb/pebble/sstable"
    30  	"github.com/cockroachdb/pebble/vfs"
    31  	"github.com/stretchr/testify/require"
    32  	"golang.org/x/exp/rand"
    33  )
    34  
    35  var testKeyValuePairs = []string{
    36  	"10:10",
    37  	"11:11",
    38  	"12:12",
    39  	"13:13",
    40  	"14:14",
    41  	"15:15",
    42  	"16:16",
    43  	"17:17",
    44  	"18:18",
    45  	"19:19",
    46  }
    47  
    48  type fakeIter struct {
    49  	lower    []byte
    50  	upper    []byte
    51  	keys     []InternalKey
    52  	vals     [][]byte
    53  	index    int
    54  	valid    bool
    55  	closeErr error
    56  }
    57  
    58  // fakeIter implements the base.InternalIterator interface.
    59  var _ base.InternalIterator = (*fakeIter)(nil)
    60  
    61  func fakeIkey(s string) InternalKey {
    62  	j := strings.Index(s, ":")
    63  	seqNum, err := strconv.Atoi(s[j+1:])
    64  	if err != nil {
    65  		panic(err)
    66  	}
    67  	return base.MakeInternalKey([]byte(s[:j]), uint64(seqNum), InternalKeyKindSet)
    68  }
    69  
    70  func newFakeIterator(closeErr error, keys ...string) *fakeIter {
    71  	ikeys := make([]InternalKey, len(keys))
    72  	for i, k := range keys {
    73  		ikeys[i] = fakeIkey(k)
    74  	}
    75  	return &fakeIter{
    76  		keys:     ikeys,
    77  		index:    0,
    78  		valid:    len(ikeys) > 0,
    79  		closeErr: closeErr,
    80  	}
    81  }
    82  
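         // An illustrative sketch (not drawn from any specific test below) of how
         // these helpers fit together. The strings passed to newFakeIterator use the
         // "userkey:seqnum" form that fakeIkey parses:
         //
         //	it := newFakeIterator(nil /* closeErr */, "a:1", "b:2")
         //	// fakeIkey("a:1") yields base.MakeInternalKey([]byte("a"), 1, InternalKeyKindSet).
         //	_ = it
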
    83  func (f *fakeIter) String() string {
    84  	return "fake"
    85  }
    86  
    87  func (f *fakeIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
    88  	f.valid = false
    89  	for f.index = 0; f.index < len(f.keys); f.index++ {
    90  		if DefaultComparer.Compare(key, f.key().UserKey) <= 0 {
    91  			if f.upper != nil && DefaultComparer.Compare(f.upper, f.key().UserKey) <= 0 {
    92  				return nil, base.LazyValue{}
    93  			}
    94  			f.valid = true
    95  			return f.Key(), f.Value()
    96  		}
    97  	}
    98  	return nil, base.LazyValue{}
    99  }
   100  
   101  func (f *fakeIter) SeekPrefixGE(
   102  	prefix, key []byte, flags base.SeekGEFlags,
   103  ) (*base.InternalKey, base.LazyValue) {
   104  	return f.SeekGE(key, flags)
   105  }
   106  
   107  func (f *fakeIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
   108  	f.valid = false
   109  	for f.index = len(f.keys) - 1; f.index >= 0; f.index-- {
   110  		if DefaultComparer.Compare(key, f.key().UserKey) > 0 {
   111  			if f.lower != nil && DefaultComparer.Compare(f.lower, f.key().UserKey) > 0 {
   112  				return nil, base.LazyValue{}
   113  			}
   114  			f.valid = true
   115  			return f.Key(), f.Value()
   116  		}
   117  	}
   118  	return nil, base.LazyValue{}
   119  }
   120  
   121  func (f *fakeIter) First() (*InternalKey, base.LazyValue) {
   122  	f.valid = false
   123  	f.index = -1
   124  	if key, _ := f.Next(); key == nil {
   125  		return nil, base.LazyValue{}
   126  	}
   127  	if f.upper != nil && DefaultComparer.Compare(f.upper, f.key().UserKey) <= 0 {
   128  		return nil, base.LazyValue{}
   129  	}
   130  	f.valid = true
   131  	return f.Key(), f.Value()
   132  }
   133  
   134  func (f *fakeIter) Last() (*InternalKey, base.LazyValue) {
   135  	f.valid = false
   136  	f.index = len(f.keys)
   137  	if key, _ := f.Prev(); key == nil {
   138  		return nil, base.LazyValue{}
   139  	}
   140  	if f.lower != nil && DefaultComparer.Compare(f.lower, f.key().UserKey) > 0 {
   141  		return nil, base.LazyValue{}
   142  	}
   143  	f.valid = true
   144  	return f.Key(), f.Value()
   145  }
   146  
   147  func (f *fakeIter) Next() (*InternalKey, base.LazyValue) {
   148  	f.valid = false
   149  	if f.index == len(f.keys) {
   150  		return nil, base.LazyValue{}
   151  	}
   152  	f.index++
   153  	if f.index == len(f.keys) {
   154  		return nil, base.LazyValue{}
   155  	}
   156  	if f.upper != nil && DefaultComparer.Compare(f.upper, f.key().UserKey) <= 0 {
   157  		return nil, base.LazyValue{}
   158  	}
   159  	f.valid = true
   160  	return f.Key(), f.Value()
   161  }
   162  
   163  func (f *fakeIter) Prev() (*InternalKey, base.LazyValue) {
   164  	f.valid = false
   165  	if f.index < 0 {
   166  		return nil, base.LazyValue{}
   167  	}
   168  	f.index--
   169  	if f.index < 0 {
   170  		return nil, base.LazyValue{}
   171  	}
   172  	if f.lower != nil && DefaultComparer.Compare(f.lower, f.key().UserKey) > 0 {
   173  		return nil, base.LazyValue{}
   174  	}
   175  	f.valid = true
   176  	return f.Key(), f.Value()
   177  }
   178  
   179  func (f *fakeIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) {
   180  	return f.SeekGE(succKey, base.SeekGEFlagsNone)
   181  }
   182  
   183  // key returns the current Key the iterator is positioned at regardless of the
   184  // value of f.valid.
   185  func (f *fakeIter) key() *InternalKey {
   186  	return &f.keys[f.index]
   187  }
   188  
   189  func (f *fakeIter) Key() *InternalKey {
   190  	if f.valid {
   191  		return &f.keys[f.index]
   192  	}
   193  	// It is invalid to call Key() when Valid() returns false. Rather than
   194  	// returning nil here which would technically be more correct, return a
   195  	// non-nil key which is the behavior of some InternalIterator
   196  	// implementations. This provides better testing of users of
   197  	// InternalIterators.
   198  	if f.index < 0 {
   199  		return &f.keys[0]
   200  	}
   201  	return &f.keys[len(f.keys)-1]
   202  }
   203  
   204  func (f *fakeIter) Value() base.LazyValue {
   205  	if f.index >= 0 && f.index < len(f.vals) {
   206  		return base.MakeInPlaceValue(f.vals[f.index])
   207  	}
   208  	return base.LazyValue{}
   209  }
   210  
   211  func (f *fakeIter) Valid() bool {
   212  	return f.index >= 0 && f.index < len(f.keys) && f.valid
   213  }
   214  
   215  func (f *fakeIter) Error() error {
   216  	return f.closeErr
   217  }
   218  
   219  func (f *fakeIter) Close() error {
   220  	return f.closeErr
   221  }
   222  
   223  func (f *fakeIter) SetBounds(lower, upper []byte) {
   224  	f.lower = lower
   225  	f.upper = upper
   226  }
   227  
   228  // testIterator tests creating a combined iterator from a number of sub-
   229  // iterators. newFunc is a constructor function. splitFunc returns a random
   230  // split of the testKeyValuePairs slice such that walking a combined iterator
   231  // over those splits should recover the original key/value pairs in order.
   232  func testIterator(
   233  	t *testing.T,
   234  	newFunc func(...internalIterator) internalIterator,
   235  	splitFunc func(r *rand.Rand) [][]string,
   236  ) {
   237  	// Test pre-determined sub-iterators. The sub-iterators are designed
   238  	// so that the combined key/value pair order is the same whether the
   239  	// combined iterator is concatenating or merging.
   240  	testCases := []struct {
   241  		desc  string
   242  		iters []internalIterator
   243  		want  string
   244  	}{
   245  		{
   246  			"one sub-iterator",
   247  			[]internalIterator{
   248  				newFakeIterator(nil, "e:1", "w:2"),
   249  			},
   250  			"<e:1><w:2>.",
   251  		},
   252  		{
   253  			"two sub-iterators",
   254  			[]internalIterator{
   255  				newFakeIterator(nil, "a0:0"),
   256  				newFakeIterator(nil, "b1:1", "b2:2"),
   257  			},
   258  			"<a0:0><b1:1><b2:2>.",
   259  		},
   260  		{
   261  			"empty sub-iterators",
   262  			[]internalIterator{
   263  				newFakeIterator(nil),
   264  				newFakeIterator(nil),
   265  				newFakeIterator(nil),
   266  			},
   267  			".",
   268  		},
   269  		{
   270  			"sub-iterator errors",
   271  			[]internalIterator{
   272  				newFakeIterator(nil, "a0:0", "a1:1"),
   273  				newFakeIterator(errors.New("the sky is falling"), "b2:2", "b3:3", "b4:4"),
   274  				newFakeIterator(errors.New("run for your lives"), "c5:5", "c6:6"),
   275  			},
   276  			"<a0:0><a1:1><b2:2><b3:3><b4:4>err=the sky is falling",
   277  		},
   278  	}
   279  	for _, tc := range testCases {
   280  		var b bytes.Buffer
   281  		iter := invalidating.NewIter(newFunc(tc.iters...))
   282  		for key, _ := iter.First(); key != nil; key, _ = iter.Next() {
   283  			fmt.Fprintf(&b, "<%s:%d>", key.UserKey, key.SeqNum())
   284  		}
   285  		if err := iter.Close(); err != nil {
   286  			fmt.Fprintf(&b, "err=%v", err)
   287  		} else {
   288  			b.WriteByte('.')
   289  		}
   290  		if got := b.String(); got != tc.want {
   291  			t.Errorf("%s:\ngot  %q\nwant %q", tc.desc, got, tc.want)
   292  		}
   293  	}
   294  
   295  	// Test randomly generated sub-iterators.
   296  	r := rand.New(rand.NewSource(0))
   297  	for i, nBad := 0, 0; i < 1000; i++ {
   298  		bad := false
   299  
   300  		splits := splitFunc(r)
   301  		iters := make([]internalIterator, len(splits))
   302  		for i, split := range splits {
   303  			iters[i] = newFakeIterator(nil, split...)
   304  		}
   305  		iter := newInternalIterAdapter(invalidating.NewIter(newFunc(iters...)))
   306  		iter.First()
   307  
   308  		j := 0
   309  		for ; iter.Valid() && j < len(testKeyValuePairs); j++ {
   310  			got := fmt.Sprintf("%s:%d", iter.Key().UserKey, iter.Key().SeqNum())
   311  			want := testKeyValuePairs[j]
   312  			if got != want {
   313  				bad = true
   314  				t.Errorf("random splits: i=%d, j=%d: got %q, want %q", i, j, got, want)
   315  			}
   316  			iter.Next()
   317  		}
   318  		if iter.Valid() {
   319  			bad = true
   320  			t.Errorf("random splits: i=%d, j=%d: iter was not exhausted", i, j)
   321  		}
   322  		if j != len(testKeyValuePairs) {
   323  			bad = true
   324  			t.Errorf("random splits: i=%d, j=%d: want j=%d", i, j, len(testKeyValuePairs))
   325  			return
   326  		}
   327  		if err := iter.Close(); err != nil {
   328  			bad = true
   329  			t.Errorf("random splits: i=%d, j=%d: %v", i, j, err)
   330  		}
   331  
   332  		if bad {
   333  			nBad++
   334  			if nBad == 10 {
   335  				t.Fatal("random splits: too many errors; stopping")
   336  			}
   337  		}
   338  	}
   339  }
   340  
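         // A minimal sketch of the kind of splitFunc testIterator expects, assuming a
         // concatenating combined iterator (contiguous, in-order splits of
         // testKeyValuePairs preserve the overall key order):
         //
         //	splitFunc := func(r *rand.Rand) [][]string {
         //		var splits [][]string
         //		for i := 0; i < len(testKeyValuePairs); {
         //			n := 1 + r.Intn(len(testKeyValuePairs)-i)
         //			splits = append(splits, testKeyValuePairs[i:i+n])
         //			i += n
         //		}
         //		return splits
         //	}
         //
         // A merging combined iterator additionally tolerates interleaved splits,
         // since it re-establishes the key order across its sub-iterators.
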
   341  // deletableSumValueMerger computes the sum of its arguments,
   342  // but transforms a zero sum into a non-existent entry.
   343  type deletableSumValueMerger struct {
   344  	sum int64
   345  }
   346  
   347  func newDeletableSumValueMerger(key, value []byte) (ValueMerger, error) {
   348  	m := &deletableSumValueMerger{}
   349  	return m, m.MergeNewer(value)
   350  }
   351  
   352  func (m *deletableSumValueMerger) parseAndCalculate(value []byte) error {
   353  	v, err := strconv.ParseInt(string(value), 10, 64)
   354  	if err == nil {
   355  		m.sum += v
   356  	}
   357  	return err
   358  }
   359  
   360  func (m *deletableSumValueMerger) MergeNewer(value []byte) error {
   361  	return m.parseAndCalculate(value)
   362  }
   363  
   364  func (m *deletableSumValueMerger) MergeOlder(value []byte) error {
   365  	return m.parseAndCalculate(value)
   366  }
   367  
   368  func (m *deletableSumValueMerger) Finish(includesBase bool) ([]byte, io.Closer, error) {
   369  	if m.sum == 0 {
   370  		return nil, nil, nil
   371  	}
   372  	return []byte(strconv.FormatInt(m.sum, 10)), nil, nil
   373  }
   374  
   375  func (m *deletableSumValueMerger) DeletableFinish(
   376  	includesBase bool,
   377  ) ([]byte, bool, io.Closer, error) {
   378  	value, closer, err := m.Finish(includesBase)
   379  	return value, len(value) == 0, closer, err
   380  }
   381  
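         // A short illustration of the semantics documented above (key and values are
         // arbitrary): merging "5" and "-5" sums to zero, so DeletableFinish reports
         // the entry as deleted rather than producing a "0" value.
         //
         //	m, _ := newDeletableSumValueMerger([]byte("k"), []byte("5"))
         //	_ = m.MergeOlder([]byte("-5"))
         //	_, isDeleted, _, _ := m.(*deletableSumValueMerger).DeletableFinish(true)
         //	// isDeleted == true
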
   382  func TestIterator(t *testing.T) {
   383  	var merge Merge
   384  	var keys []InternalKey
   385  	var vals [][]byte
   386  
   387  	newIter := func(seqNum uint64, opts IterOptions) *Iterator {
   388  		if merge == nil {
   389  			merge = DefaultMerger.Merge
   390  		}
   391  		wrappedMerge := func(key, value []byte) (ValueMerger, error) {
   392  			if len(key) == 0 {
   393  				t.Fatalf("an empty key is passed into Merge")
   394  			}
   395  			return merge(key, value)
   396  		}
   397  		it := &Iterator{
   398  			opts:     opts,
   399  			comparer: *testkeys.Comparer,
   400  			merge:    wrappedMerge,
   401  		}
   402  		// NB: Use a mergingIter to filter entries newer than seqNum.
   403  		iter := newMergingIter(nil /* logger */, &it.stats.InternalStats, it.cmp, it.split, &fakeIter{
   404  			lower: opts.GetLowerBound(),
   405  			upper: opts.GetUpperBound(),
   406  			keys:  keys,
   407  			vals:  vals,
   408  		})
   409  		iter.snapshot = seqNum
   410  		// NB: This Iterator cannot be cloned since it is not constructed
   411  		// with a readState. It suffices for this test.
   412  		it.iter = invalidating.NewIter(iter)
   413  		return it
   414  	}
   415  
   416  	datadriven.RunTest(t, "testdata/iterator", func(t *testing.T, d *datadriven.TestData) string {
   417  		switch d.Cmd {
   418  		case "define":
   419  			merge = nil
   420  			if arg, ok := d.Arg("merger"); ok && len(arg.Vals[0]) > 0 && arg.Vals[0] == "deletable" {
   421  				merge = newDeletableSumValueMerger
   422  			}
   423  			keys = keys[:0]
   424  			vals = vals[:0]
   425  			for _, key := range strings.Split(d.Input, "\n") {
   426  				j := strings.Index(key, ":")
   427  				keys = append(keys, base.ParseInternalKey(key[:j]))
   428  				vals = append(vals, []byte(key[j+1:]))
   429  			}
   430  			return ""
   431  
   432  		case "iter":
   433  			var seqNum uint64
   434  			var opts IterOptions
   435  			d.MaybeScanArgs(t, "seq", &seqNum)
   436  			var lower, upper string
   437  			if d.MaybeScanArgs(t, "lower", &lower) {
   438  				opts.LowerBound = []byte(lower)
   439  			}
   440  			if d.MaybeScanArgs(t, "upper", &upper) {
   441  				opts.UpperBound = []byte(upper)
   442  			}
   443  
   444  			iter := newIter(seqNum, opts)
   445  			iterOutput := runIterCmd(d, iter, true)
   446  			stats := iter.Stats()
   447  			return fmt.Sprintf("%sstats: %s\n", iterOutput, stats.String())
   448  
   449  		default:
   450  			return fmt.Sprintf("unknown command: %s", d.Cmd)
   451  		}
   452  	})
   453  }
   454  
   455  type minSeqNumPropertyCollector struct {
   456  	minSeqNum uint64
   457  }
   458  
   459  func (c *minSeqNumPropertyCollector) Add(key InternalKey, value []byte) error {
   460  	if c.minSeqNum == 0 || c.minSeqNum > key.SeqNum() {
   461  		c.minSeqNum = key.SeqNum()
   462  	}
   463  	return nil
   464  }
   465  
   466  func (c *minSeqNumPropertyCollector) Finish(userProps map[string]string) error {
   467  	userProps["test.min-seq-num"] = fmt.Sprint(c.minSeqNum)
   468  	return nil
   469  }
   470  
   471  func (c *minSeqNumPropertyCollector) Name() string {
   472  	return "minSeqNumPropertyCollector"
   473  }
   474  
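         // The "test.min-seq-num" property written by Finish is consumed further
         // below: TestIteratorTableFilter parses it back out of userProps inside an
         // iterator TableFilter to decide whether an sstable can be skipped for a
         // given sequence number.
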
   475  func TestReadSampling(t *testing.T) {
   476  	var d *DB
   477  	defer func() {
   478  		if d != nil {
   479  			require.NoError(t, d.Close())
   480  		}
   481  	}()
   482  
   483  	var iter *Iterator
   484  	defer func() {
   485  		if iter != nil {
   486  			require.NoError(t, iter.Close())
   487  		}
   488  	}()
   489  
   490  	datadriven.RunTest(t, "testdata/iterator_read_sampling", func(t *testing.T, td *datadriven.TestData) string {
   491  		switch td.Cmd {
   492  		case "define":
   493  			if iter != nil {
   494  				if err := iter.Close(); err != nil {
   495  					return err.Error()
   496  				}
   497  			}
   498  			if d != nil {
   499  				if err := d.Close(); err != nil {
   500  					return err.Error()
   501  				}
   502  			}
   503  
   504  			opts := &Options{}
   505  			opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
   506  				func() TablePropertyCollector {
   507  					return &minSeqNumPropertyCollector{}
   508  				})
   509  
   510  			var err error
   511  			if d, err = runDBDefineCmd(td, opts); err != nil {
   512  				return err.Error()
   513  			}
   514  
   515  			d.mu.Lock()
   516  			// Disable the "dynamic base level" code for this test.
   517  			// d.mu.versions.picker.forceBaseLevel1()
   518  			s := d.mu.versions.currentVersion().String()
   519  			d.mu.Unlock()
   520  			return s
   521  
   522  		case "set":
   523  			if d == nil {
   524  				return fmt.Sprintf("%s: db is not defined", td.Cmd)
   525  			}
   526  
   527  			var allowedSeeks int64
   528  			td.ScanArgs(t, "allowed-seeks", &allowedSeeks)
   529  
   530  			d.mu.Lock()
   531  			for _, l := range d.mu.versions.currentVersion().Levels {
   532  				l.Slice().Each(func(f *fileMetadata) {
   533  					f.AllowedSeeks.Store(allowedSeeks)
   534  				})
   535  			}
   536  			d.mu.Unlock()
   537  			return ""
   538  
   539  		case "show":
   540  			if d == nil {
   541  				return fmt.Sprintf("%s: db is not defined", td.Cmd)
   542  			}
   543  
   544  			var fileNum int64
   545  			for _, arg := range td.CmdArgs {
   546  				if len(arg.Vals) != 2 {
   547  					return fmt.Sprintf("%s: %s=<value>", td.Cmd, arg.Key)
   548  				}
   549  				switch arg.Key {
   550  				case "allowed-seeks":
   551  					var err error
   552  					fileNum, err = strconv.ParseInt(arg.Vals[0], 10, 64)
   553  					if err != nil {
   554  						return err.Error()
   555  					}
   556  				}
   557  			}
   558  
   559  			var foundAllowedSeeks int64 = -1
   560  			d.mu.Lock()
   561  			for _, l := range d.mu.versions.currentVersion().Levels {
   562  				l.Slice().Each(func(f *fileMetadata) {
   563  					if f.FileNum == base.FileNum(fileNum) {
   564  						actualAllowedSeeks := f.AllowedSeeks.Load()
   565  						foundAllowedSeeks = actualAllowedSeeks
   566  					}
   567  				})
   568  			}
   569  			d.mu.Unlock()
   570  
   571  			if foundAllowedSeeks == -1 {
   572  				return fmt.Sprintf("invalid file num: %d", fileNum)
   573  			}
   574  			return fmt.Sprintf("%d", foundAllowedSeeks)
   575  
   576  		case "iter":
   577  			if iter == nil || iter.iter == nil {
   578  				// TODO(peter): runDBDefineCmd doesn't properly update the visible
   579  				// sequence number. So we have to use a snapshot with a very large
   580  				// sequence number, otherwise the DB appears empty.
   581  				snap := Snapshot{
   582  					db:     d,
   583  					seqNum: InternalKeySeqNumMax,
   584  				}
   585  				iter, _ = snap.NewIter(nil)
   586  				iter.readSampling.forceReadSampling = true
   587  			}
   588  			return runIterCmd(td, iter, false)
   589  
   590  		case "read-compactions":
   591  			if d == nil {
   592  				return fmt.Sprintf("%s: db is not defined", td.Cmd)
   593  			}
   594  
   595  			d.mu.Lock()
   596  			var sb strings.Builder
   597  			if d.mu.compact.readCompactions.size == 0 {
   598  				sb.WriteString("(none)")
   599  			}
   600  			for i := 0; i < d.mu.compact.readCompactions.size; i++ {
   601  				rc := d.mu.compact.readCompactions.at(i)
   602  				sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end)))
   603  			}
   604  			d.mu.Unlock()
   605  			return sb.String()
   606  
   607  		case "iter-read-compactions":
   608  			if iter == nil {
   609  				return fmt.Sprintf("%s: iter is not defined", td.Cmd)
   610  			}
   611  
   612  			var sb strings.Builder
   613  			if iter.readSampling.pendingCompactions.size == 0 {
   614  				sb.WriteString("(none)")
   615  			}
   616  			for i := 0; i < iter.readSampling.pendingCompactions.size; i++ {
   617  				rc := iter.readSampling.pendingCompactions.at(i)
   618  				sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end)))
   619  			}
   620  			return sb.String()
   621  
   622  		case "close-iter":
   623  			if iter != nil {
   624  				if err := iter.Close(); err != nil {
   625  					return err.Error()
   626  				}
   627  			}
   628  			return ""
   629  
   630  		default:
   631  			return fmt.Sprintf("unknown command: %s", td.Cmd)
   632  		}
   633  	})
   634  }
   635  
   636  func TestIteratorTableFilter(t *testing.T) {
   637  	var d *DB
   638  	defer func() {
   639  		if d != nil {
   640  			require.NoError(t, d.Close())
   641  		}
   642  	}()
   643  
   644  	datadriven.RunTest(t, "testdata/iterator_table_filter", func(t *testing.T, td *datadriven.TestData) string {
   645  		switch td.Cmd {
   646  		case "define":
   647  			if d != nil {
   648  				if err := d.Close(); err != nil {
   649  					return err.Error()
   650  				}
   651  			}
   652  
   653  			opts := &Options{}
   654  			opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
   655  				func() TablePropertyCollector {
   656  					return &minSeqNumPropertyCollector{}
   657  				})
   658  
   659  			var err error
   660  			if d, err = runDBDefineCmd(td, opts); err != nil {
   661  				return err.Error()
   662  			}
   663  
   664  			d.mu.Lock()
   665  			// Disable the "dynamic base level" code for this test.
   666  			d.mu.versions.picker.forceBaseLevel1()
   667  			s := d.mu.versions.currentVersion().String()
   668  			d.mu.Unlock()
   669  			return s
   670  
   671  		case "iter":
   672  			// We're using an iterator table filter to approximate what is done by
   673  			// snapshots.
   674  			iterOpts := &IterOptions{}
   675  			var filterSeqNum uint64
   676  			if td.MaybeScanArgs(t, "filter", &filterSeqNum) {
   677  				iterOpts.TableFilter = func(userProps map[string]string) bool {
   678  					minSeqNum, err := strconv.ParseUint(userProps["test.min-seq-num"], 10, 64)
   679  					if err != nil {
   680  						return true
   681  					}
   682  					return minSeqNum < filterSeqNum
   683  				}
   684  			}
   685  
   686  			// TODO(peter): runDBDefineCmd doesn't properly update the visible
   687  			// sequence number. So we have to use a snapshot with a very large
   688  			// sequence number, otherwise the DB appears empty.
   689  			snap := Snapshot{
   690  				db:     d,
   691  				seqNum: InternalKeySeqNumMax,
   692  			}
   693  			iter, _ := snap.NewIter(iterOpts)
   694  			return runIterCmd(td, iter, true)
   695  
   696  		default:
   697  			return fmt.Sprintf("unknown command: %s", td.Cmd)
   698  		}
   699  	})
   700  }
   701  
   702  func TestIteratorNextPrev(t *testing.T) {
   703  	var mem vfs.FS
   704  	var d *DB
   705  	defer func() {
   706  		require.NoError(t, d.Close())
   707  	}()
   708  
   709  	reset := func() {
   710  		if d != nil {
   711  			require.NoError(t, d.Close())
   712  		}
   713  
   714  		mem = vfs.NewMem()
   715  		require.NoError(t, mem.MkdirAll("ext", 0755))
   716  		opts := &Options{FS: mem}
   717  		// Automatic compactions may compact away tombstones from L6, making
    718  		// some test cases non-deterministic.
   719  		opts.DisableAutomaticCompactions = true
   720  		var err error
   721  		d, err = Open("", opts)
   722  		require.NoError(t, err)
   723  	}
   724  	reset()
   725  
   726  	datadriven.RunTest(t, "testdata/iterator_next_prev", func(t *testing.T, td *datadriven.TestData) string {
   727  		switch td.Cmd {
   728  		case "reset":
   729  			reset()
   730  			return ""
   731  
   732  		case "build":
   733  			if err := runBuildCmd(td, d, mem); err != nil {
   734  				return err.Error()
   735  			}
   736  			return ""
   737  
   738  		case "ingest":
   739  			if err := runIngestCmd(td, d, mem); err != nil {
   740  				return err.Error()
   741  			}
   742  			return runLSMCmd(td, d)
   743  
   744  		case "iter":
   745  			snap := Snapshot{
   746  				db:     d,
   747  				seqNum: InternalKeySeqNumMax,
   748  			}
   749  			td.MaybeScanArgs(t, "seq", &snap.seqNum)
   750  			iter, _ := snap.NewIter(nil)
   751  			return runIterCmd(td, iter, true)
   752  
   753  		default:
   754  			return fmt.Sprintf("unknown command: %s", td.Cmd)
   755  		}
   756  	})
   757  }
   758  
   759  func TestIteratorStats(t *testing.T) {
   760  	var mem vfs.FS
   761  	var d *DB
   762  	defer func() {
   763  		require.NoError(t, d.Close())
   764  	}()
   765  
   766  	reset := func() {
   767  		if d != nil {
   768  			require.NoError(t, d.Close())
   769  		}
   770  
   771  		mem = vfs.NewMem()
   772  		require.NoError(t, mem.MkdirAll("ext", 0755))
   773  		opts := &Options{Comparer: testkeys.Comparer, FS: mem, FormatMajorVersion: internalFormatNewest}
    774  		// Automatic compactions may make some test cases non-deterministic.
   775  		opts.DisableAutomaticCompactions = true
   776  		var err error
   777  		d, err = Open("", opts)
   778  		require.NoError(t, err)
   779  	}
   780  	reset()
   781  
   782  	datadriven.RunTest(t, "testdata/iterator_stats", func(t *testing.T, td *datadriven.TestData) string {
   783  		switch td.Cmd {
   784  		case "reset":
   785  			reset()
   786  			return ""
   787  
   788  		case "build":
   789  			if err := runBuildCmd(td, d, mem); err != nil {
   790  				return err.Error()
   791  			}
   792  			return ""
   793  
   794  		case "ingest":
   795  			if err := runIngestCmd(td, d, mem); err != nil {
   796  				return err.Error()
   797  			}
   798  			return runLSMCmd(td, d)
   799  
   800  		case "iter":
   801  			snap := Snapshot{
   802  				db:     d,
   803  				seqNum: InternalKeySeqNumMax,
   804  			}
   805  			td.MaybeScanArgs(t, "seq", &snap.seqNum)
   806  			iter, _ := snap.NewIter(nil)
   807  			return runIterCmd(td, iter, true)
   808  
   809  		default:
   810  			return fmt.Sprintf("unknown command: %s", td.Cmd)
   811  		}
   812  	})
   813  }
   814  
   815  type iterSeekOptWrapper struct {
   816  	internalIterator
   817  
   818  	seekGEUsingNext, seekPrefixGEUsingNext *int
   819  }
   820  
   821  func (i *iterSeekOptWrapper) SeekGE(
   822  	key []byte, flags base.SeekGEFlags,
   823  ) (*InternalKey, base.LazyValue) {
   824  	if flags.TrySeekUsingNext() {
   825  		*i.seekGEUsingNext++
   826  	}
   827  	return i.internalIterator.SeekGE(key, flags)
   828  }
   829  
   830  func (i *iterSeekOptWrapper) SeekPrefixGE(
   831  	prefix, key []byte, flags base.SeekGEFlags,
   832  ) (*InternalKey, base.LazyValue) {
   833  	if flags.TrySeekUsingNext() {
   834  		*i.seekPrefixGEUsingNext++
   835  	}
   836  	return i.internalIterator.SeekPrefixGE(prefix, key, flags)
   837  }
   838  
   839  func TestIteratorSeekOpt(t *testing.T) {
   840  	var d *DB
   841  	defer func() {
   842  		require.NoError(t, d.Close())
   843  	}()
   844  	var iter *Iterator
   845  	defer func() {
   846  		if iter != nil {
   847  			require.NoError(t, iter.Close())
   848  		}
   849  	}()
   850  	var seekGEUsingNext, seekPrefixGEUsingNext int
   851  
   852  	datadriven.RunTest(t, "testdata/iterator_seek_opt", func(t *testing.T, td *datadriven.TestData) string {
   853  		switch td.Cmd {
   854  		case "define":
   855  			if iter != nil {
   856  				if err := iter.Close(); err != nil {
   857  					return err.Error()
   858  				}
   859  			}
   860  			if d != nil {
   861  				if err := d.Close(); err != nil {
   862  					return err.Error()
   863  				}
   864  			}
   865  			seekGEUsingNext = 0
   866  			seekPrefixGEUsingNext = 0
   867  
   868  			opts := &Options{}
   869  			opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
   870  				func() TablePropertyCollector {
   871  					return &minSeqNumPropertyCollector{}
   872  				})
   873  
   874  			var err error
   875  			if d, err = runDBDefineCmd(td, opts); err != nil {
   876  				return err.Error()
   877  			}
   878  
   879  			d.mu.Lock()
   880  			s := d.mu.versions.currentVersion().String()
   881  			d.mu.Unlock()
   882  			oldNewIters := d.newIters
   883  			d.newIters = func(
   884  				ctx context.Context, file *manifest.FileMetadata, opts *IterOptions,
   885  				internalOpts internalIterOpts) (internalIterator, keyspan.FragmentIterator, error) {
   886  				iter, rangeIter, err := oldNewIters(ctx, file, opts, internalOpts)
   887  				iterWrapped := &iterSeekOptWrapper{
   888  					internalIterator:      iter,
   889  					seekGEUsingNext:       &seekGEUsingNext,
   890  					seekPrefixGEUsingNext: &seekPrefixGEUsingNext,
   891  				}
   892  				return iterWrapped, rangeIter, err
   893  			}
   894  			return s
   895  
   896  		case "iter":
   897  			if iter == nil || iter.iter == nil {
   898  				// TODO(peter): runDBDefineCmd doesn't properly update the visible
   899  				// sequence number. So we have to use a snapshot with a very large
   900  				// sequence number, otherwise the DB appears empty.
   901  				snap := Snapshot{
   902  					db:     d,
   903  					seqNum: InternalKeySeqNumMax,
   904  				}
   905  				iter, _ = snap.NewIter(nil)
   906  				iter.readSampling.forceReadSampling = true
   907  				iter.comparer.Split = func(a []byte) int { return len(a) }
   908  				iter.forceEnableSeekOpt = true
   909  				iter.merging.forceEnableSeekOpt = true
   910  			}
   911  			iterOutput := runIterCmd(td, iter, false)
   912  			stats := iter.Stats()
   913  			// InternalStats are non-deterministic since they depend on how data is
   914  			// distributed across memtables and sstables in the DB.
   915  			stats.InternalStats = InternalIteratorStats{}
   916  			var builder strings.Builder
   917  			fmt.Fprintf(&builder, "%sstats: %s\n", iterOutput, stats.String())
   918  			fmt.Fprintf(&builder, "SeekGEs with trySeekUsingNext: %d\n", seekGEUsingNext)
   919  			fmt.Fprintf(&builder, "SeekPrefixGEs with trySeekUsingNext: %d\n", seekPrefixGEUsingNext)
   920  			return builder.String()
   921  
   922  		default:
   923  			return fmt.Sprintf("unknown command: %s", td.Cmd)
   924  		}
   925  	})
   926  }
   927  
   928  type errorSeekIter struct {
   929  	internalIterator
   930  	// Fields controlling error injection for seeks.
   931  	injectSeekErrorCounts []int
   932  	seekCount             int
   933  	err                   error
   934  }
   935  
   936  func (i *errorSeekIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
   937  	if i.tryInjectError() {
   938  		return nil, base.LazyValue{}
   939  	}
   940  	i.err = nil
   941  	i.seekCount++
   942  	return i.internalIterator.SeekGE(key, flags)
   943  }
   944  
   945  func (i *errorSeekIter) SeekPrefixGE(
   946  	prefix, key []byte, flags base.SeekGEFlags,
   947  ) (*InternalKey, base.LazyValue) {
   948  	if i.tryInjectError() {
   949  		return nil, base.LazyValue{}
   950  	}
   951  	i.err = nil
   952  	i.seekCount++
   953  	return i.internalIterator.SeekPrefixGE(prefix, key, flags)
   954  }
   955  
   956  func (i *errorSeekIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
   957  	if i.tryInjectError() {
   958  		return nil, base.LazyValue{}
   959  	}
   960  	i.err = nil
   961  	i.seekCount++
   962  	return i.internalIterator.SeekLT(key, flags)
   963  }
   964  
   965  func (i *errorSeekIter) tryInjectError() bool {
   966  	if len(i.injectSeekErrorCounts) > 0 && i.injectSeekErrorCounts[0] == i.seekCount {
   967  		i.seekCount++
   968  		i.err = errors.Errorf("injecting error")
   969  		i.injectSeekErrorCounts = i.injectSeekErrorCounts[1:]
   970  		return true
   971  	}
   972  	return false
   973  }
   974  
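         // For example, injectSeekErrorCounts = []int{2} injects an error on the third
         // seek (the one attempted while seekCount == 2); earlier and later seeks pass
         // through to the wrapped iterator unchanged.
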
   975  func (i *errorSeekIter) First() (*InternalKey, base.LazyValue) {
   976  	i.err = nil
   977  	return i.internalIterator.First()
   978  }
   979  
   980  func (i *errorSeekIter) Last() (*InternalKey, base.LazyValue) {
   981  	i.err = nil
   982  	return i.internalIterator.Last()
   983  }
   984  
   985  func (i *errorSeekIter) Next() (*InternalKey, base.LazyValue) {
   986  	if i.err != nil {
   987  		return nil, base.LazyValue{}
   988  	}
   989  	return i.internalIterator.Next()
   990  }
   991  
   992  func (i *errorSeekIter) Prev() (*InternalKey, base.LazyValue) {
   993  	if i.err != nil {
   994  		return nil, base.LazyValue{}
   995  	}
   996  	return i.internalIterator.Prev()
   997  }
   998  
   999  func (i *errorSeekIter) Error() error {
  1000  	if i.err != nil {
  1001  		return i.err
  1002  	}
  1003  	return i.internalIterator.Error()
  1004  }
  1005  
  1006  func TestIteratorSeekOptErrors(t *testing.T) {
  1007  	var keys []InternalKey
  1008  	var vals [][]byte
  1009  
  1010  	var errorIter errorSeekIter
  1011  	newIter := func(opts IterOptions) *Iterator {
  1012  		iter := &fakeIter{
  1013  			lower: opts.GetLowerBound(),
  1014  			upper: opts.GetUpperBound(),
  1015  			keys:  keys,
  1016  			vals:  vals,
  1017  		}
  1018  		errorIter = errorSeekIter{internalIterator: invalidating.NewIter(iter)}
  1019  		// NB: This Iterator cannot be cloned since it is not constructed
  1020  		// with a readState. It suffices for this test.
  1021  		return &Iterator{
  1022  			opts:     opts,
  1023  			comparer: *testkeys.Comparer,
  1024  			merge:    DefaultMerger.Merge,
  1025  			iter:     &errorIter,
  1026  		}
  1027  	}
  1028  
  1029  	datadriven.RunTest(t, "testdata/iterator_seek_opt_errors", func(t *testing.T, d *datadriven.TestData) string {
  1030  		switch d.Cmd {
  1031  		case "define":
  1032  			keys = keys[:0]
  1033  			vals = vals[:0]
  1034  			for _, key := range strings.Split(d.Input, "\n") {
  1035  				j := strings.Index(key, ":")
  1036  				keys = append(keys, base.ParseInternalKey(key[:j]))
  1037  				vals = append(vals, []byte(key[j+1:]))
  1038  			}
  1039  			return ""
  1040  
  1041  		case "iter":
  1042  			var opts IterOptions
  1043  			var injectSeekGEErrorCounts []int
  1044  			for _, arg := range d.CmdArgs {
  1045  				if len(arg.Vals) < 1 {
  1046  					return fmt.Sprintf("%s: %s=<value>", d.Cmd, arg.Key)
  1047  				}
  1048  				switch arg.Key {
  1049  				case "lower":
  1050  					opts.LowerBound = []byte(arg.Vals[0])
  1051  				case "upper":
  1052  					opts.UpperBound = []byte(arg.Vals[0])
  1053  				case "seek-error":
  1054  					for i := 0; i < len(arg.Vals); i++ {
  1055  						n, err := strconv.Atoi(arg.Vals[i])
  1056  						if err != nil {
  1057  							return err.Error()
  1058  						}
  1059  						injectSeekGEErrorCounts = append(injectSeekGEErrorCounts, n)
  1060  					}
  1061  				default:
  1062  					return fmt.Sprintf("%s: unknown arg: %s", d.Cmd, arg.Key)
  1063  				}
  1064  			}
  1065  
  1066  			iter := newIter(opts)
  1067  			errorIter.injectSeekErrorCounts = injectSeekGEErrorCounts
  1068  			return runIterCmd(d, iter, true)
  1069  
  1070  		default:
  1071  			return fmt.Sprintf("unknown command: %s", d.Cmd)
  1072  		}
  1073  	})
  1074  }
  1075  
  1076  type testBlockIntervalCollector struct {
  1077  	numLength     int
  1078  	offsetFromEnd int
  1079  	initialized   bool
  1080  	lower, upper  uint64
  1081  }
  1082  
  1083  func (bi *testBlockIntervalCollector) Add(key InternalKey, value []byte) error {
  1084  	k := key.UserKey
  1085  	if len(k) < bi.numLength+bi.offsetFromEnd {
  1086  		return nil
  1087  	}
  1088  	n := len(k) - bi.offsetFromEnd - bi.numLength
  1089  	val, err := strconv.Atoi(string(k[n : n+bi.numLength]))
  1090  	if err != nil {
  1091  		return err
  1092  	}
  1093  	if val < 0 {
  1094  		panic("testBlockIntervalCollector expects values >= 0")
  1095  	}
  1096  	uval := uint64(val)
  1097  	if !bi.initialized {
  1098  		bi.lower, bi.upper = uval, uval+1
  1099  		bi.initialized = true
  1100  		return nil
  1101  	}
  1102  	if bi.lower > uval {
  1103  		bi.lower = uval
  1104  	}
  1105  	if uval >= bi.upper {
  1106  		bi.upper = uval + 1
  1107  	}
  1108  	return nil
  1109  }
  1110  
  1111  func (bi *testBlockIntervalCollector) FinishDataBlock() (lower uint64, upper uint64, err error) {
  1112  	bi.initialized = false
  1113  	l, u := bi.lower, bi.upper
  1114  	bi.lower, bi.upper = 0, 0
  1115  	return l, u, nil
  1116  }
  1117  
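         // For example (keys chosen for illustration): with numLength=2 and
         // offsetFromEnd=0, adding "ab07" and then "ab03" to the collector produces the
         // data-block interval [3, 8), i.e. FinishDataBlock returns lower=3, upper=8.
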
  1118  func TestIteratorBlockIntervalFilter(t *testing.T) {
  1119  	var mem vfs.FS
  1120  	var d *DB
  1121  	defer func() {
  1122  		require.NoError(t, d.Close())
  1123  	}()
  1124  
  1125  	type collector struct {
  1126  		id     uint16
  1127  		offset int
  1128  	}
  1129  	createDB := func(collectors []collector) {
  1130  		if d != nil {
  1131  			require.NoError(t, d.Close())
  1132  		}
  1133  
  1134  		mem = vfs.NewMem()
  1135  		require.NoError(t, mem.MkdirAll("ext", 0755))
  1136  
  1137  		var bpCollectors []func() BlockPropertyCollector
  1138  		for _, c := range collectors {
  1139  			coll := c
  1140  			bpCollectors = append(bpCollectors, func() BlockPropertyCollector {
  1141  				return sstable.NewBlockIntervalCollector(
  1142  					fmt.Sprintf("%d", coll.id),
  1143  					&testBlockIntervalCollector{numLength: 2, offsetFromEnd: coll.offset},
  1144  					nil, /* range key collector */
  1145  				)
  1146  			})
  1147  		}
  1148  		opts := &Options{
  1149  			FS:                      mem,
  1150  			FormatMajorVersion:      internalFormatNewest,
  1151  			BlockPropertyCollectors: bpCollectors,
  1152  		}
  1153  		lo := LevelOptions{BlockSize: 1, IndexBlockSize: 1}
  1154  		opts.Levels = append(opts.Levels, lo)
  1155  
  1156  		// Automatic compactions may compact away tombstones from L6, making
   1157  		// some test cases non-deterministic.
  1158  		opts.DisableAutomaticCompactions = true
  1159  		var err error
  1160  		d, err = Open("", opts)
  1161  		require.NoError(t, err)
  1162  	}
  1163  
  1164  	datadriven.RunTest(
  1165  		t, "testdata/iterator_block_interval_filter", func(t *testing.T, td *datadriven.TestData) string {
  1166  			switch td.Cmd {
  1167  			case "build":
  1168  				var collectors []collector
  1169  				for _, arg := range td.CmdArgs {
  1170  					switch arg.Key {
  1171  					case "id_offset":
  1172  						if len(arg.Vals) != 2 {
  1173  							return "id and offset not provided"
  1174  						}
  1175  						var id, offset int
  1176  						var err error
  1177  						if id, err = strconv.Atoi(arg.Vals[0]); err != nil {
  1178  							return err.Error()
  1179  						}
  1180  						if offset, err = strconv.Atoi(arg.Vals[1]); err != nil {
  1181  							return err.Error()
  1182  						}
  1183  						collectors = append(collectors, collector{id: uint16(id), offset: offset})
  1184  					default:
  1185  						return fmt.Sprintf("unknown key: %s", arg.Key)
  1186  					}
  1187  				}
  1188  				createDB(collectors)
  1189  				b := d.NewBatch()
  1190  				if err := runBatchDefineCmd(td, b); err != nil {
  1191  					return err.Error()
  1192  				}
  1193  				if err := b.Commit(nil); err != nil {
  1194  					return err.Error()
  1195  				}
  1196  				if err := d.Flush(); err != nil {
  1197  					return err.Error()
  1198  				}
  1199  				return runLSMCmd(td, d)
  1200  
  1201  			case "iter":
  1202  				var opts IterOptions
  1203  				for _, arg := range td.CmdArgs {
  1204  					switch arg.Key {
  1205  					case "id_lower_upper":
  1206  						if len(arg.Vals) != 3 {
  1207  							return "id, lower, upper not provided"
  1208  						}
  1209  						var id, lower, upper int
  1210  						var err error
  1211  						if id, err = strconv.Atoi(arg.Vals[0]); err != nil {
  1212  							return err.Error()
  1213  						}
  1214  						if lower, err = strconv.Atoi(arg.Vals[1]); err != nil {
  1215  							return err.Error()
  1216  						}
  1217  						if upper, err = strconv.Atoi(arg.Vals[2]); err != nil {
  1218  							return err.Error()
  1219  						}
  1220  						opts.PointKeyFilters = append(opts.PointKeyFilters,
  1221  							sstable.NewBlockIntervalFilter(fmt.Sprintf("%d", id),
  1222  								uint64(lower), uint64(upper)))
  1223  					default:
  1224  						return fmt.Sprintf("unknown key: %s", arg.Key)
  1225  					}
  1226  				}
  1227  				rand.Shuffle(len(opts.PointKeyFilters), func(i, j int) {
  1228  					opts.PointKeyFilters[i], opts.PointKeyFilters[j] =
  1229  						opts.PointKeyFilters[j], opts.PointKeyFilters[i]
  1230  				})
  1231  				iter, _ := d.NewIter(&opts)
  1232  				return runIterCmd(td, iter, true)
  1233  
  1234  			default:
  1235  				return fmt.Sprintf("unknown command: %s", td.Cmd)
  1236  			}
  1237  		})
  1238  }
  1239  
  1240  var seed = flag.Uint64("seed", 0, "a pseudorandom number generator seed")
  1241  
  1242  func randStr(fill []byte, rng *rand.Rand) {
  1243  	const letters = "abcdefghijklmnopqrstuvwxyz"
  1244  	const lettersLen = len(letters)
  1245  	for i := 0; i < len(fill); i++ {
  1246  		fill[i] = letters[rng.Intn(lettersLen)]
  1247  	}
  1248  }
  1249  
  1250  func randValue(n int, rng *rand.Rand) []byte {
  1251  	buf := make([]byte, n)
  1252  	randStr(buf, rng)
  1253  	return buf
  1254  }
  1255  
  1256  func randKey(n int, rng *rand.Rand) ([]byte, int) {
  1257  	keyPrefix := randValue(n, rng)
  1258  	suffix := rng.Intn(100)
  1259  	return append(keyPrefix, []byte(fmt.Sprintf("%02d", suffix))...), suffix
  1260  }
  1261  
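         // For instance, randKey(3, rng) might return ([]byte("qzv07"), 7): a random
         // three-letter prefix followed by a two-digit suffix in [0, 100), with the
         // suffix also returned as an int. (Example bytes only; actual output depends
         // on the rng state.) The two-digit suffix is what testBlockIntervalCollector
         // above extracts with numLength: 2.
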
  1262  func TestIteratorRandomizedBlockIntervalFilter(t *testing.T) {
  1263  	mem := vfs.NewMem()
  1264  	opts := &Options{
  1265  		FS:                 mem,
  1266  		FormatMajorVersion: internalFormatNewest,
  1267  		BlockPropertyCollectors: []func() BlockPropertyCollector{
  1268  			func() BlockPropertyCollector {
  1269  				return sstable.NewBlockIntervalCollector(
  1270  					"0", &testBlockIntervalCollector{numLength: 2}, nil, /* range key collector */
  1271  				)
  1272  			},
  1273  		},
  1274  	}
  1275  	seed := *seed
  1276  	if seed == 0 {
  1277  		seed = uint64(time.Now().UnixNano())
  1278  		t.Logf("seed: %d", seed)
  1279  	}
  1280  	rng := rand.New(rand.NewSource(seed))
   1281  	opts.FlushSplitBytes = 1 << rng.Intn(8)            // 1B - 128B
  1282  	opts.L0CompactionThreshold = 1 << rng.Intn(2)      // 1-2
  1283  	opts.L0CompactionFileThreshold = 1 << rng.Intn(11) // 1-1024
  1284  	opts.LBaseMaxBytes = 1 << rng.Intn(11)             // 1B - 1KB
  1285  	opts.MemTableSize = 2 << 10                        // 2KB
  1286  	var lopts LevelOptions
   1287  	lopts.BlockSize = 1 << rng.Intn(8)      // 1B - 128B
   1288  	lopts.IndexBlockSize = 1 << rng.Intn(8) // 1B - 128B
  1289  	opts.Levels = []LevelOptions{lopts}
  1290  
  1291  	d, err := Open("", opts)
  1292  	require.NoError(t, err)
  1293  	defer func() {
  1294  		require.NoError(t, d.Close())
  1295  	}()
  1296  	matchingKeyValues := make(map[string]string)
  1297  	lower := rng.Intn(100)
  1298  	upper := rng.Intn(100)
  1299  	if lower > upper {
  1300  		lower, upper = upper, lower
  1301  	}
  1302  	n := 2000
  1303  	for i := 0; i < n; i++ {
  1304  		key, suffix := randKey(20+rng.Intn(5), rng)
  1305  		value := randValue(50, rng)
  1306  		if lower <= suffix && suffix < upper {
  1307  			matchingKeyValues[string(key)] = string(value)
  1308  		}
  1309  		d.Set(key, value, nil)
  1310  	}
  1311  
  1312  	var iterOpts IterOptions
  1313  	iterOpts.PointKeyFilters = []BlockPropertyFilter{
  1314  		sstable.NewBlockIntervalFilter("0",
  1315  			uint64(lower), uint64(upper)),
  1316  	}
  1317  	iter, _ := d.NewIter(&iterOpts)
  1318  	defer func() {
  1319  		require.NoError(t, iter.Close())
  1320  	}()
  1321  	iter.First()
  1322  	found := 0
  1323  	matchingCount := len(matchingKeyValues)
  1324  	for ; iter.Valid(); iter.Next() {
  1325  		found++
  1326  		key := string(iter.Key())
  1327  		value, ok := matchingKeyValues[key]
  1328  		if ok {
  1329  			require.Equal(t, value, string(iter.Value()))
  1330  			delete(matchingKeyValues, key)
  1331  		}
  1332  	}
  1333  	t.Logf("generated %d keys: %d matching, %d found", n, matchingCount, found)
  1334  	require.Equal(t, 0, len(matchingKeyValues))
  1335  }
  1336  
  1337  func TestIteratorGuaranteedDurable(t *testing.T) {
  1338  	mem := vfs.NewMem()
  1339  	opts := &Options{FS: mem}
  1340  	d, err := Open("", opts)
  1341  	require.NoError(t, err)
  1342  	defer func() {
  1343  		require.NoError(t, d.Close())
  1344  	}()
  1345  	iterOptions := IterOptions{OnlyReadGuaranteedDurable: true}
  1346  	failFunc := func(t *testing.T, reader Reader) {
  1347  		defer func() {
  1348  			if r := recover(); r == nil {
  1349  				require.Fail(t, "expected panic")
  1350  			}
  1351  			reader.Close()
  1352  		}()
  1353  		iter, _ := reader.NewIter(&iterOptions)
  1354  		defer iter.Close()
  1355  	}
  1356  	t.Run("snapshot", func(t *testing.T) {
  1357  		failFunc(t, d.NewSnapshot())
  1358  	})
  1359  	t.Run("batch", func(t *testing.T) {
  1360  		failFunc(t, d.NewIndexedBatch())
  1361  	})
  1362  	t.Run("db", func(t *testing.T) {
  1363  		d.Set([]byte("k"), []byte("v"), nil)
  1364  		foundKV := func(o *IterOptions) bool {
  1365  			iter, _ := d.NewIter(o)
  1366  			defer iter.Close()
  1367  			iter.SeekGE([]byte("k"))
  1368  			return iter.Valid()
  1369  		}
  1370  		require.True(t, foundKV(nil))
  1371  		require.False(t, foundKV(&iterOptions))
  1372  		require.NoError(t, d.Flush())
  1373  		require.True(t, foundKV(nil))
  1374  		require.True(t, foundKV(&iterOptions))
  1375  	})
  1376  }
  1377  
  1378  func TestIteratorBoundsLifetimes(t *testing.T) {
  1379  	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
  1380  	d := newPointTestkeysDatabase(t, testkeys.Alpha(2))
  1381  	defer func() { require.NoError(t, d.Close()) }()
  1382  
  1383  	var buf bytes.Buffer
  1384  	iterators := map[string]*Iterator{}
  1385  	var labels []string
  1386  	printIters := func(w io.Writer) {
  1387  		labels = labels[:0]
  1388  		for label := range iterators {
  1389  			labels = append(labels, label)
  1390  		}
  1391  		sort.Strings(labels)
  1392  		for _, label := range labels {
  1393  			it := iterators[label]
  1394  			fmt.Fprintf(&buf, "%s: (", label)
  1395  			if it.opts.LowerBound == nil {
  1396  				fmt.Fprint(&buf, "<nil>, ")
  1397  			} else {
  1398  				fmt.Fprintf(&buf, "%q, ", it.opts.LowerBound)
  1399  			}
  1400  			if it.opts.UpperBound == nil {
  1401  				fmt.Fprint(&buf, "<nil>)")
  1402  			} else {
  1403  				fmt.Fprintf(&buf, "%q)", it.opts.UpperBound)
  1404  			}
  1405  			fmt.Fprintf(&buf, " boundsBufIdx=%d\n", it.boundsBufIdx)
  1406  		}
  1407  	}
  1408  	parseBounds := func(td *datadriven.TestData) (lower, upper []byte) {
  1409  		for _, arg := range td.CmdArgs {
  1410  			if arg.Key == "lower" {
  1411  				lower = []byte(arg.Vals[0])
  1412  			} else if arg.Key == "upper" {
  1413  				upper = []byte(arg.Vals[0])
  1414  			}
  1415  		}
  1416  		return lower, upper
  1417  	}
  1418  	trashBounds := func(bounds ...[]byte) {
  1419  		for _, bound := range bounds {
  1420  			rng.Read(bound[:])
  1421  		}
  1422  	}
  1423  
  1424  	datadriven.RunTest(t, "testdata/iterator_bounds_lifetimes", func(t *testing.T, td *datadriven.TestData) string {
  1425  		switch td.Cmd {
  1426  		case "define":
  1427  			var err error
  1428  			if d, err = runDBDefineCmd(td, d.opts); err != nil {
  1429  				return err.Error()
  1430  			}
  1431  			d.mu.Lock()
  1432  			s := d.mu.versions.currentVersion().String()
  1433  			d.mu.Unlock()
  1434  			return s
  1435  		case "new-iter":
  1436  			var label string
  1437  			td.ScanArgs(t, "label", &label)
  1438  			lower, upper := parseBounds(td)
  1439  			iterators[label], _ = d.NewIter(&IterOptions{
  1440  				LowerBound: lower,
  1441  				UpperBound: upper,
  1442  			})
  1443  			trashBounds(lower, upper)
  1444  			buf.Reset()
  1445  			printIters(&buf)
  1446  			return buf.String()
  1447  		case "clone":
  1448  			var from, to string
  1449  			td.ScanArgs(t, "from", &from)
  1450  			td.ScanArgs(t, "to", &to)
  1451  			var err error
  1452  			iterators[to], err = iterators[from].Clone(CloneOptions{})
  1453  			if err != nil {
  1454  				return err.Error()
  1455  			}
  1456  			buf.Reset()
  1457  			printIters(&buf)
  1458  			return buf.String()
  1459  		case "close":
  1460  			var label string
  1461  			td.ScanArgs(t, "label", &label)
  1462  			iterators[label].Close()
  1463  			delete(iterators, label)
  1464  			buf.Reset()
  1465  			printIters(&buf)
  1466  			return buf.String()
  1467  		case "iter":
  1468  			var label string
  1469  			td.ScanArgs(t, "label", &label)
  1470  			return runIterCmd(td, iterators[label], false /* closeIter */)
  1471  		case "set-bounds":
  1472  			var label string
  1473  			td.ScanArgs(t, "label", &label)
  1474  			lower, upper := parseBounds(td)
  1475  			iterators[label].SetBounds(lower, upper)
  1476  			trashBounds(lower, upper)
  1477  			buf.Reset()
  1478  			printIters(&buf)
  1479  			return buf.String()
  1480  		case "set-options":
  1481  			var label string
  1482  			var tableFilter bool
  1483  			td.ScanArgs(t, "label", &label)
  1484  			opts := iterators[label].opts
  1485  			for _, arg := range td.CmdArgs {
  1486  				if arg.Key == "table-filter" {
  1487  					tableFilter = true
  1488  				}
  1489  				if arg.Key == "key-types" {
  1490  					switch arg.Vals[0] {
  1491  					case "points-only":
  1492  						opts.KeyTypes = IterKeyTypePointsOnly
  1493  					case "ranges-only":
  1494  						opts.KeyTypes = IterKeyTypeRangesOnly
  1495  					case "both":
  1496  						opts.KeyTypes = IterKeyTypePointsAndRanges
  1497  					default:
  1498  						panic(fmt.Sprintf("unrecognized key type %q", arg.Vals[0]))
  1499  					}
  1500  				}
  1501  			}
  1502  			opts.LowerBound, opts.UpperBound = parseBounds(td)
  1503  			if tableFilter {
  1504  				opts.TableFilter = func(userProps map[string]string) bool { return false }
  1505  			}
  1506  			iterators[label].SetOptions(&opts)
  1507  			trashBounds(opts.LowerBound, opts.UpperBound)
  1508  			buf.Reset()
  1509  			printIters(&buf)
  1510  			return buf.String()
  1511  		default:
  1512  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
  1513  		}
  1514  	})
  1515  }
  1516  
  1517  func TestIteratorStatsMerge(t *testing.T) {
  1518  	s := IteratorStats{
  1519  		ForwardSeekCount: [NumStatsKind]int{1, 2},
  1520  		ReverseSeekCount: [NumStatsKind]int{3, 4},
  1521  		ForwardStepCount: [NumStatsKind]int{5, 6},
  1522  		ReverseStepCount: [NumStatsKind]int{7, 8},
  1523  		InternalStats: InternalIteratorStats{
  1524  			BlockBytes:                     9,
  1525  			BlockBytesInCache:              10,
  1526  			BlockReadDuration:              3 * time.Millisecond,
  1527  			KeyBytes:                       11,
  1528  			ValueBytes:                     12,
  1529  			PointCount:                     13,
  1530  			PointsCoveredByRangeTombstones: 14,
  1531  		},
  1532  		RangeKeyStats: RangeKeyIteratorStats{
  1533  			Count:           15,
  1534  			ContainedPoints: 16,
  1535  			SkippedPoints:   17,
  1536  		},
  1537  	}
  1538  	s.InternalStats.SeparatedPointValue.Count = 1
  1539  	s.InternalStats.SeparatedPointValue.ValueBytes = 5
  1540  	s.InternalStats.SeparatedPointValue.ValueBytesFetched = 3
  1541  	s2 := IteratorStats{
  1542  		ForwardSeekCount: [NumStatsKind]int{1, 2},
  1543  		ReverseSeekCount: [NumStatsKind]int{3, 4},
  1544  		ForwardStepCount: [NumStatsKind]int{5, 6},
  1545  		ReverseStepCount: [NumStatsKind]int{7, 8},
  1546  		InternalStats: InternalIteratorStats{
  1547  			BlockBytes:                     9,
  1548  			BlockBytesInCache:              10,
  1549  			BlockReadDuration:              4 * time.Millisecond,
  1550  			KeyBytes:                       11,
  1551  			ValueBytes:                     12,
  1552  			PointCount:                     13,
  1553  			PointsCoveredByRangeTombstones: 14,
  1554  		},
  1555  		RangeKeyStats: RangeKeyIteratorStats{
  1556  			Count:           15,
  1557  			ContainedPoints: 16,
  1558  			SkippedPoints:   17,
  1559  		},
  1560  	}
  1561  	s2.InternalStats.SeparatedPointValue.Count = 2
  1562  	s2.InternalStats.SeparatedPointValue.ValueBytes = 10
  1563  	s2.InternalStats.SeparatedPointValue.ValueBytesFetched = 6
  1564  	s.Merge(s2)
  1565  	expected := IteratorStats{
  1566  		ForwardSeekCount: [NumStatsKind]int{2, 4},
  1567  		ReverseSeekCount: [NumStatsKind]int{6, 8},
  1568  		ForwardStepCount: [NumStatsKind]int{10, 12},
  1569  		ReverseStepCount: [NumStatsKind]int{14, 16},
  1570  		InternalStats: InternalIteratorStats{
  1571  			BlockBytes:                     18,
  1572  			BlockBytesInCache:              20,
  1573  			BlockReadDuration:              7 * time.Millisecond,
  1574  			KeyBytes:                       22,
  1575  			ValueBytes:                     24,
  1576  			PointCount:                     26,
  1577  			PointsCoveredByRangeTombstones: 28,
  1578  		},
  1579  		RangeKeyStats: RangeKeyIteratorStats{
  1580  			Count:           30,
  1581  			ContainedPoints: 32,
  1582  			SkippedPoints:   34,
  1583  		},
  1584  	}
  1585  	expected.InternalStats.SeparatedPointValue.Count = 3
  1586  	expected.InternalStats.SeparatedPointValue.ValueBytes = 15
  1587  	expected.InternalStats.SeparatedPointValue.ValueBytesFetched = 9
  1588  	require.Equal(t, expected, s)
  1589  }
  1590  
  1591  // TestSetOptionsEquivalence tests equivalence between SetOptions to mutate an
  1592  // iterator and constructing a new iterator with NewIter. The long-lived
  1593  // iterator and the new iterator should surface identical iterator states.
  1594  func TestSetOptionsEquivalence(t *testing.T) {
  1595  	seed := uint64(time.Now().UnixNano())
  1596  	// Call a helper function with the seed so that the seed appears within
  1597  	// stack traces if there's a panic.
  1598  	testSetOptionsEquivalence(t, seed)
  1599  }
  1600  
  1601  func testSetOptionsEquivalence(t *testing.T, seed uint64) {
  1602  	rng := rand.New(rand.NewSource(seed))
  1603  	ks := testkeys.Alpha(2)
  1604  	d := newTestkeysDatabase(t, ks, rng)
  1605  	defer func() { require.NoError(t, d.Close()) }()
  1606  
  1607  	var o IterOptions
  1608  	generateNewOptions := func() {
  1609  		// TODO(jackson): Include test coverage for block property filters, etc.
  1610  		if rng.Intn(2) == 1 {
  1611  			o.KeyTypes = IterKeyType(rng.Intn(3))
  1612  		}
  1613  		if rng.Intn(2) == 1 {
  1614  			if rng.Intn(2) == 1 {
  1615  				o.LowerBound = nil
  1616  				if rng.Intn(2) == 1 {
  1617  					o.LowerBound = testkeys.KeyAt(ks, rng.Int63n(ks.Count()), rng.Int63n(ks.Count()))
  1618  				}
  1619  			}
  1620  			if rng.Intn(2) == 1 {
  1621  				o.UpperBound = nil
  1622  				if rng.Intn(2) == 1 {
  1623  					o.UpperBound = testkeys.KeyAt(ks, rng.Int63n(ks.Count()), rng.Int63n(ks.Count()))
  1624  				}
  1625  			}
  1626  			if testkeys.Comparer.Compare(o.LowerBound, o.UpperBound) > 0 {
  1627  				o.LowerBound, o.UpperBound = o.UpperBound, o.LowerBound
  1628  			}
  1629  		}
  1630  		o.RangeKeyMasking.Suffix = nil
  1631  		if o.KeyTypes == IterKeyTypePointsAndRanges && rng.Intn(2) == 1 {
  1632  			o.RangeKeyMasking.Suffix = testkeys.Suffix(rng.Int63n(ks.Count()))
  1633  		}
  1634  	}
  1635  
  1636  	var longLivedIter, newIter *Iterator
  1637  	var history, longLivedBuf, newIterBuf bytes.Buffer
  1638  	defer func() {
  1639  		if r := recover(); r != nil {
  1640  			t.Log(history.String())
  1641  			panic(r)
  1642  		}
  1643  	}()
  1644  	defer func() {
  1645  		if longLivedIter != nil {
  1646  			longLivedIter.Close()
  1647  		}
  1648  		if newIter != nil {
  1649  			newIter.Close()
  1650  		}
  1651  	}()
  1652  
  1653  	type positioningOp struct {
  1654  		desc string
  1655  		run  func(*Iterator) IterValidityState
  1656  	}
  1657  	positioningOps := []func() positioningOp{
  1658  		// SeekGE
  1659  		func() positioningOp {
  1660  			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
  1661  			return positioningOp{
  1662  				desc: fmt.Sprintf("SeekGE(%q)", k),
  1663  				run: func(it *Iterator) IterValidityState {
  1664  					return it.SeekGEWithLimit(k, nil)
  1665  				},
  1666  			}
  1667  		},
  1668  		// SeekLT
  1669  		func() positioningOp {
  1670  			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
  1671  			return positioningOp{
  1672  				desc: fmt.Sprintf("SeekLT(%q)", k),
  1673  				run: func(it *Iterator) IterValidityState {
  1674  					return it.SeekLTWithLimit(k, nil)
  1675  				},
  1676  			}
  1677  		},
  1678  		// SeekPrefixGE
  1679  		func() positioningOp {
  1680  			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
  1681  			return positioningOp{
  1682  				desc: fmt.Sprintf("SeekPrefixGE(%q)", k),
  1683  				run: func(it *Iterator) IterValidityState {
  1684  					if it.SeekPrefixGE(k) {
  1685  						return IterValid
  1686  					}
  1687  					return IterExhausted
  1688  				},
  1689  			}
  1690  		},
  1691  	}
  1692  
  1693  	for i := 0; i < 10_000; i++ {
  1694  		// Generate new random options. The options in o will be mutated.
  1695  		generateNewOptions()
  1696  		fmt.Fprintf(&history, "new options: %s\n", iterOptionsString(&o))
  1697  
  1698  		newIter, _ = d.NewIter(&o)
  1699  		if longLivedIter == nil {
  1700  			longLivedIter, _ = d.NewIter(&o)
  1701  		} else {
  1702  			longLivedIter.SetOptions(&o)
  1703  		}
  1704  
  1705  		// Apply the same operation to both iterators.
  1706  		iterOp := positioningOps[rng.Intn(len(positioningOps))]()
  1707  		newIterValidity := iterOp.run(newIter)
  1708  		longLivedValidity := iterOp.run(longLivedIter)
  1709  
  1710  		newIterBuf.Reset()
  1711  		longLivedBuf.Reset()
  1712  		printIterState(&newIterBuf, newIter, newIterValidity, true /* printValidityState */)
  1713  		printIterState(&longLivedBuf, longLivedIter, longLivedValidity, true /* printValidityState */)
  1714  		fmt.Fprintf(&history, "%s = %s\n", iterOp.desc, newIterBuf.String())
  1715  
  1716  		if newIterBuf.String() != longLivedBuf.String() {
  1717  			t.Logf("history:\n%s\n", history.String())
  1718  			t.Logf("seed: %d\n", seed)
  1719  			t.Fatalf("expected %q, got %q", newIterBuf.String(), longLivedBuf.String())
  1720  		}
  1721  		_ = newIter.Close()
  1722  
  1723  		newIter = nil
  1724  	}
  1725  	t.Logf("history:\n%s\n", history.String())
  1726  }
  1727  
  1728  func iterOptionsString(o *IterOptions) string {
  1729  	var buf bytes.Buffer
  1730  	fmt.Fprintf(&buf, "key-types=%s, lower=%q, upper=%q",
  1731  		o.KeyTypes, o.LowerBound, o.UpperBound)
  1732  	if o.TableFilter != nil {
  1733  		fmt.Fprintf(&buf, ", table-filter")
  1734  	}
  1735  	if o.OnlyReadGuaranteedDurable {
  1736  		fmt.Fprintf(&buf, ", only-durable")
  1737  	}
  1738  	if o.UseL6Filters {
  1739  		fmt.Fprintf(&buf, ", use-L6-filters")
  1740  	}
  1741  	for i, pkf := range o.PointKeyFilters {
  1742  		fmt.Fprintf(&buf, ", point-key-filter[%d]=%q", i, pkf.Name())
  1743  	}
  1744  	for i, rkf := range o.RangeKeyFilters {
  1745  		fmt.Fprintf(&buf, ", range-key-filter[%d]=%q", i, rkf.Name())
  1746  	}
  1747  	return buf.String()
  1748  }
  1749  
  1750  func newTestkeysDatabase(t *testing.T, ks testkeys.Keyspace, rng *rand.Rand) *DB {
  1751  	dbOpts := &Options{
  1752  		Comparer:           testkeys.Comparer,
  1753  		FS:                 vfs.NewMem(),
  1754  		FormatMajorVersion: FormatRangeKeys,
  1755  		Logger:             panicLogger{},
  1756  	}
  1757  	d, err := Open("", dbOpts)
  1758  	require.NoError(t, err)
  1759  
  1760  	// Randomize the order in which we write keys.
  1761  	order := rng.Perm(int(ks.Count()))
  1762  	b := d.NewBatch()
  1763  	keyBuf := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen)
  1764  	keyBuf2 := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen)
  1765  	for i := 0; i < len(order); i++ {
  1766  		const maxVersionsPerKey = 10
  1767  		keyIndex := order[i]
  1768  		for versions := rng.Intn(maxVersionsPerKey); versions > 0; versions-- {
  1769  			n := testkeys.WriteKeyAt(keyBuf, ks, int64(keyIndex), rng.Int63n(maxVersionsPerKey))
  1770  			b.Set(keyBuf[:n], keyBuf[:n], nil)
  1771  		}
  1772  
  1773  		// Sometimes add a range key too.
  1774  		if rng.Intn(100) == 1 {
  1775  			startIdx := rng.Int63n(ks.Count())
  1776  			endIdx := rng.Int63n(ks.Count())
  1777  			startLen := testkeys.WriteKey(keyBuf, ks, startIdx)
  1778  			endLen := testkeys.WriteKey(keyBuf2, ks, endIdx)
  1779  			suffixInt := rng.Int63n(maxVersionsPerKey)
  1780  			require.NoError(t, b.RangeKeySet(
  1781  				keyBuf[:startLen],
  1782  				keyBuf2[:endLen],
  1783  				testkeys.Suffix(suffixInt),
  1784  				nil,
  1785  				nil))
  1786  		}
  1787  
  1788  		// Randomize the flush points.
  1789  		if !b.Empty() && rng.Intn(10) == 1 {
  1790  			require.NoError(t, b.Commit(nil))
  1791  			require.NoError(t, d.Flush())
  1792  			b = d.NewBatch()
  1793  		}
  1794  	}
  1795  	if !b.Empty() {
  1796  		require.NoError(t, b.Commit(nil))
  1797  	}
  1798  	return d
  1799  }
  1800  
  1801  func newPointTestkeysDatabase(t *testing.T, ks testkeys.Keyspace) *DB {
  1802  	dbOpts := &Options{
  1803  		Comparer:           testkeys.Comparer,
  1804  		FS:                 vfs.NewMem(),
  1805  		FormatMajorVersion: FormatRangeKeys,
  1806  	}
  1807  	d, err := Open("", dbOpts)
  1808  	require.NoError(t, err)
  1809  
  1810  	b := d.NewBatch()
  1811  	keyBuf := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen)
  1812  	for i := int64(0); i < ks.Count(); i++ {
  1813  		n := testkeys.WriteKeyAt(keyBuf, ks, i, i)
  1814  		b.Set(keyBuf[:n], keyBuf[:n], nil)
  1815  	}
  1816  	require.NoError(t, b.Commit(nil))
  1817  	return d
  1818  }
  1819  
  1820  func BenchmarkIteratorSeekGE(b *testing.B) {
  1821  	m, keys := buildMemTable(b)
  1822  	iter := &Iterator{
  1823  		comparer: *DefaultComparer,
  1824  		iter:     m.newIter(nil),
  1825  	}
  1826  	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
  1827  
  1828  	b.ResetTimer()
  1829  	for i := 0; i < b.N; i++ {
  1830  		key := keys[rng.Intn(len(keys))]
  1831  		iter.SeekGE(key)
  1832  	}
  1833  }
  1834  
  1835  func BenchmarkIteratorNext(b *testing.B) {
  1836  	m, _ := buildMemTable(b)
  1837  	iter := &Iterator{
  1838  		comparer: *DefaultComparer,
  1839  		iter:     m.newIter(nil),
  1840  	}
  1841  
  1842  	b.ResetTimer()
  1843  	for i := 0; i < b.N; i++ {
  1844  		if !iter.Valid() {
  1845  			iter.First()
  1846  		}
  1847  		iter.Next()
  1848  	}
  1849  }
  1850  
  1851  func BenchmarkIteratorPrev(b *testing.B) {
  1852  	m, _ := buildMemTable(b)
  1853  	iter := &Iterator{
  1854  		comparer: *DefaultComparer,
  1855  		iter:     m.newIter(nil),
  1856  	}
  1857  
  1858  	b.ResetTimer()
  1859  	for i := 0; i < b.N; i++ {
  1860  		if !iter.Valid() {
  1861  			iter.Last()
  1862  		}
  1863  		iter.Prev()
  1864  	}
  1865  }
  1866  
  1867  type twoLevelBloomTombstoneState struct {
  1868  	keys        [][]byte
  1869  	readers     [8][][]*sstable.Reader
  1870  	levelSlices [8][]manifest.LevelSlice
  1871  	indexFunc   func(twoLevelIndex bool, bloom bool, withTombstone bool) int
  1872  }
  1873  
  1874  func setupForTwoLevelBloomTombstone(b *testing.B, keyOffset int) twoLevelBloomTombstoneState {
  1875  	const blockSize = 32 << 10
  1876  	const restartInterval = 16
  1877  	const levelCount = 5
  1878  
  1879  	var readers [8][][]*sstable.Reader
  1880  	var levelSlices [8][]manifest.LevelSlice
  1881  	var keys [][]byte
  1882  	indexFunc := func(twoLevelIndex bool, bloom bool, withTombstone bool) int {
  1883  		index := 0
  1884  		if twoLevelIndex {
  1885  			index = 4
  1886  		}
  1887  		if bloom {
  1888  			index += 2
  1889  		}
  1890  		if withTombstone {
  1891  			index++
  1892  		}
  1893  		return index
  1894  	}
  1895  	for _, twoLevelIndex := range []bool{false, true} {
  1896  		for _, bloom := range []bool{false, true} {
  1897  			for _, withTombstone := range []bool{false, true} {
  1898  				index := indexFunc(twoLevelIndex, bloom, withTombstone)
  1899  				levels := levelCount
  1900  				if withTombstone {
  1901  					levels = 1
  1902  				}
  1903  				readers[index], levelSlices[index], keys = buildLevelsForMergingIterSeqSeek(
  1904  					b, blockSize, restartInterval, levels, keyOffset, withTombstone, bloom, twoLevelIndex)
  1905  			}
  1906  		}
  1907  	}
  1908  	return twoLevelBloomTombstoneState{
  1909  		keys: keys, readers: readers, levelSlices: levelSlices, indexFunc: indexFunc}
  1910  }
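
        // indexFunc packs the three configuration booleans into a 3-bit index:
        // a two-level index contributes 4, a bloom filter contributes 2, and a
        // tombstone contributes 1, so for example indexFunc(true, false, true)
        // == 5. The eight combinations map onto the eight slots of the readers
        // and levelSlices arrays.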
  1911  
  1912  // BenchmarkIteratorSeqSeekPrefixGENotFound exercises the case of SeekPrefixGE
  1913  // specifying monotonic keys all of which precede actual keys present in L6 of
  1914  // the DB. Moreover, with-tombstone=true exercises the sub-case where those
  1915  // actual keys are deleted using a range tombstone that has not physically
  1916  // deleted those keys due to the presence of a snapshot that needs to see
  1917  // those keys. This sub-case needs to be efficient in (a) avoiding iteration
  1918  // over all those deleted keys, including repeated iteration, and (b) using
  1919  // the next optimization, since the seeks are monotonic.
  1920  func BenchmarkIteratorSeqSeekPrefixGENotFound(b *testing.B) {
  1921  	const keyOffset = 100000
  1922  	state := setupForTwoLevelBloomTombstone(b, keyOffset)
  1923  	readers := state.readers
  1924  	levelSlices := state.levelSlices
  1925  	indexFunc := state.indexFunc
  1926  
  1927  	// We will not be seeking to the keys that were written but instead to
  1928  	// keys before the written keys. This is to validate that the optimization
  1929  	// to use Next still functions when mergingIter checks for the prefix
  1930  	// match, and that mergingIter can avoid iterating over all the keys
  1931  	// deleted by a range tombstone when there is no possibility of matching
  1932  	// the prefix.
  1933  	var keys [][]byte
  1934  	for i := 0; i < keyOffset; i++ {
  1935  		keys = append(keys, []byte(fmt.Sprintf("%08d", i)))
  1936  	}
  1937  	for _, skip := range []int{1, 2, 4} {
  1938  		for _, twoLevelIndex := range []bool{false, true} {
  1939  			for _, bloom := range []bool{false, true} {
  1940  				for _, withTombstone := range []bool{false, true} {
  1941  					b.Run(fmt.Sprintf("skip=%d/two-level=%t/bloom=%t/with-tombstone=%t",
  1942  						skip, twoLevelIndex, bloom, withTombstone),
  1943  						func(b *testing.B) {
  1944  							index := indexFunc(twoLevelIndex, bloom, withTombstone)
  1945  							readers := readers[index]
  1946  							levelSlices := levelSlices[index]
  1947  							m := buildMergingIter(readers, levelSlices)
  1948  							iter := Iterator{
  1949  								comparer: *testkeys.Comparer,
  1950  								merge:    DefaultMerger.Merge,
  1951  								iter:     m,
  1952  							}
  1953  							pos := 0
  1954  							b.ResetTimer()
  1955  							for i := 0; i < b.N; i++ {
  1956  								// When withTombstone=true, and prior to the
  1957  								// optimization to stop early due to a range
  1958  								// tombstone, the iteration would continue into the
  1959  								// next file, and not be able to use Next at the lower
  1960  								// level in the next SeekPrefixGE call. So we would
  1961  								// incur the cost of iterating over all the deleted
  1962  								// keys for every seek. Note that it is not possible
  1963  								// to do a noop optimization in Iterator for the
  1964  								// prefix case, unlike SeekGE/SeekLT, since we don't
  1965  								// know if the iterators inside mergingIter are all
  1966  								// appropriately positioned -- some may not be due to
  1967  								// bloom filters not matching.
  1968  								valid := iter.SeekPrefixGE(keys[pos])
  1969  								if valid {
  1970  									b.Fatalf("key should not be found")
  1971  								}
  1972  								pos += skip
  1973  								if pos >= keyOffset {
  1974  									pos = 0
  1975  								}
  1976  							}
  1977  							b.StopTimer()
  1978  							iter.Close()
  1979  						})
  1980  				}
  1981  			}
  1982  		}
  1983  	}
  1984  	for _, r := range readers {
  1985  		for i := range r {
  1986  			for j := range r[i] {
  1987  				r[i][j].Close()
  1988  			}
  1989  		}
  1990  	}
  1991  }
  1992  
  1993  // BenchmarkIteratorSeqSeekPrefixGEFound exercises the case of SeekPrefixGE
  1994  // specifying monotonic keys that are present in L6 of the DB. Moreover,
  1995  // with-tombstone=true exercises the sub-case where those actual keys are
  1996  // deleted using a range tombstone that has not physically deleted those keys
  1997  // due to the presence of a snapshot that needs to see those keys. This
  1998  // sub-case needs to be efficient in (a) avoiding iteration over all those
  1999  // deleted keys, including repeated iteration, and (b) using the next
  2000  // optimization, since the seeks are monotonic.
  2001  func BenchmarkIteratorSeqSeekPrefixGEFound(b *testing.B) {
  2002  	state := setupForTwoLevelBloomTombstone(b, 0)
  2003  	keys := state.keys
  2004  	readers := state.readers
  2005  	levelSlices := state.levelSlices
  2006  	indexFunc := state.indexFunc
  2007  
  2008  	for _, skip := range []int{1, 2, 4} {
  2009  		for _, twoLevelIndex := range []bool{false, true} {
  2010  			for _, bloom := range []bool{false, true} {
  2011  				for _, withTombstone := range []bool{false, true} {
  2012  					b.Run(fmt.Sprintf("skip=%d/two-level=%t/bloom=%t/with-tombstone=%t",
  2013  						skip, twoLevelIndex, bloom, withTombstone),
  2014  						func(b *testing.B) {
  2015  							index := indexFunc(twoLevelIndex, bloom, withTombstone)
  2016  							readers := readers[index]
  2017  							levelSlices := levelSlices[index]
  2018  							m := buildMergingIter(readers, levelSlices)
  2019  							iter := Iterator{
  2020  								comparer: *testkeys.Comparer,
  2021  								merge:    DefaultMerger.Merge,
  2022  								iter:     m,
  2023  							}
  2024  							pos := 0
  2025  							b.ResetTimer()
  2026  							for i := 0; i < b.N; i++ {
  2027  								// When withTombstone=true, and prior to the
  2028  								// optimization to stop early due to a range
  2029  								// tombstone, the iteration would continue into the
  2030  								// next file, and not be able to use Next at the lower
  2031  								// level in the next SeekPrefixGE call. So we would
  2032  								// incur the cost of iterating over all the deleted
  2033  								// keys for every seek. Note that it is not possible
  2034  								// to do a noop optimization in Iterator for the
  2035  								// prefix case, unlike SeekGE/SeekLT, since we don't
  2036  								// know if the iterators inside mergingIter are all
  2037  								// appropriately positioned -- some may not be due to
  2038  								// bloom filters not matching.
  2039  								_ = iter.SeekPrefixGE(keys[pos])
  2040  								pos += skip
  2041  								if pos >= len(keys) {
  2042  									pos = 0
  2043  								}
  2044  							}
  2045  							b.StopTimer()
  2046  							iter.Close()
  2047  						})
  2048  				}
  2049  			}
  2050  		}
  2051  	}
  2052  	for _, r := range readers {
  2053  		for i := range r {
  2054  			for j := range r[i] {
  2055  				r[i][j].Close()
  2056  			}
  2057  		}
  2058  	}
  2059  }
  2060  
  2061  // BenchmarkIteratorSeqSeekGEWithBounds is analogous to
  2062  // BenchmarkMergingIterSeqSeekGEWithBounds, except for using an Iterator,
  2063  // which causes it to exercise the end-to-end code path.
  2064  func BenchmarkIteratorSeqSeekGEWithBounds(b *testing.B) {
  2065  	const blockSize = 32 << 10
  2066  	const restartInterval = 16
  2067  	const levelCount = 5
  2068  	for _, twoLevelIndex := range []bool{false, true} {
  2069  		b.Run(fmt.Sprintf("two-level=%t", twoLevelIndex),
  2070  			func(b *testing.B) {
  2071  				readers, levelSlices, keys := buildLevelsForMergingIterSeqSeek(
  2072  					b, blockSize, restartInterval, levelCount, 0, /* keyOffset */
  2073  					false, false, twoLevelIndex)
  2074  				m := buildMergingIter(readers, levelSlices)
  2075  				iter := Iterator{
  2076  					comparer: *testkeys.Comparer,
  2077  					merge:    DefaultMerger.Merge,
  2078  					iter:     m,
  2079  				}
  2080  				keyCount := len(keys)
  2081  				b.ResetTimer()
  2082  				for i := 0; i < b.N; i++ {
  2083  					pos := i % (keyCount - 1)
  2084  					iter.SetBounds(keys[pos], keys[pos+1])
  2085  					// SeekGE will return keys[pos].
  2086  					valid := iter.SeekGE(keys[pos])
  2087  					for valid {
  2088  						valid = iter.Next()
  2089  					}
  2090  					if iter.Error() != nil {
  2091  						b.Fatal(iter.Error())
  2092  					}
  2093  				}
  2094  				iter.Close()
  2095  				for i := range readers {
  2096  					for j := range readers[i] {
  2097  						readers[i][j].Close()
  2098  					}
  2099  				}
  2100  			})
  2101  	}
  2102  }
  2103  
  2104  func BenchmarkIteratorSeekGENoop(b *testing.B) {
  2105  	const blockSize = 32 << 10
  2106  	const restartInterval = 16
  2107  	const levelCount = 5
  2108  	const keyOffset = 10000
  2109  	readers, levelSlices, _ := buildLevelsForMergingIterSeqSeek(
  2110  		b, blockSize, restartInterval, levelCount, keyOffset, false, false, false)
  2111  	var keys [][]byte
  2112  	for i := 0; i < keyOffset; i++ {
  2113  		keys = append(keys, []byte(fmt.Sprintf("%08d", i)))
  2114  	}
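
        	// Every seek key above precedes keyOffset, and therefore every key
        	// written to the DB. Without a limit, each SeekGE lands on the same
        	// first key of the DB; with a limit, the seek stops at the limit
        	// before reaching any real key. Either way the benchmark measures
        	// how cheaply the iterator handles these effectively no-op seeks.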
  2115  	for _, withLimit := range []bool{false, true} {
  2116  		b.Run(fmt.Sprintf("withLimit=%t", withLimit), func(b *testing.B) {
  2117  			m := buildMergingIter(readers, levelSlices)
  2118  			iter := Iterator{
  2119  				comparer: *testkeys.Comparer,
  2120  				merge:    DefaultMerger.Merge,
  2121  				iter:     m,
  2122  			}
  2123  			b.ResetTimer()
  2124  			for i := 0; i < b.N; i++ {
  2125  				pos := i % (len(keys) - 1)
  2126  				if withLimit {
  2127  					if iter.SeekGEWithLimit(keys[pos], keys[pos+1]) != IterAtLimit {
  2128  						b.Fatal("should be at limit")
  2129  					}
  2130  				} else {
  2131  					if !iter.SeekGE(keys[pos]) {
  2132  						b.Fatal("should be valid")
  2133  					}
  2134  				}
  2135  			}
  2136  			iter.Close()
  2137  		})
  2138  	}
  2139  	for i := range readers {
  2140  		for j := range readers[i] {
  2141  			readers[i][j].Close()
  2142  		}
  2143  	}
  2144  }
  2145  
  2146  func BenchmarkBlockPropertyFilter(b *testing.B) {
  2147  	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
  2148  	for _, matchInterval := range []int{1, 10, 100, 1000} {
  2149  		b.Run(fmt.Sprintf("match-interval=%d", matchInterval), func(b *testing.B) {
  2150  			mem := vfs.NewMem()
  2151  			opts := &Options{
  2152  				FS:                 mem,
  2153  				FormatMajorVersion: FormatNewest,
  2154  				BlockPropertyCollectors: []func() BlockPropertyCollector{
  2155  					func() BlockPropertyCollector {
  2156  						return sstable.NewBlockIntervalCollector(
  2157  							"0", &testBlockIntervalCollector{numLength: 3}, nil, /* range key collector */
  2158  						)
  2159  					},
  2160  				},
  2161  			}
  2162  			d, err := Open("", opts)
  2163  			require.NoError(b, err)
  2164  			defer func() {
  2165  				require.NoError(b, d.Close())
  2166  			}()
  2167  			batch := d.NewBatch()
  2168  			const numKeys = 20 * 1000
  2169  			const valueSize = 1000
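        			// Each key is a 6-digit sequence number followed by a 3-digit
        			// property value (i % matchInterval). With the [0, 1) interval
        			// filter configured below, only blocks containing a key whose
        			// property value is 0 (i.e. i%matchInterval == 0) should match.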
  2170  			for i := 0; i < numKeys; i++ {
  2171  				key := fmt.Sprintf("%06d%03d", i, i%matchInterval)
  2172  				value := randValue(valueSize, rng)
  2173  				require.NoError(b, batch.Set([]byte(key), value, nil))
  2174  			}
  2175  			require.NoError(b, batch.Commit(nil))
  2176  			require.NoError(b, d.Flush())
  2177  			require.NoError(b, d.Compact(nil, []byte{0xFF}, false))
  2178  
  2179  			for _, filter := range []bool{false, true} {
  2180  				b.Run(fmt.Sprintf("filter=%t", filter), func(b *testing.B) {
  2181  					var iterOpts IterOptions
  2182  					if filter {
  2183  						iterOpts.PointKeyFilters = []BlockPropertyFilter{
  2184  							sstable.NewBlockIntervalFilter("0",
  2185  								uint64(0), uint64(1)),
  2186  						}
  2187  					}
  2188  					iter, _ := d.NewIter(&iterOpts)
  2189  					b.ResetTimer()
  2190  					for i := 0; i < b.N; i++ {
  2191  						valid := iter.First()
  2192  						for valid {
  2193  							valid = iter.Next()
  2194  						}
  2195  					}
  2196  					b.StopTimer()
  2197  					require.NoError(b, iter.Close())
  2198  				})
  2199  			}
  2200  		})
  2201  	}
  2202  }
  2203  
  2204  func TestRangeKeyMaskingRandomized(t *testing.T) {
  2205  	seed := *seed
  2206  	if seed == 0 {
  2207  		seed = uint64(time.Now().UnixNano())
  2208  		t.Logf("seed: %d", seed)
  2209  	}
  2210  	rng := rand.New(rand.NewSource(seed))
  2211  
  2212  	// Generate a keyspace with point keys, and range keys that will
  2213  	// mask some of the point keys.
  2214  	var timestamps []int64
  2215  	for i := 0; i <= 100; i++ {
  2216  		timestamps = append(timestamps, rng.Int63n(1000))
  2217  	}
  2218  
  2219  	ks := testkeys.Alpha(5)
  2220  	numKeys := 1000 + rng.Intn(9000)
  2221  	keys := make([][]byte, numKeys)
  2222  	keyTimeStamps := make([]int64, numKeys) // ts associated with the keys.
  2223  	for i := 0; i < numKeys; i++ {
  2224  		keys[i] = make([]byte, 5+testkeys.MaxSuffixLen)
  2225  		keyTimeStamps[i] = timestamps[rng.Intn(len(timestamps))]
  2226  		n := testkeys.WriteKeyAt(keys[i], ks, rng.Int63n(ks.Count()), keyTimeStamps[i])
  2227  		keys[i] = keys[i][:n]
  2228  	}
  2229  
  2230  	numRangeKeys := rng.Intn(20)
  2231  	type rkey struct {
  2232  		start  []byte
  2233  		end    []byte
  2234  		suffix []byte
  2235  	}
  2236  	rkeys := make([]rkey, numRangeKeys)
  2237  	pointKeyHidden := make([]bool, numKeys)
  2238  	for i := 0; i < numRangeKeys; i++ {
  2239  		rkeys[i].start = make([]byte, 5)
  2240  		rkeys[i].end = make([]byte, 5)
  2241  
  2242  		testkeys.WriteKey(rkeys[i].start[:5], ks, rng.Int63n(ks.Count()))
  2243  		testkeys.WriteKey(rkeys[i].end[:5], ks, rng.Int63n(ks.Count()))
  2244  
  2245  		for bytes.Equal(rkeys[i].start[:5], rkeys[i].end[:5]) {
  2246  			testkeys.WriteKey(rkeys[i].end[:5], ks, rng.Int63n(ks.Count()))
  2247  		}
  2248  
  2249  		if bytes.Compare(rkeys[i].start[:5], rkeys[i].end[:5]) > 0 {
  2250  			rkeys[i].start, rkeys[i].end = rkeys[i].end, rkeys[i].start
  2251  		}
  2252  
  2253  		rkeyTimestamp := timestamps[rng.Intn(len(timestamps))]
  2254  		rkeys[i].suffix = []byte("@" + strconv.FormatInt(rkeyTimestamp, 10))
  2255  
  2256  		// Each time we create a range key, check if the range key masks any
  2257  		// point keys.
  2258  		for j, pkey := range keys {
  2259  			if pointKeyHidden[j] {
  2260  				continue
  2261  			}
  2262  
  2263  			if keyTimeStamps[j] >= rkeyTimestamp {
  2264  				continue
  2265  			}
  2266  
  2267  			if testkeys.Comparer.Compare(pkey, rkeys[i].start) >= 0 &&
  2268  				testkeys.Comparer.Compare(pkey, rkeys[i].end) < 0 {
  2269  				pointKeyHidden[j] = true
  2270  			}
  2271  		}
  2272  	}
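
        	// At this point pointKeyHidden[j] is true exactly when some range key
        	// with a strictly newer (larger) timestamp covers keys[j]. The
        	// iteration below asserts that none of these masked point keys are
        	// surfaced.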
  2273  
  2274  	// Define a simple base testOpts, and a randomized testOpts. The results
  2275  	// of iteration will be compared.
  2276  	type testOpts struct {
  2277  		levelOpts []LevelOptions
  2278  		filter    func() BlockPropertyFilterMask
  2279  	}
  2280  
  2281  	baseOpts := testOpts{
  2282  		levelOpts: make([]LevelOptions, 7),
  2283  	}
  2284  	for i := 0; i < len(baseOpts.levelOpts); i++ {
  2285  		baseOpts.levelOpts[i].TargetFileSize = 1
  2286  		baseOpts.levelOpts[i].BlockSize = 1
  2287  	}
  2288  
  2289  	randomOpts := testOpts{
  2290  		levelOpts: []LevelOptions{
  2291  			{
  2292  				TargetFileSize: int64(1 + rng.Intn(2<<20)), // Vary the L0 file size.
  2293  				BlockSize:      1 + rng.Intn(32<<10),
  2294  			},
  2295  		},
  2296  	}
  2297  	if rng.Intn(2) == 0 {
  2298  		randomOpts.filter = func() BlockPropertyFilterMask {
  2299  			return sstable.NewTestKeysMaskingFilter()
  2300  		}
  2301  	}
  2302  
  2303  	maxProcs := runtime.GOMAXPROCS(0)
  2304  
  2305  	opts1 := &Options{
  2306  		FS:                       vfs.NewStrictMem(),
  2307  		Comparer:                 testkeys.Comparer,
  2308  		FormatMajorVersion:       FormatNewest,
  2309  		MaxConcurrentCompactions: func() int { return maxProcs/2 + 1 },
  2310  		BlockPropertyCollectors: []func() BlockPropertyCollector{
  2311  			sstable.NewTestKeysBlockPropertyCollector,
  2312  		},
  2313  	}
  2314  	opts1.Levels = baseOpts.levelOpts
  2315  	d1, err := Open("", opts1)
  2316  	require.NoError(t, err)
  2317  
  2318  	opts2 := &Options{
  2319  		FS:                       vfs.NewStrictMem(),
  2320  		Comparer:                 testkeys.Comparer,
  2321  		FormatMajorVersion:       FormatNewest,
  2322  		MaxConcurrentCompactions: func() int { return maxProcs/2 + 1 },
  2323  		BlockPropertyCollectors: []func() BlockPropertyCollector{
  2324  			sstable.NewTestKeysBlockPropertyCollector,
  2325  		},
  2326  	}
  2327  	opts2.Levels = randomOpts.levelOpts
  2328  	d2, err := Open("", opts2)
  2329  	require.NoError(t, err)
  2330  
  2331  	defer func() {
  2332  		if err := d1.Close(); err != nil {
  2333  			t.Fatal(err)
  2334  		}
  2335  		if err := d2.Close(); err != nil {
  2336  			t.Fatal(err)
  2337  		}
  2338  	}()
  2339  
  2340  	// Run test
  2341  	var batch1 *Batch
  2342  	var batch2 *Batch
  2343  	const keysPerBatch = 50
  2344  	for i := 0; i < numKeys; i++ {
  2345  		if i%keysPerBatch == 0 {
  2346  			if batch1 != nil {
  2347  				require.NoError(t, batch1.Commit(nil))
  2348  				require.NoError(t, batch2.Commit(nil))
  2349  			}
  2350  			batch1 = d1.NewBatch()
  2351  			batch2 = d2.NewBatch()
  2352  		}
  2353  		require.NoError(t, batch1.Set(keys[i], []byte{1}, nil))
  2354  		require.NoError(t, batch2.Set(keys[i], []byte{1}, nil))
  2355  	}
  2356  
  2357  	for _, rkey := range rkeys {
  2358  		require.NoError(t, d1.RangeKeySet(rkey.start, rkey.end, rkey.suffix, nil, nil))
  2359  		require.NoError(t, d2.RangeKeySet(rkey.start, rkey.end, rkey.suffix, nil, nil))
  2360  	}
  2361  
  2362  	// Scan the keyspace
  2363  	iter1Opts := IterOptions{
  2364  		KeyTypes: IterKeyTypePointsAndRanges,
  2365  		RangeKeyMasking: RangeKeyMasking{
  2366  			Suffix: []byte("@1000"),
  2367  			Filter: baseOpts.filter,
  2368  		},
  2369  	}
  2370  
  2371  	iter2Opts := IterOptions{
  2372  		KeyTypes: IterKeyTypePointsAndRanges,
  2373  		RangeKeyMasking: RangeKeyMasking{
  2374  			Suffix: []byte("@1000"),
  2375  			Filter: randomOpts.filter,
  2376  		},
  2377  	}
  2378  
  2379  	iter1, _ := d1.NewIter(&iter1Opts)
  2380  	iter2, _ := d2.NewIter(&iter2Opts)
  2381  	defer func() {
  2382  		if err := iter1.Close(); err != nil {
  2383  			t.Fatal(err)
  2384  		}
  2385  		if err := iter2.Close(); err != nil {
  2386  			t.Fatal(err)
  2387  		}
  2388  	}()
  2389  
  2390  	for valid1, valid2 := iter1.First(), iter2.First(); valid1 || valid2; valid1, valid2 = iter1.Next(), iter2.Next() {
  2391  		if valid1 != valid2 {
  2392  			t.Fatalf("iteration didn't produce identical results")
  2393  		}
  2394  
  2395  		// Confirm exposed range key state is identical.
  2396  		hasP1, hasR1 := iter1.HasPointAndRange()
  2397  		hasP2, hasR2 := iter2.HasPointAndRange()
  2398  		if hasP1 != hasP2 || hasR1 != hasR2 {
  2399  			t.Fatalf("iteration didn't produce identical results")
  2400  		}
  2401  		if hasP1 && !bytes.Equal(iter1.Key(), iter2.Key()) {
  2402  			t.Fatalf("iteration didn't produce identical point keys: %s, %s", iter1.Key(), iter2.Key())
  2403  		}
  2404  		if hasR1 {
  2405  			// Confirm that the range key is the same.
  2406  			b1, e1 := iter1.RangeBounds()
  2407  			b2, e2 := iter2.RangeBounds()
  2408  			if !bytes.Equal(b1, b2) || !bytes.Equal(e1, e2) {
  2409  			t.Fatalf(
  2410  				"iteration didn't produce identical range keys: [%s, %s], [%s, %s]",
  2411  				b1, e1, b2, e2,
  2412  			)
  2413  			}
  2414  
  2415  		}
  2416  
  2417  		// Confirm that the returned point key wasn't hidden.
  2418  		for j, pkey := range keys {
  2419  			if bytes.Equal(iter1.Key(), pkey) && pointKeyHidden[j] {
  2420  				t.Fatalf("hidden point key was exposed %s %d", pkey, keyTimeStamps[j])
  2421  			}
  2422  		}
  2423  	}
  2424  }
  2425  
  2426  // BenchmarkIterator_RangeKeyMasking benchmarks a scan through a keyspace with
  2427  // 10,000 random suffixed point keys, and three range keys covering most of the
  2428  // keyspace. It varies the suffix of the range keys in subbenchmarks to exercise
  2429  // varying amounts of masking. This benchmark configures a block-property
  2430  // filter, allowing it to skip blocks that are wholly contained within a range
  2431  // key and consist only of points with suffixes lower than the range key's.
  2432  func BenchmarkIterator_RangeKeyMasking(b *testing.B) {
  2433  	const (
  2434  		prefixLen    = 20
  2435  		valueSize    = 1024
  2436  		batches      = 200
  2437  		keysPerBatch = 50
  2438  	)
  2439  	var alloc bytealloc.A
  2440  	rng := rand.New(rand.NewSource(uint64(1658872515083979000)))
  2441  	keyBuf := make([]byte, prefixLen+testkeys.MaxSuffixLen)
  2442  	valBuf := make([]byte, valueSize)
  2443  
  2444  	mem := vfs.NewStrictMem()
  2445  	maxProcs := runtime.GOMAXPROCS(0)
  2446  	opts := &Options{
  2447  		FS:                       mem,
  2448  		Comparer:                 testkeys.Comparer,
  2449  		FormatMajorVersion:       FormatNewest,
  2450  		MaxConcurrentCompactions: func() int { return maxProcs/2 + 1 },
  2451  		BlockPropertyCollectors: []func() BlockPropertyCollector{
  2452  			sstable.NewTestKeysBlockPropertyCollector,
  2453  		},
  2454  	}
  2455  	d, err := Open("", opts)
  2456  	require.NoError(b, err)
  2457  
  2458  	keys := make([][]byte, keysPerBatch*batches)
  2459  	for bi := 0; bi < batches; bi++ {
  2460  		batch := d.NewBatch()
  2461  		for k := 0; k < keysPerBatch; k++ {
  2462  			randStr(keyBuf[:prefixLen], rng)
  2463  			suffix := rng.Int63n(100)
  2464  			suffixLen := testkeys.WriteSuffix(keyBuf[prefixLen:], suffix)
  2465  			randStr(valBuf[:], rng)
  2466  
  2467  			var key []byte
  2468  			alloc, key = alloc.Copy(keyBuf[:prefixLen+suffixLen])
  2469  			keys[bi*keysPerBatch+k] = key
  2470  			require.NoError(b, batch.Set(key, valBuf[:], nil))
  2471  		}
  2472  		require.NoError(b, batch.Commit(nil))
  2473  	}
  2474  
  2475  	// Wait for compactions to complete before starting benchmarks. We don't
  2476  	// want to benchmark while compactions are running.
  2477  	d.mu.Lock()
  2478  	for d.mu.compact.compactingCount > 0 {
  2479  		d.mu.compact.cond.Wait()
  2480  	}
  2481  	d.mu.Unlock()
  2482  	b.Log(d.Metrics().String())
  2483  	require.NoError(b, d.Close())
  2484  	// Set ignore syncs to true so that each subbenchmark may mutate state and
  2485  	// then revert back to the original state.
  2486  	mem.SetIgnoreSyncs(true)
  2487  
  2488  	// TODO(jackson): Benchmark lazy-combined iteration versus not.
  2489  	// TODO(jackson): Benchmark seeks.
  2490  	for _, rkSuffix := range []string{"@10", "@50", "@75", "@100"} {
  2491  		b.Run(fmt.Sprintf("range-keys-suffixes=%s", rkSuffix), func(b *testing.B) {
  2492  			d, err := Open("", opts)
  2493  			require.NoError(b, err)
  2494  			// Populate 3 range keys, covering most of the keyspace, at the
  2495  			// given suffix.
  2496  			require.NoError(b, d.RangeKeySet([]byte("b"), []byte("e"), []byte(rkSuffix), nil, nil))
  2497  			require.NoError(b, d.RangeKeySet([]byte("f"), []byte("p"), []byte(rkSuffix), nil, nil))
  2498  			require.NoError(b, d.RangeKeySet([]byte("q"), []byte("z"), []byte(rkSuffix), nil, nil))
  2499  			require.NoError(b, d.Flush())
  2500  
  2502  			iterOpts := IterOptions{
  2503  				KeyTypes: IterKeyTypePointsAndRanges,
  2504  				RangeKeyMasking: RangeKeyMasking{
  2505  					Suffix: []byte("@100"),
  2506  					Filter: func() BlockPropertyFilterMask {
  2507  						return sstable.NewTestKeysMaskingFilter()
  2508  					},
  2509  				},
  2510  			}
  2511  			b.Run("forward", func(b *testing.B) {
  2512  				b.Run("seekprefix", func(b *testing.B) {
  2513  					b.ResetTimer()
  2514  					for i := 0; i < b.N; i++ {
  2515  						iter, _ := d.NewIter(&iterOpts)
  2516  						count := 0
  2517  						for j := 0; j < len(keys); j++ {
  2518  							if !iter.SeekPrefixGE(keys[j]) {
  2519  								b.Errorf("unable to find %q\n", keys[j])
  2520  							}
  2521  							if hasPoint, _ := iter.HasPointAndRange(); hasPoint {
  2522  								count++
  2523  							}
  2524  						}
  2525  						if err := iter.Close(); err != nil {
  2526  							b.Fatal(err)
  2527  						}
  2528  					}
  2529  				})
  2530  				b.Run("next", func(b *testing.B) {
  2531  					b.ResetTimer()
  2532  					for i := 0; i < b.N; i++ {
  2533  						iter, _ := d.NewIter(&iterOpts)
  2534  						count := 0
  2535  						for valid := iter.First(); valid; valid = iter.Next() {
  2536  							if hasPoint, _ := iter.HasPointAndRange(); hasPoint {
  2537  								count++
  2538  							}
  2539  						}
  2540  						if err := iter.Close(); err != nil {
  2541  							b.Fatal(err)
  2542  						}
  2543  					}
  2544  				})
  2545  			})
  2546  			b.Run("backward", func(b *testing.B) {
  2547  				b.ResetTimer()
  2548  				for i := 0; i < b.N; i++ {
  2549  					iter, _ := d.NewIter(&iterOpts)
  2550  					count := 0
  2551  					for valid := iter.Last(); valid; valid = iter.Prev() {
  2552  						if hasPoint, _ := iter.HasPointAndRange(); hasPoint {
  2553  							count++
  2554  						}
  2555  					}
  2556  					if err := iter.Close(); err != nil {
  2557  						b.Fatal(err)
  2558  					}
  2559  				}
  2560  			})
  2561  
  2562  			// Reset the benchmark state at the end of each run to remove the
  2563  			// range keys we wrote.
  2564  			b.StopTimer()
  2565  			require.NoError(b, d.Close())
  2566  			mem.ResetToSyncedState()
  2567  		})
  2568  	}
  2569  
  2570  }
  2571  
  2572  func BenchmarkIteratorScan(b *testing.B) {
  2573  	const maxPrefixLen = 8
  2574  	keyBuf := make([]byte, maxPrefixLen+testkeys.MaxSuffixLen)
  2575  	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
  2576  
  2577  	for _, keyCount := range []int64{100, 1000, 10000} {
  2578  		for _, readAmp := range []int{1, 3, 7, 10} {
  2579  			func() {
  2580  				opts := &Options{
  2581  					FS:                 vfs.NewMem(),
  2582  					FormatMajorVersion: FormatNewest,
  2583  				}
  2584  				opts.DisableAutomaticCompactions = true
  2585  				d, err := Open("", opts)
  2586  				require.NoError(b, err)
  2587  				defer func() { require.NoError(b, d.Close()) }()
  2588  
  2589  				// Take the very large keyspace consisting of alphabetic
  2590  				// characters of lengths up to `maxPrefixLen` and reduce it down
  2591  				// to `keyCount` keys by keeping only every (Count()/keyCount)-th key.
  2592  				keys := testkeys.Alpha(maxPrefixLen)
  2593  				keys = keys.EveryN(keys.Count() / keyCount)
  2594  				if keys.Count() < keyCount {
  2595  					b.Fatalf("expected %d keys, found %d", keyCount, keys.Count())
  2596  				}
  2597  
  2598  				// Portion the keys into `readAmp` overlapping key sets.
  2599  				for _, ks := range testkeys.Divvy(keys, int64(readAmp)) {
  2600  					batch := d.NewBatch()
  2601  					for i := int64(0); i < ks.Count(); i++ {
  2602  						n := testkeys.WriteKeyAt(keyBuf[:], ks, i, rng.Int63n(100))
  2603  						batch.Set(keyBuf[:n], keyBuf[:n], nil)
  2604  					}
  2605  					require.NoError(b, batch.Commit(nil))
  2606  					require.NoError(b, d.Flush())
  2607  				}
  2608  				// Each level is a sublevel.
  2609  				m := d.Metrics()
  2610  				require.Equal(b, readAmp, m.ReadAmp())
  2611  
  2612  				for _, keyTypes := range []IterKeyType{IterKeyTypePointsOnly, IterKeyTypePointsAndRanges} {
  2613  					iterOpts := IterOptions{KeyTypes: keyTypes}
  2614  					b.Run(fmt.Sprintf("keys=%d,r-amp=%d,key-types=%s", keyCount, readAmp, keyTypes), func(b *testing.B) {
  2615  						for i := 0; i < b.N; i++ {
  2616  							b.StartTimer()
  2617  							iter, _ := d.NewIter(&iterOpts)
  2618  							valid := iter.First()
  2619  							for valid {
  2620  								valid = iter.Next()
  2621  							}
  2622  							b.StopTimer()
  2623  							require.NoError(b, iter.Close())
  2624  						}
  2625  					})
  2626  				}
  2627  			}()
  2628  		}
  2629  	}
  2630  }
  2631  
  2632  func BenchmarkIteratorScanNextPrefix(b *testing.B) {
  2633  	setupBench := func(
  2634  		b *testing.B, maxKeysPerLevel, versCount, readAmp int, enableValueBlocks bool) *DB {
  2635  		keyBuf := make([]byte, readAmp+testkeys.MaxSuffixLen)
  2636  		opts := &Options{
  2637  			FS:                 vfs.NewMem(),
  2638  			Comparer:           testkeys.Comparer,
  2639  			FormatMajorVersion: FormatNewest,
  2640  		}
  2641  		opts.DisableAutomaticCompactions = true
  2642  		opts.Experimental.EnableValueBlocks = func() bool { return enableValueBlocks }
  2643  		d, err := Open("", opts)
  2644  		require.NoError(b, err)
  2645  
  2646  		// Create `readAmp` levels. Prefixes in the top of the LSM are length 1.
  2647  		// Prefixes in the bottom of the LSM are length `readAmp`. Eg,:
  2648  		// Prefixes in the bottom of the LSM are length `readAmp`. E.g.:
  2649  		//    a  b c...
  2650  		//    aa ab ac...
  2651  		//    aaa aab aac...
  2652  		//
  2653  		for l := readAmp; l > 0; l-- {
  2654  			ks := testkeys.Alpha(l)
  2655  			if step := ks.Count() / int64(maxKeysPerLevel); step > 1 {
  2656  				ks = ks.EveryN(step)
  2657  			}
  2658  			if ks.Count() > int64(maxKeysPerLevel) {
  2659  				ks = ks.Slice(0, int64(maxKeysPerLevel))
  2660  			}
  2661  
  2662  			batch := d.NewBatch()
  2663  			for i := int64(0); i < ks.Count(); i++ {
  2664  				for v := 0; v < versCount; v++ {
  2665  					n := testkeys.WriteKeyAt(keyBuf[:], ks, i, int64(versCount-v+1))
  2666  					batch.Set(keyBuf[:n], keyBuf[:n], nil)
  2667  				}
  2668  			}
  2669  			require.NoError(b, batch.Commit(nil))
  2670  			require.NoError(b, d.Flush())
  2671  		}
  2672  
  2673  		// Each level is a sublevel.
  2674  		m := d.Metrics()
  2675  		require.Equal(b, readAmp, m.ReadAmp())
  2676  		return d
  2677  	}
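
        	// NextPrefix advances to the first key whose prefix differs from the
        	// current key's, so with versCount versions per prefix a single call
        	// skips the remaining versions of the current prefix. That skip, across
        	// varying version counts and read amplification, is what this benchmark
        	// measures.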
  2678  
  2679  	for _, keysPerLevel := range []int{10, 100, 1000} {
  2680  		b.Run(fmt.Sprintf("keysPerLevel=%d", keysPerLevel), func(b *testing.B) {
  2681  			for _, versionCount := range []int{1, 2, 10, 100} {
  2682  				b.Run(fmt.Sprintf("versions=%d", versionCount), func(b *testing.B) {
  2683  					for _, readAmp := range []int{1, 3, 7, 10} {
  2684  						b.Run(fmt.Sprintf("ramp=%d", readAmp), func(b *testing.B) {
  2685  							for _, enableValueBlocks := range []bool{false, true} {
  2686  								b.Run(fmt.Sprintf("value-blocks=%t", enableValueBlocks), func(b *testing.B) {
  2687  									d := setupBench(b, keysPerLevel, versionCount, readAmp, enableValueBlocks)
  2688  									defer func() { require.NoError(b, d.Close()) }()
  2689  									for _, keyTypes := range []IterKeyType{
  2690  										IterKeyTypePointsOnly, IterKeyTypePointsAndRanges} {
  2691  										b.Run(fmt.Sprintf("key-types=%s", keyTypes), func(b *testing.B) {
  2692  											iterOpts := IterOptions{KeyTypes: keyTypes}
  2693  											iter, _ := d.NewIter(&iterOpts)
  2694  											var valid bool
  2695  											b.ResetTimer()
  2696  											for i := 0; i < b.N; i++ {
  2697  												if !valid {
  2698  													valid = iter.First()
  2699  													if !valid {
  2700  														b.Fatalf("iter must be valid")
  2701  													}
  2702  												} else {
  2703  													valid = iter.NextPrefix()
  2704  												}
  2705  											}
  2706  											b.StopTimer()
  2707  											require.NoError(b, iter.Close())
  2708  										})
  2709  									}
  2710  								})
  2711  							}
  2712  						})
  2713  					}
  2714  				})
  2715  			}
  2716  		})
  2717  	}
  2718  }
  2719  
  2720  func BenchmarkCombinedIteratorSeek(b *testing.B) {
  2721  	for _, withRangeKey := range []bool{false, true} {
  2722  		b.Run(fmt.Sprintf("range-key=%t", withRangeKey), func(b *testing.B) {
  2723  			rng := rand.New(rand.NewSource(uint64(1658872515083979000)))
  2724  			ks := testkeys.Alpha(1)
  2725  			opts := &Options{
  2726  				FS:                 vfs.NewMem(),
  2727  				Comparer:           testkeys.Comparer,
  2728  				FormatMajorVersion: FormatNewest,
  2729  			}
  2730  			d, err := Open("", opts)
  2731  			require.NoError(b, err)
  2732  			defer func() { require.NoError(b, d.Close()) }()
  2733  
  2734  			keys := make([][]byte, ks.Count())
  2735  			for i := int64(0); i < ks.Count(); i++ {
  2736  				keys[i] = testkeys.Key(ks, i)
  2737  				var val [40]byte
  2738  				rng.Read(val[:])
  2739  				require.NoError(b, d.Set(keys[i], val[:], nil))
  2740  			}
  2741  			if withRangeKey {
  2742  				require.NoError(b, d.RangeKeySet([]byte("a"), []byte{'z', 0x00}, []byte("@5"), nil, nil))
  2743  			}
  2744  
  2745  			batch := d.NewIndexedBatch()
  2746  			defer batch.Close()
  2747  
  2748  			for _, useBatch := range []bool{false, true} {
  2749  				b.Run(fmt.Sprintf("batch=%t", useBatch), func(b *testing.B) {
  2750  					for i := 0; i < b.N; i++ {
  2751  						iterOpts := IterOptions{KeyTypes: IterKeyTypePointsAndRanges}
  2752  						var it *Iterator
  2753  						if useBatch {
  2754  							it, _ = batch.NewIter(&iterOpts)
  2755  						} else {
  2756  							it, _ = d.NewIter(&iterOpts)
  2757  						}
  2758  						for j := 0; j < len(keys); j++ {
  2759  							if !it.SeekGE(keys[j]) {
  2760  								b.Errorf("key %q missing", keys[j])
  2761  							}
  2762  						}
  2763  						require.NoError(b, it.Close())
  2764  					}
  2765  				})
  2766  			}
  2767  		})
  2768  	}
  2769  }
  2770  
  2771  // BenchmarkCombinedIteratorSeek_Bounded benchmarks a bounded iterator that
  2772  // performs repeated seeks over 5% of the middle of a keyspace covered by a
  2773  // range key that's fragmented across hundreds of files. The iterator bounds
  2774  // should prevent defragmenting beyond the iterator's bounds.
  2775  func BenchmarkCombinedIteratorSeek_Bounded(b *testing.B) {
  2776  	d, keys := buildFragmentedRangeKey(b, uint64(1658872515083979000))
  2777  
  2778  	var lower = len(keys) / 2
  2779  	var upper = len(keys)/2 + len(keys)/20 // 5%
  2780  	iterOpts := IterOptions{
  2781  		KeyTypes:   IterKeyTypePointsAndRanges,
  2782  		LowerBound: keys[lower],
  2783  		UpperBound: keys[upper],
  2784  	}
  2785  	b.ResetTimer()
  2786  	for i := 0; i < b.N; i++ {
  2787  		it, _ := d.NewIter(&iterOpts)
  2788  		for j := lower; j < upper; j++ {
  2789  			if !it.SeekGE(keys[j]) {
  2790  				b.Errorf("key %q missing", keys[j])
  2791  			}
  2792  		}
  2793  		require.NoError(b, it.Close())
  2794  	}
  2795  }
  2796  
  2797  // BenchmarkCombinedIteratorSeekPrefix benchmarks an iterator that
  2798  // performs repeated prefix seeks over 5% of the middle of a keyspace covered by a
  2799  // range key that's fragmented across hundreds of files. The seek prefix should
  2800  // avoid defragmenting beyond the seek prefixes.
  2801  func BenchmarkCombinedIteratorSeekPrefix(b *testing.B) {
  2802  	d, keys := buildFragmentedRangeKey(b, uint64(1658872515083979000))
  2803  
  2804  	var lower = len(keys) / 2
  2805  	var upper = len(keys)/2 + len(keys)/20 // 5%
  2806  	iterOpts := IterOptions{
  2807  		KeyTypes: IterKeyTypePointsAndRanges,
  2808  	}
  2809  	b.ResetTimer()
  2810  	for i := 0; i < b.N; i++ {
  2811  		it, _ := d.NewIter(&iterOpts)
  2812  		for j := lower; j < upper; j++ {
  2813  			if !it.SeekPrefixGE(keys[j]) {
  2814  				b.Errorf("key %q missing", keys[j])
  2815  			}
  2816  		}
  2817  		require.NoError(b, it.Close())
  2818  	}
  2819  }
  2820  
  2821  func buildFragmentedRangeKey(b testing.TB, seed uint64) (d *DB, keys [][]byte) {
  2822  	rng := rand.New(rand.NewSource(seed))
  2823  	ks := testkeys.Alpha(2)
  2824  	opts := &Options{
  2825  		FS:                        vfs.NewMem(),
  2826  		Comparer:                  testkeys.Comparer,
  2827  		FormatMajorVersion:        FormatNewest,
  2828  		L0CompactionFileThreshold: 1,
  2829  	}
  2830  	opts.EnsureDefaults()
  2831  	for l := 0; l < len(opts.Levels); l++ {
  2832  		opts.Levels[l].TargetFileSize = 1
  2833  	}
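        	// A TargetFileSize of 1 at every level, an L0CompactionFileThreshold of
        	// 1, and a flush after each Set below force each point key and range-key
        	// fragment into its own tiny sstable, fragmenting the range key across
        	// many L6 files (the assertion at the end expects at least 700 of them).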
  2834  	var err error
  2835  	d, err = Open("", opts)
  2836  	require.NoError(b, err)
  2837  
  2838  	keys = make([][]byte, ks.Count())
  2839  	for i := int64(0); i < ks.Count(); i++ {
  2840  		keys[i] = testkeys.Key(ks, i)
  2841  	}
  2842  	for i := 0; i < len(keys); i++ {
  2843  		var val [40]byte
  2844  		rng.Read(val[:])
  2845  		require.NoError(b, d.Set(keys[i], val[:], nil))
  2846  		if i < len(keys)-1 {
  2847  			require.NoError(b, d.RangeKeySet(keys[i], keys[i+1], []byte("@5"), nil, nil))
  2848  		}
  2849  		require.NoError(b, d.Flush())
  2850  	}
  2851  
  2852  	d.mu.Lock()
  2853  	for d.mu.compact.compactingCount > 0 {
  2854  		d.mu.compact.cond.Wait()
  2855  	}
  2856  	v := d.mu.versions.currentVersion()
  2857  	d.mu.Unlock()
  2858  	require.GreaterOrEqualf(b, v.Levels[numLevels-1].Len(),
  2859  		700, "expect many (≥700) L6 files but found %d", v.Levels[numLevels-1].Len())
  2860  	return d, keys
  2861  }
  2862  
  2863  // BenchmarkSeekPrefixTombstones benchmarks a SeekPrefixGE into the beginning of
  2864  // a series of sstables containing exclusively range tombstones. Previously,
  2865  // such a seek would next through all the tombstone files until it arrived at a
  2866  // point key or exhausted the level's files. The SeekPrefixGE should not next
  2867  // beyond the files that contain the prefix.
  2868  //
  2869  // See cockroachdb/cockroach#89327.
  2870  func BenchmarkSeekPrefixTombstones(b *testing.B) {
  2871  	o := (&Options{
  2872  		FS:                 vfs.NewMem(),
  2873  		Comparer:           testkeys.Comparer,
  2874  		FormatMajorVersion: FormatNewest,
  2875  	}).EnsureDefaults()
  2876  	wOpts := o.MakeWriterOptions(numLevels-1, FormatNewest.MaxTableFormat())
  2877  	d, err := Open("", o)
  2878  	require.NoError(b, err)
  2879  	defer func() { require.NoError(b, d.Close()) }()
  2880  
  2881  	// Keep a snapshot open for the duration of the test to prevent elision-only
  2882  	// compactions from removing the ingested files containing exclusively
  2883  	// elidable tombstones.
  2884  	defer d.NewSnapshot().Close()
  2885  
  2886  	ks := testkeys.Alpha(2)
  2887  	for i := int64(0); i < ks.Count()-1; i++ {
  2888  		func() {
  2889  			filename := fmt.Sprintf("ext%02d", i)
  2890  			f, err := o.FS.Create(filename)
  2891  			require.NoError(b, err)
  2892  			w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), wOpts)
  2893  			require.NoError(b, w.DeleteRange(testkeys.Key(ks, i), testkeys.Key(ks, i+1)))
  2894  			require.NoError(b, w.Close())
  2895  			require.NoError(b, d.Ingest([]string{filename}))
  2896  		}()
  2897  	}
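
        	// Each ingested sstable holds a single range deletion spanning two
        	// adjacent alpha(2) keys, so the bottommost level consists exclusively
        	// of tombstone-only files. The SeekPrefixGE loop below should stay
        	// within the file whose bounds contain the seek prefix rather than
        	// nexting through every tombstone file.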
  2898  
  2899  	d.mu.Lock()
  2900  	require.Equal(b, int64(ks.Count()-1), d.mu.versions.metrics.Levels[numLevels-1].NumFiles)
  2901  	d.mu.Unlock()
  2902  
  2903  	seekKey := testkeys.Key(ks, 1)
  2904  	iter, _ := d.NewIter(nil)
  2905  	defer iter.Close()
  2906  	b.ResetTimer()
  2907  	defer b.StopTimer()
  2908  	for i := 0; i < b.N; i++ {
  2909  		iter.SeekPrefixGE(seekKey)
  2910  	}
  2911  }