github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/merging_iter_test.go

// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package pebble

import (
	"context"
	"fmt"
	"strings"
	"testing"
	"time"

	"github.com/cockroachdb/datadriven"
	"github.com/cockroachdb/pebble/bloom"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/itertest"
	"github.com/cockroachdb/pebble/internal/keyspan"
	"github.com/cockroachdb/pebble/internal/manifest"
	"github.com/cockroachdb/pebble/internal/rangedel"
	"github.com/cockroachdb/pebble/internal/testkeys"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
	"github.com/cockroachdb/pebble/sstable"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/rand"
)

func TestMergingIter(t *testing.T) {
	var stats base.InternalIteratorStats
	newFunc := func(iters ...internalIterator) internalIterator {
		return newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
			func(a []byte) int { return len(a) }, iters...)
	}
	testIterator(t, newFunc, func(r *rand.Rand) [][]string {
		// Shuffle testKeyValuePairs into one or more splits. Each individual
		// split is in increasing order, but different splits may overlap in
		// range. Some of the splits may be empty.
		splits := make([][]string, 1+r.Intn(2+len(testKeyValuePairs)))
		for _, kv := range testKeyValuePairs {
			j := r.Intn(len(splits))
			splits[j] = append(splits[j], kv)
		}
		return splits
	})
}
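
// The following sketch is illustrative only (not part of the original test
// suite, and the function name is hypothetical): it shows the minimal wiring
// needed to merge two fakeIters with newMergingIter, assuming fakeIter's
// keys/vals fields behave as they do in the tests above.
func sketchMergeTwoFakeIters() internalIterator {
	var stats base.InternalIteratorStats
	a := &fakeIter{
		keys: []InternalKey{base.ParseInternalKey("a.SET.1"), base.ParseInternalKey("c.SET.1")},
		vals: [][]byte{[]byte("va"), []byte("vc")},
	}
	b := &fakeIter{
		keys: []InternalKey{base.ParseInternalKey("b.SET.1")},
		vals: [][]byte{[]byte("vb")},
	}
	// The merged view yields a.SET.1, b.SET.1, c.SET.1 in order.
	return newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
		func(k []byte) int { return len(k) }, a, b)
}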
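// TestMergingIterSeek runs the datadriven cases in
// testdata/merging_iter_seek: each line of a "define" block describes one
// child iterator as whitespace-separated key:value pairs, and "iter" blocks
// drive seeks and steps against the merged view.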
func TestMergingIterSeek(t *testing.T) {
	var def string
	datadriven.RunTest(t, "testdata/merging_iter_seek", func(t *testing.T, d *datadriven.TestData) string {
		switch d.Cmd {
		case "define":
			def = d.Input
			return ""

		case "iter":
			var iters []internalIterator
			for _, line := range strings.Split(def, "\n") {
				f := &fakeIter{}
				for _, key := range strings.Fields(line) {
					j := strings.Index(key, ":")
					f.keys = append(f.keys, base.ParseInternalKey(key[:j]))
					f.vals = append(f.vals, []byte(key[j+1:]))
				}
				iters = append(iters, f)
			}

			var stats base.InternalIteratorStats
			iter := newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
				func(a []byte) int { return len(a) }, iters...)
			defer iter.Close()
			return itertest.RunInternalIterCmd(t, d, iter)

		default:
			return fmt.Sprintf("unknown command: %s", d.Cmd)
		}
	})
}

func TestMergingIterNextPrev(t *testing.T) {
	// The data is the same in each of these cases, but divided up amongst the
	// iterators differently. This data must match the definition in
	// testdata/internal_iter_next.
	iterCases := [][]string{
		{
			"a.SET.2:2 a.SET.1:1 b.SET.2:2 b.SET.1:1 c.SET.2:2 c.SET.1:1",
		},
		{
			"a.SET.2:2 b.SET.2:2 c.SET.2:2",
			"a.SET.1:1 b.SET.1:1 c.SET.1:1",
		},
		{
			"a.SET.2:2 b.SET.2:2",
			"a.SET.1:1 b.SET.1:1",
			"c.SET.2:2 c.SET.1:1",
		},
		{
			"a.SET.2:2",
			"a.SET.1:1",
			"b.SET.2:2",
			"b.SET.1:1",
			"c.SET.2:2",
			"c.SET.1:1",
		},
	}

	for _, c := range iterCases {
		t.Run("", func(t *testing.T) {
			datadriven.RunTest(t, "testdata/internal_iter_next", func(t *testing.T, d *datadriven.TestData) string {
				switch d.Cmd {
				case "define":
					// Ignore. We've defined the iterator data above.
					return ""

				case "iter":
					iters := make([]internalIterator, len(c))
					for i := range c {
						f := &fakeIter{}
						iters[i] = f
						for _, key := range strings.Fields(c[i]) {
							j := strings.Index(key, ":")
							f.keys = append(f.keys, base.ParseInternalKey(key[:j]))
							f.vals = append(f.vals, []byte(key[j+1:]))
						}
					}

					var stats base.InternalIteratorStats
					iter := newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
						func(a []byte) int { return len(a) }, iters...)
					defer iter.Close()
					return itertest.RunInternalIterCmd(t, d, iter)

				default:
					return fmt.Sprintf("unknown command: %s", d.Cmd)
				}
			})
		})
	}
}

func TestMergingIterCornerCases(t *testing.T) {
	memFS := vfs.NewMem()
	cmp := DefaultComparer.Compare
	fmtKey := DefaultComparer.FormatKey
	opts := (*Options)(nil).EnsureDefaults()
	var v *version

	// Indexed by fileNum.
	var readers []*sstable.Reader
	defer func() {
		for _, r := range readers {
			r.Close()
		}
	}()

	var fileNum base.FileNum
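	// newIters stands in for the table cache's iterator constructor: given a
	// file's metadata, it opens that file's point iterator and range-del
	// iterator from the readers slice above.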
	newIters :=
		func(_ context.Context, file *manifest.FileMetadata, opts *IterOptions, iio internalIterOpts,
		) (internalIterator, keyspan.FragmentIterator, error) {
			r := readers[file.FileNum]
			rangeDelIter, err := r.NewRawRangeDelIter()
			if err != nil {
				return nil, nil, err
			}
			iter, err := r.NewIterWithBlockPropertyFilters(
				opts.GetLowerBound(), opts.GetUpperBound(), nil, true /* useFilterBlock */, iio.stats,
				sstable.CategoryAndQoS{}, nil, sstable.TrivialReaderProvider{Reader: r})
			if err != nil {
				return nil, nil, err
			}
			return iter, rangeDelIter, nil
		}

	datadriven.RunTest(t, "testdata/merging_iter", func(t *testing.T, d *datadriven.TestData) string {
		switch d.Cmd {
		case "define":
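			// Each file is defined by a pair of lines: the first holds its
			// smallest and largest internal keys, the second its key:value
			// pairs. A bare "L" (or "L0") line starts the next level. For
			// example (illustrative input, not actual testdata):
			//
			//	a.SET.3 c.SET.1
			//	a.SET.3:va c.SET.1:vc
			//	L
			//	b.SET.2 b.SET.2
			//	b.SET.2:vb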
			lines := strings.Split(d.Input, "\n")

			var files [numLevels][]*fileMetadata
			var level int
			for i := 0; i < len(lines); i++ {
				line := lines[i]
				line = strings.TrimSpace(line)
				if line == "L" || line == "L0" {
					// Start the next level.
					level++
					continue
				}
				keys := strings.Fields(line)
				smallestKey := base.ParseInternalKey(keys[0])
				largestKey := base.ParseInternalKey(keys[1])
				m := (&fileMetadata{
					FileNum: fileNum,
				}).ExtendPointKeyBounds(cmp, smallestKey, largestKey)
				m.InitPhysicalBacking()
				files[level] = append(files[level], m)

				i++
				line = lines[i]
				line = strings.TrimSpace(line)
				name := fmt.Sprint(fileNum)
				fileNum++
				f, err := memFS.Create(name)
				if err != nil {
					return err.Error()
				}
				w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{})
				var tombstones []keyspan.Span
				frag := keyspan.Fragmenter{
					Cmp:    cmp,
					Format: fmtKey,
					Emit: func(fragmented keyspan.Span) {
						tombstones = append(tombstones, fragmented)
					},
				}
				keyvalues := strings.Fields(line)
				for _, kv := range keyvalues {
					j := strings.Index(kv, ":")
					ikey := base.ParseInternalKey(kv[:j])
					value := []byte(kv[j+1:])
					switch ikey.Kind() {
					case InternalKeyKindRangeDelete:
						frag.Add(keyspan.Span{Start: ikey.UserKey, End: value, Keys: []keyspan.Key{{Trailer: ikey.Trailer}}})
					default:
						if err := w.Add(ikey, value); err != nil {
							return err.Error()
						}
					}
				}
				frag.Finish()
				for _, v := range tombstones {
					if err := rangedel.Encode(&v, w.Add); err != nil {
						return err.Error()
					}
				}
				if err := w.Close(); err != nil {
					return err.Error()
				}
				f, err = memFS.Open(name)
				if err != nil {
					return err.Error()
				}
				readable, err := sstable.NewSimpleReadable(f)
				if err != nil {
					return err.Error()
				}
				r, err := sstable.NewReader(readable, sstable.ReaderOptions{})
				if err != nil {
					return err.Error()
				}
				readers = append(readers, r)
			}

			v = newVersion(opts, files)
			return v.String()
		case "iter":
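			// Build one levelIter per non-empty level, wiring each level's
			// range-del iterator and boundary context into its
			// mergingIterLevel so the merging iterator can observe file
			// boundaries.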
			levelIters := make([]mergingIterLevel, 0, len(v.Levels))
			var stats base.InternalIteratorStats
			for i, l := range v.Levels {
				slice := l.Slice()
				if slice.Empty() {
					continue
				}
				li := &levelIter{}
				li.init(context.Background(), IterOptions{}, testkeys.Comparer,
					newIters, slice.Iter(), manifest.Level(i), internalIterOpts{stats: &stats})
				i := len(levelIters)
				levelIters = append(levelIters, mergingIterLevel{iter: li})
				li.initRangeDel(&levelIters[i].rangeDelIter)
				li.initBoundaryContext(&levelIters[i].levelIterBoundaryContext)
			}
			miter := &mergingIter{}
			miter.init(nil /* opts */, &stats, cmp, func(a []byte) int { return len(a) }, levelIters...)
			defer miter.Close()
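			// Force the seek optimization so these cases exercise it
			// regardless of the usual heuristics.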
			miter.forceEnableSeekOpt = true
			// Exercise SetContext for fun
			// (https://github.com/cockroachdb/pebble/pull/3037 caused a SIGSEGV due
			// to a nil pointer dereference).
			miter.SetContext(context.Background())
			return itertest.RunInternalIterCmd(t, d, miter,
				itertest.Verbose, itertest.WithStats(&stats))
		default:
			return fmt.Sprintf("unknown command: %s", d.Cmd)
		}
	})
}

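// buildMergingIterTables writes roughly count*2MB of sequential keys,
// scattering each key into one of count sstables at random, and returns the
// opened readers, the full key list, and a cleanup func that closes the
// readers.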
func buildMergingIterTables(
	b *testing.B, blockSize, restartInterval, count int,
) ([]*sstable.Reader, [][]byte, func()) {
	mem := vfs.NewMem()
	files := make([]vfs.File, count)
	for i := range files {
		f, err := mem.Create(fmt.Sprintf("bench%d", i))
		if err != nil {
			b.Fatal(err)
		}
		files[i] = f
	}

	writers := make([]*sstable.Writer, len(files))
	for i := range files {
		writers[i] = sstable.NewWriter(objstorageprovider.NewFileWritable(files[i]), sstable.WriterOptions{
			BlockRestartInterval: restartInterval,
			BlockSize:            blockSize,
			Compression:          NoCompression,
		})
	}

	estimatedSize := func() uint64 {
		var sum uint64
		for _, w := range writers {
			sum += w.EstimatedSize()
		}
		return sum
	}

	var keys [][]byte
	var ikey InternalKey
	targetSize := uint64(count * (2 << 20))
	for i := 0; estimatedSize() < targetSize; i++ {
		key := []byte(fmt.Sprintf("%08d", i))
		keys = append(keys, key)
		ikey.UserKey = key
		j := rand.Intn(len(writers))
		w := writers[j]
		w.Add(ikey, nil)
	}

	for _, w := range writers {
		if err := w.Close(); err != nil {
			b.Fatal(err)
		}
	}

	opts := sstable.ReaderOptions{Cache: NewCache(128 << 20)}
	defer opts.Cache.Unref()

	readers := make([]*sstable.Reader, len(files))
	for i := range files {
		f, err := mem.Open(fmt.Sprintf("bench%d", i))
		if err != nil {
			b.Fatal(err)
		}
		readable, err := sstable.NewSimpleReadable(f)
		if err != nil {
			b.Fatal(err)
		}
		readers[i], err = sstable.NewReader(readable, opts)
		if err != nil {
			b.Fatal(err)
		}
	}
	return readers, keys, func() {
		for _, r := range readers {
			require.NoError(b, r.Close())
		}
	}
}

func BenchmarkMergingIterSeekGE(b *testing.B) {
	const blockSize = 32 << 10

	for _, restartInterval := range []int{16} {
		b.Run(fmt.Sprintf("restart=%d", restartInterval),
			func(b *testing.B) {
				for _, count := range []int{1, 2, 3, 4, 5} {
					b.Run(fmt.Sprintf("count=%d", count),
						func(b *testing.B) {
							readers, keys, cleanup := buildMergingIterTables(b, blockSize, restartInterval, count)
							defer cleanup()
							iters := make([]internalIterator, len(readers))
							for i := range readers {
								var err error
								iters[i], err = readers[i].NewIter(nil /* lower */, nil /* upper */)
								require.NoError(b, err)
							}
							var stats base.InternalIteratorStats
							m := newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
								func(a []byte) int { return len(a) }, iters...)
							rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))

							b.ResetTimer()
							for i := 0; i < b.N; i++ {
								m.SeekGE(keys[rng.Intn(len(keys))], base.SeekGEFlagsNone)
							}
							m.Close()
						})
				}
			})
	}
}

func BenchmarkMergingIterNext(b *testing.B) {
	const blockSize = 32 << 10

	for _, restartInterval := range []int{16} {
		b.Run(fmt.Sprintf("restart=%d", restartInterval),
			func(b *testing.B) {
				for _, count := range []int{1, 2, 3, 4, 5} {
					b.Run(fmt.Sprintf("count=%d", count),
						func(b *testing.B) {
							readers, _, cleanup := buildMergingIterTables(b, blockSize, restartInterval, count)
							defer cleanup()
							iters := make([]internalIterator, len(readers))
							for i := range readers {
								var err error
								iters[i], err = readers[i].NewIter(nil /* lower */, nil /* upper */)
								require.NoError(b, err)
							}
							var stats base.InternalIteratorStats
							m := newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
								func(a []byte) int { return len(a) }, iters...)

							b.ResetTimer()
							for i := 0; i < b.N; i++ {
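								// Wrap around to First once the iterator is
								// exhausted so every iteration observes a key.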
								key, _ := m.Next()
								if key == nil {
									key, _ = m.First()
								}
								_ = key
							}
							m.Close()
						})
				}
			})
	}
}

func BenchmarkMergingIterPrev(b *testing.B) {
	const blockSize = 32 << 10

	for _, restartInterval := range []int{16} {
		b.Run(fmt.Sprintf("restart=%d", restartInterval),
			func(b *testing.B) {
				for _, count := range []int{1, 2, 3, 4, 5} {
					b.Run(fmt.Sprintf("count=%d", count),
						func(b *testing.B) {
							readers, _, cleanup := buildMergingIterTables(b, blockSize, restartInterval, count)
							defer cleanup()
							iters := make([]internalIterator, len(readers))
							for i := range readers {
								var err error
								iters[i], err = readers[i].NewIter(nil /* lower */, nil /* upper */)
								require.NoError(b, err)
							}
							var stats base.InternalIteratorStats
							m := newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
								func(a []byte) int { return len(a) }, iters...)

							b.ResetTimer()
							for i := 0; i < b.N; i++ {
								key, _ := m.Prev()
								if key == nil {
									key, _ = m.Last()
								}
								_ = key
							}
							m.Close()
						})
				}
			})
	}
}

// buildLevelsForMergingIterSeqSeek builds levels for
// BenchmarkMergingIterSeqSeekGEWithBounds. The lowest level, index 0 here,
// contains most of the data. Each level has 2 files, to allow for stepping
// into the second file if needed. The lowest level has all the keys in file 0,
// and a single "lastIKey" in file 1. File 0 in every other level has only the
// first and last key of file 0 of the lowest level -- this simulates
// sparseness of data, but not necessarily of file width, in the higher levels.
// File 1 in the other levels is similar to file 1 in the lowest level, since
// it exists only for stepping into. If writeRangeTombstoneToLowestLevel is
// true, a range tombstone is written to the first lowest-level file that
// deletes all the keys in it, and no other levels are written.
func buildLevelsForMergingIterSeqSeek(
	b *testing.B,
	blockSize, restartInterval, levelCount int,
	keyOffset int,
	writeRangeTombstoneToLowestLevel bool,
	writeBloomFilters bool,
	forceTwoLevelIndex bool,
) (readers [][]*sstable.Reader, levelSlices []manifest.LevelSlice, keys [][]byte) {
	mem := vfs.NewMem()
	if writeRangeTombstoneToLowestLevel && levelCount != 1 {
		panic("expect to write only 1 level")
	}
	files := make([][]vfs.File, levelCount)
	for i := range files {
		for j := 0; j < 2; j++ {
			f, err := mem.Create(fmt.Sprintf("bench%d_%d", i, j))
			if err != nil {
				b.Fatal(err)
			}
			files[i] = append(files[i], f)
		}
	}

	const targetL6FirstFileSize = 2 << 20
	writers := make([][]*sstable.Writer, levelCount)
	// A policy unlikely to have false positives.
	filterPolicy := bloom.FilterPolicy(100)
	for i := range files {
		for j := range files[i] {
			writerOptions := sstable.WriterOptions{
				BlockRestartInterval: restartInterval,
				BlockSize:            blockSize,
				Compression:          NoCompression,
			}
			if writeBloomFilters {
				writerOptions.FilterPolicy = filterPolicy
				writerOptions.FilterType = base.TableFilter
			}
			if forceTwoLevelIndex {
				if i == 0 && j == 0 {
					// Ignoring compression, this is the approximate number of
					// data blocks in the file.
					numDataBlocks := targetL6FirstFileSize / blockSize
					if numDataBlocks < 4 {
						b.Fatalf("cannot produce two level index")
					}
					// Produce ~2 lower-level index blocks.
					writerOptions.IndexBlockSize = (numDataBlocks / 2) * 8
				} else if j == 0 {
					// Only 2 keys in these files, so to produce two level indexes we
					// set the block sizes to 1.
					writerOptions.BlockSize = 1
					writerOptions.IndexBlockSize = 1
				}
			}
			writers[i] = append(writers[i], sstable.NewWriter(objstorageprovider.NewFileWritable(files[i][j]), writerOptions))
		}
	}

	i := keyOffset
	w := writers[0][0]
	for ; w.EstimatedSize() < targetL6FirstFileSize; i++ {
		key := []byte(fmt.Sprintf("%08d", i))
		keys = append(keys, key)
		ikey := base.MakeInternalKey(key, 0, InternalKeyKindSet)
		w.Add(ikey, nil)
	}
	if writeRangeTombstoneToLowestLevel {
		tombstoneKey := base.MakeInternalKey(keys[0], 1, InternalKeyKindRangeDelete)
		w.Add(tombstoneKey, []byte(fmt.Sprintf("%08d", i)))
	}
	for j := 1; j < len(files); j++ {
		for _, k := range []int{0, len(keys) - 1} {
			ikey := base.MakeInternalKey(keys[k], uint64(j), InternalKeyKindSet)
			writers[j][0].Add(ikey, nil)
		}
	}
	lastKey := []byte(fmt.Sprintf("%08d", i))
	keys = append(keys, lastKey)
	for j := 0; j < len(files); j++ {
		lastIKey := base.MakeInternalKey(lastKey, uint64(j), InternalKeyKindSet)
		writers[j][1].Add(lastIKey, nil)
	}
	for _, levelWriters := range writers {
		for j, w := range levelWriters {
			if err := w.Close(); err != nil {
				b.Fatal(err)
			}
			meta, err := w.Metadata()
			require.NoError(b, err)
			if forceTwoLevelIndex && j == 0 && meta.Properties.IndexType != 2 /* two-level index */ {
				b.Fatalf("did not produce two level index")
			}
		}
	}

	opts := sstable.ReaderOptions{Cache: NewCache(128 << 20), Comparer: DefaultComparer}
	if writeBloomFilters {
		opts.Filters = make(map[string]FilterPolicy)
		opts.Filters[filterPolicy.Name()] = filterPolicy
	}
	defer opts.Cache.Unref()

	readers = make([][]*sstable.Reader, levelCount)
	for i := range files {
		for j := range files[i] {
			f, err := mem.Open(fmt.Sprintf("bench%d_%d", i, j))
			if err != nil {
				b.Fatal(err)
			}
			readable, err := sstable.NewSimpleReadable(f)
			if err != nil {
				b.Fatal(err)
			}
			r, err := sstable.NewReader(readable, opts)
			if err != nil {
				b.Fatal(err)
			}
			readers[i] = append(readers[i], r)
		}
	}
	levelSlices = make([]manifest.LevelSlice, levelCount)
	for i := range readers {
		meta := make([]*fileMetadata, len(readers[i]))
		for j := range readers[i] {
			iter, err := readers[i][j].NewIter(nil /* lower */, nil /* upper */)
			require.NoError(b, err)
			smallest, _ := iter.First()
			meta[j] = &fileMetadata{}
			// The same FileNum is being reused across different levels, which
			// is harmless for the benchmark since each level has its own iterator
			// creation func.
			meta[j].FileNum = FileNum(j)
			largest, _ := iter.Last()
			meta[j].ExtendPointKeyBounds(opts.Comparer.Compare, smallest.Clone(), largest.Clone())
			meta[j].InitPhysicalBacking()
		}
		levelSlices[i] = manifest.NewLevelSliceSpecificOrder(meta)
	}
	return readers, levelSlices, keys
}

func buildMergingIter(readers [][]*sstable.Reader, levelSlices []manifest.LevelSlice) *mergingIter {
	mils := make([]mergingIterLevel, len(levelSlices))
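	// readers[0] holds the bulk of the data, so it is assigned to the lowest
	// (largest-numbered) merging level; readers[len(readers)-1] becomes
	// level 0.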
	for i := len(readers) - 1; i >= 0; i-- {
		levelIndex := i
		level := len(readers) - 1 - i
		newIters := func(
			_ context.Context, file *manifest.FileMetadata, opts *IterOptions, _ internalIterOpts,
		) (internalIterator, keyspan.FragmentIterator, error) {
			iter, err := readers[levelIndex][file.FileNum].NewIter(
				opts.LowerBound, opts.UpperBound)
			if err != nil {
				return nil, nil, err
			}
			rdIter, err := readers[levelIndex][file.FileNum].NewRawRangeDelIter()
			if err != nil {
				iter.Close()
				return nil, nil, err
			}
			return iter, rdIter, err
		}
		l := newLevelIter(
			context.Background(), IterOptions{}, testkeys.Comparer, newIters, levelSlices[i].Iter(),
			manifest.Level(level), internalIterOpts{})
		l.initRangeDel(&mils[level].rangeDelIter)
		l.initBoundaryContext(&mils[level].levelIterBoundaryContext)
		mils[level].iter = l
	}
	var stats base.InternalIteratorStats
	m := &mergingIter{}
	m.init(nil /* opts */, &stats, testkeys.Comparer.Compare,
		func(a []byte) int { return len(a) }, mils...)
	return m
}

// BenchmarkMergingIterSeqSeekGEWithBounds simulates the behavior of a
// mergingIter where monotonically increasing narrow bounds are repeatedly set
// and used to Seek and then iterate over the keys within the bounds. This
// resembles MVCC scanning by CockroachDB when doing a lookup/index join with
// a large number of left rows that are batched and reuse the same iterator,
// and which can have good locality of access. This results in the successive
// bounds being in the same file.
func BenchmarkMergingIterSeqSeekGEWithBounds(b *testing.B) {
	const blockSize = 32 << 10

	restartInterval := 16
	for _, levelCount := range []int{5} {
		b.Run(fmt.Sprintf("levelCount=%d", levelCount),
			func(b *testing.B) {
				readers, levelSlices, keys := buildLevelsForMergingIterSeqSeek(
					b, blockSize, restartInterval, levelCount, 0 /* keyOffset */, false, false, false)
				m := buildMergingIter(readers, levelSlices)
				keyCount := len(keys)
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					pos := i % (keyCount - 1)
					m.SetBounds(keys[pos], keys[pos+1])
					// SeekGE will return keys[pos].
					k, _ := m.SeekGE(keys[pos], base.SeekGEFlagsNone)
					for k != nil {
						k, _ = m.Next()
					}
				}
				m.Close()
				for i := range readers {
					for j := range readers[i] {
						readers[i][j].Close()
					}
				}
			})
	}
}

func BenchmarkMergingIterSeqSeekPrefixGE(b *testing.B) {
	const blockSize = 32 << 10
	const restartInterval = 16
	const levelCount = 5
	readers, levelSlices, keys := buildLevelsForMergingIterSeqSeek(
		b, blockSize, restartInterval, levelCount, 0 /* keyOffset */, false, false, false)

	for _, skip := range []int{1, 2, 4, 8, 16} {
		for _, useNext := range []bool{false, true} {
			b.Run(fmt.Sprintf("skip=%d/use-next=%t", skip, useNext),
				func(b *testing.B) {
					m := buildMergingIter(readers, levelSlices)
					keyCount := len(keys)
					pos := 0

					m.SeekPrefixGE(keys[pos], keys[pos], base.SeekGEFlagsNone)
					b.ResetTimer()
					for i := 0; i < b.N; i++ {
						pos += skip
						var flags base.SeekGEFlags
						if useNext {
							flags = flags.EnableTrySeekUsingNext()
						}
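						// After wrapping around, the seek key is no longer
						// monotonically increasing, so try-seek-using-next must
						// be disabled for this seek.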
						if pos >= keyCount {
							pos = 0
							flags = flags.DisableTrySeekUsingNext()
						}
						// SeekPrefixGE will return keys[pos].
						m.SeekPrefixGE(keys[pos], keys[pos], flags)
					}
					b.StopTimer()
					m.Close()
				})
		}
	}
	for i := range readers {
		for j := range readers[i] {
			readers[i][j].Close()
		}
	}
}