github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/merging_iter_test.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"strings"
    11  	"testing"
    12  	"time"
    13  
    14  	"github.com/cockroachdb/datadriven"
    15  	"github.com/cockroachdb/pebble/bloom"
    16  	"github.com/cockroachdb/pebble/internal/base"
    17  	"github.com/cockroachdb/pebble/internal/keyspan"
    18  	"github.com/cockroachdb/pebble/internal/manifest"
    19  	"github.com/cockroachdb/pebble/internal/rangedel"
    20  	"github.com/cockroachdb/pebble/internal/testkeys"
    21  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    22  	"github.com/cockroachdb/pebble/sstable"
    23  	"github.com/cockroachdb/pebble/vfs"
    24  	"github.com/stretchr/testify/require"
    25  	"golang.org/x/exp/rand"
    26  )
    27  
    28  func TestMergingIter(t *testing.T) {
    29  	var stats base.InternalIteratorStats
    30  	newFunc := func(iters ...internalIterator) internalIterator {
    31  		return newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
    32  			func(a []byte) int { return len(a) }, iters...)
    33  	}
    34  	testIterator(t, newFunc, func(r *rand.Rand) [][]string {
    35  		// Shuffle testKeyValuePairs into one or more splits. Each individual
    36  		// split is in increasing order, but different splits may overlap in
    37  		// range. Some of the splits may be empty.
    38  		splits := make([][]string, 1+r.Intn(2+len(testKeyValuePairs)))
    39  		for _, kv := range testKeyValuePairs {
    40  			j := r.Intn(len(splits))
    41  			splits[j] = append(splits[j], kv)
    42  		}
    43  		return splits
    44  	})
    45  }
    46  
    47  func TestMergingIterSeek(t *testing.T) {
    48  	var def string
    49  	datadriven.RunTest(t, "testdata/merging_iter_seek", func(t *testing.T, d *datadriven.TestData) string {
    50  		switch d.Cmd {
    51  		case "define":
    52  			def = d.Input
    53  			return ""
    54  
    55  		case "iter":
    56  			var iters []internalIterator
    57  			for _, line := range strings.Split(def, "\n") {
    58  				f := &fakeIter{}
    59  				for _, key := range strings.Fields(line) {
    60  					j := strings.Index(key, ":")
    61  					f.keys = append(f.keys, base.ParseInternalKey(key[:j]))
    62  					f.vals = append(f.vals, []byte(key[j+1:]))
    63  				}
    64  				iters = append(iters, f)
    65  			}
    66  
    67  			var stats base.InternalIteratorStats
    68  			iter := newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
    69  				func(a []byte) int { return len(a) }, iters...)
    70  			defer iter.Close()
    71  			return runInternalIterCmd(t, d, iter)
    72  
    73  		default:
    74  			return fmt.Sprintf("unknown command: %s", d.Cmd)
    75  		}
    76  	})
    77  }
    78  
    79  func TestMergingIterNextPrev(t *testing.T) {
    80  	// The data is the same in each of these cases, but divided up amongst the
    81  	// iterators differently. This data must match the definition in
    82  	// testdata/internal_iter_next.
    83  	iterCases := [][]string{
    84  		{
    85  			"a.SET.2:2 a.SET.1:1 b.SET.2:2 b.SET.1:1 c.SET.2:2 c.SET.1:1",
    86  		},
    87  		{
    88  			"a.SET.2:2 b.SET.2:2 c.SET.2:2",
    89  			"a.SET.1:1 b.SET.1:1 c.SET.1:1",
    90  		},
    91  		{
    92  			"a.SET.2:2 b.SET.2:2",
    93  			"a.SET.1:1 b.SET.1:1",
    94  			"c.SET.2:2 c.SET.1:1",
    95  		},
    96  		{
    97  			"a.SET.2:2",
    98  			"a.SET.1:1",
    99  			"b.SET.2:2",
   100  			"b.SET.1:1",
   101  			"c.SET.2:2",
   102  			"c.SET.1:1",
   103  		},
   104  	}
   105  
   106  	for _, c := range iterCases {
   107  		t.Run("", func(t *testing.T) {
   108  			datadriven.RunTest(t, "testdata/internal_iter_next", func(t *testing.T, d *datadriven.TestData) string {
   109  				switch d.Cmd {
   110  				case "define":
   111  					// Ignore. We've defined the iterator data above.
   112  					return ""
   113  
   114  				case "iter":
   115  					iters := make([]internalIterator, len(c))
   116  					for i := range c {
   117  						f := &fakeIter{}
   118  						iters[i] = f
   119  						for _, key := range strings.Fields(c[i]) {
   120  							j := strings.Index(key, ":")
   121  							f.keys = append(f.keys, base.ParseInternalKey(key[:j]))
   122  							f.vals = append(f.vals, []byte(key[j+1:]))
   123  						}
   124  					}
   125  
   126  					var stats base.InternalIteratorStats
   127  					iter := newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
   128  						func(a []byte) int { return len(a) }, iters...)
   129  					defer iter.Close()
   130  					return runInternalIterCmd(t, d, iter)
   131  
   132  				default:
   133  					return fmt.Sprintf("unknown command: %s", d.Cmd)
   134  				}
   135  			})
   136  		})
   137  	}
   138  }
   139  
   140  func TestMergingIterCornerCases(t *testing.T) {
   141  	memFS := vfs.NewMem()
   142  	cmp := DefaultComparer.Compare
   143  	fmtKey := DefaultComparer.FormatKey
   144  	opts := (*Options)(nil).EnsureDefaults()
   145  	var v *version
   146  
   147  	// Indexed by fileNum.
   148  	var readers []*sstable.Reader
   149  	defer func() {
   150  		for _, r := range readers {
   151  			r.Close()
   152  		}
   153  	}()
   154  
   155  	var fileNum base.FileNum
   156  	newIters :=
   157  		func(_ context.Context, file *manifest.FileMetadata, opts *IterOptions, iio internalIterOpts,
   158  		) (internalIterator, keyspan.FragmentIterator, error) {
   159  			r := readers[file.FileNum]
   160  			rangeDelIter, err := r.NewRawRangeDelIter()
   161  			if err != nil {
   162  				return nil, nil, err
   163  			}
   164  			iter, err := r.NewIterWithBlockPropertyFilters(
   165  				opts.GetLowerBound(), opts.GetUpperBound(), nil, true /* useFilterBlock */, iio.stats,
   166  				sstable.TrivialReaderProvider{Reader: r})
   167  			if err != nil {
   168  				return nil, nil, err
   169  			}
   170  			return iter, rangeDelIter, nil
   171  		}
   172  
   173  	datadriven.RunTest(t, "testdata/merging_iter", func(t *testing.T, d *datadriven.TestData) string {
   174  		switch d.Cmd {
   175  		case "define":
   176  			lines := strings.Split(d.Input, "\n")
   177  
   178  			var files [numLevels][]*fileMetadata
   179  			var level int
   180  			for i := 0; i < len(lines); i++ {
   181  				line := lines[i]
   182  				line = strings.TrimSpace(line)
   183  				if line == "L" || line == "L0" {
   184  					// start next level
   185  					level++
   186  					continue
   187  				}
   188  				keys := strings.Fields(line)
   189  				smallestKey := base.ParseInternalKey(keys[0])
   190  				largestKey := base.ParseInternalKey(keys[1])
   191  				m := (&fileMetadata{
   192  					FileNum: fileNum,
   193  				}).ExtendPointKeyBounds(cmp, smallestKey, largestKey)
   194  				m.InitPhysicalBacking()
   195  				files[level] = append(files[level], m)
   196  
   197  				i++
   198  				line = lines[i]
   199  				line = strings.TrimSpace(line)
   200  				name := fmt.Sprint(fileNum)
   201  				fileNum++
   202  				f, err := memFS.Create(name)
   203  				if err != nil {
   204  					return err.Error()
   205  				}
   206  				w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{})
   207  				var tombstones []keyspan.Span
   208  				frag := keyspan.Fragmenter{
   209  					Cmp:    cmp,
   210  					Format: fmtKey,
   211  					Emit: func(fragmented keyspan.Span) {
   212  						tombstones = append(tombstones, fragmented)
   213  					},
   214  				}
   215  				keyvalues := strings.Fields(line)
   216  				for _, kv := range keyvalues {
   217  					j := strings.Index(kv, ":")
   218  					ikey := base.ParseInternalKey(kv[:j])
   219  					value := []byte(kv[j+1:])
   220  					switch ikey.Kind() {
   221  					case InternalKeyKindRangeDelete:
   222  						frag.Add(keyspan.Span{Start: ikey.UserKey, End: value, Keys: []keyspan.Key{{Trailer: ikey.Trailer}}})
   223  					default:
   224  						if err := w.Add(ikey, value); err != nil {
   225  							return err.Error()
   226  						}
   227  					}
   228  				}
   229  				frag.Finish()
   230  				for _, v := range tombstones {
   231  					if err := rangedel.Encode(&v, w.Add); err != nil {
   232  						return err.Error()
   233  					}
   234  				}
   235  				if err := w.Close(); err != nil {
   236  					return err.Error()
   237  				}
   238  				f, err = memFS.Open(name)
   239  				if err != nil {
   240  					return err.Error()
   241  				}
   242  				readable, err := sstable.NewSimpleReadable(f)
   243  				if err != nil {
   244  					return err.Error()
   245  				}
   246  				r, err := sstable.NewReader(readable, sstable.ReaderOptions{})
   247  				if err != nil {
   248  					return err.Error()
   249  				}
   250  				readers = append(readers, r)
   251  			}
   252  
   253  			v = newVersion(opts, files)
   254  			return v.String()
   255  		case "iter":
   256  			levelIters := make([]mergingIterLevel, 0, len(v.Levels))
   257  			var stats base.InternalIteratorStats
   258  			for i, l := range v.Levels {
   259  				slice := l.Slice()
   260  				if slice.Empty() {
   261  					continue
   262  				}
   263  				li := &levelIter{}
   264  				li.init(context.Background(), IterOptions{}, testkeys.Comparer,
   265  					newIters, slice.Iter(), manifest.Level(i), internalIterOpts{stats: &stats})
   266  				i := len(levelIters)
   267  				levelIters = append(levelIters, mergingIterLevel{iter: li})
   268  				li.initRangeDel(&levelIters[i].rangeDelIter)
   269  				li.initBoundaryContext(&levelIters[i].levelIterBoundaryContext)
   270  			}
   271  			miter := &mergingIter{}
   272  			miter.init(nil /* opts */, &stats, cmp, func(a []byte) int { return len(a) }, levelIters...)
   273  			defer miter.Close()
   274  			miter.forceEnableSeekOpt = true
   275  			return runInternalIterCmd(t, d, miter, iterCmdVerboseKey, iterCmdStats(&stats))
   276  		default:
   277  			return fmt.Sprintf("unknown command: %s", d.Cmd)
   278  		}
   279  	})
   280  }
   281  
   282  func buildMergingIterTables(
   283  	b *testing.B, blockSize, restartInterval, count int,
   284  ) ([]*sstable.Reader, [][]byte, func()) {
   285  	mem := vfs.NewMem()
   286  	files := make([]vfs.File, count)
   287  	for i := range files {
   288  		f, err := mem.Create(fmt.Sprintf("bench%d", i))
   289  		if err != nil {
   290  			b.Fatal(err)
   291  		}
   292  		files[i] = f
   293  	}
   294  
   295  	writers := make([]*sstable.Writer, len(files))
   296  	for i := range files {
   297  		writers[i] = sstable.NewWriter(objstorageprovider.NewFileWritable(files[i]), sstable.WriterOptions{
   298  			BlockRestartInterval: restartInterval,
   299  			BlockSize:            blockSize,
   300  			Compression:          NoCompression,
   301  		})
   302  	}
   303  
   304  	estimatedSize := func() uint64 {
   305  		var sum uint64
   306  		for _, w := range writers {
   307  			sum += w.EstimatedSize()
   308  		}
   309  		return sum
   310  	}
   311  
   312  	var keys [][]byte
   313  	var ikey InternalKey
   314  	targetSize := uint64(count * (2 << 20))
   315  	for i := 0; estimatedSize() < targetSize; i++ {
   316  		key := []byte(fmt.Sprintf("%08d", i))
   317  		keys = append(keys, key)
   318  		ikey.UserKey = key
   319  		j := rand.Intn(len(writers))
   320  		w := writers[j]
   321  		w.Add(ikey, nil)
   322  	}
   323  
   324  	for _, w := range writers {
   325  		if err := w.Close(); err != nil {
   326  			b.Fatal(err)
   327  		}
   328  	}
   329  
   330  	opts := sstable.ReaderOptions{Cache: NewCache(128 << 20)}
   331  	defer opts.Cache.Unref()
   332  
   333  	readers := make([]*sstable.Reader, len(files))
   334  	for i := range files {
   335  		f, err := mem.Open(fmt.Sprintf("bench%d", i))
   336  		if err != nil {
   337  			b.Fatal(err)
   338  		}
   339  		readable, err := sstable.NewSimpleReadable(f)
   340  		if err != nil {
   341  			b.Fatal(err)
   342  		}
   343  		readers[i], err = sstable.NewReader(readable, opts)
   344  		if err != nil {
   345  			b.Fatal(err)
   346  		}
   347  	}
   348  	return readers, keys, func() {
   349  		for _, r := range readers {
   350  			require.NoError(b, r.Close())
   351  		}
   352  	}
   353  }
   354  
   355  func BenchmarkMergingIterSeekGE(b *testing.B) {
   356  	const blockSize = 32 << 10
   357  
   358  	for _, restartInterval := range []int{16} {
   359  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   360  			func(b *testing.B) {
   361  				for _, count := range []int{1, 2, 3, 4, 5} {
   362  					b.Run(fmt.Sprintf("count=%d", count),
   363  						func(b *testing.B) {
   364  							readers, keys, cleanup := buildMergingIterTables(b, blockSize, restartInterval, count)
   365  							defer cleanup()
   366  							iters := make([]internalIterator, len(readers))
   367  							for i := range readers {
   368  								var err error
   369  								iters[i], err = readers[i].NewIter(nil /* lower */, nil /* upper */)
   370  								require.NoError(b, err)
   371  							}
   372  							var stats base.InternalIteratorStats
   373  							m := newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
   374  								func(a []byte) int { return len(a) }, iters...)
   375  							rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
   376  
   377  							b.ResetTimer()
   378  							for i := 0; i < b.N; i++ {
   379  								m.SeekGE(keys[rng.Intn(len(keys))], base.SeekGEFlagsNone)
   380  							}
   381  							m.Close()
   382  						})
   383  				}
   384  			})
   385  	}
   386  }
   387  
   388  func BenchmarkMergingIterNext(b *testing.B) {
   389  	const blockSize = 32 << 10
   390  
   391  	for _, restartInterval := range []int{16} {
   392  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   393  			func(b *testing.B) {
   394  				for _, count := range []int{1, 2, 3, 4, 5} {
   395  					b.Run(fmt.Sprintf("count=%d", count),
   396  						func(b *testing.B) {
   397  							readers, _, cleanup := buildMergingIterTables(b, blockSize, restartInterval, count)
   398  							defer cleanup()
   399  							iters := make([]internalIterator, len(readers))
   400  							for i := range readers {
   401  								var err error
   402  								iters[i], err = readers[i].NewIter(nil /* lower */, nil /* upper */)
   403  								require.NoError(b, err)
   404  							}
   405  							var stats base.InternalIteratorStats
   406  							m := newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
   407  								func(a []byte) int { return len(a) }, iters...)
   408  
   409  							b.ResetTimer()
   410  							for i := 0; i < b.N; i++ {
   411  								key, _ := m.Next()
   412  								if key == nil {
   413  									key, _ = m.First()
   414  								}
   415  								_ = key
   416  							}
   417  							m.Close()
   418  						})
   419  				}
   420  			})
   421  	}
   422  }
   423  
   424  func BenchmarkMergingIterPrev(b *testing.B) {
   425  	const blockSize = 32 << 10
   426  
   427  	for _, restartInterval := range []int{16} {
   428  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   429  			func(b *testing.B) {
   430  				for _, count := range []int{1, 2, 3, 4, 5} {
   431  					b.Run(fmt.Sprintf("count=%d", count),
   432  						func(b *testing.B) {
   433  							readers, _, cleanup := buildMergingIterTables(b, blockSize, restartInterval, count)
   434  							defer cleanup()
   435  							iters := make([]internalIterator, len(readers))
   436  							for i := range readers {
   437  								var err error
   438  								iters[i], err = readers[i].NewIter(nil /* lower */, nil /* upper */)
   439  								require.NoError(b, err)
   440  							}
   441  							var stats base.InternalIteratorStats
   442  							m := newMergingIter(nil /* logger */, &stats, DefaultComparer.Compare,
   443  								func(a []byte) int { return len(a) }, iters...)
   444  
   445  							b.ResetTimer()
   446  							for i := 0; i < b.N; i++ {
   447  								key, _ := m.Prev()
   448  								if key == nil {
   449  									key, _ = m.Last()
   450  								}
   451  								_ = key
   452  							}
   453  							m.Close()
   454  						})
   455  				}
   456  			})
   457  	}
   458  }
   459  
   460  // Builds levels for BenchmarkMergingIterSeqSeekGEWithBounds. The lowest level,
   461  // index 0 here, contains most of the data. Each level has 2 files, to allow for
   462  // stepping into the second file if needed. The lowest level has all the keys in
   463  // the file 0, and a single "lastIKey" in file 1. File 0 in all other levels have
   464  // only the first and last key of file 0 of the aforementioned level -- this
   465  // simulates sparseness of data, but not necessarily of file width, in higher
   466  // levels. File 1 in other levels is similar to File 1 in the aforementioned level
   467  // since it is only for stepping into. If writeRangeTombstoneToLowestLevel is
   468  // true, a range tombstone is written to the first lowest level file that
   469  // deletes all the keys in it, and no other levels should be written.
   470  func buildLevelsForMergingIterSeqSeek(
   471  	b *testing.B,
   472  	blockSize, restartInterval, levelCount int,
   473  	keyOffset int,
   474  	writeRangeTombstoneToLowestLevel bool,
   475  	writeBloomFilters bool,
   476  	forceTwoLevelIndex bool,
   477  ) (readers [][]*sstable.Reader, levelSlices []manifest.LevelSlice, keys [][]byte) {
   478  	mem := vfs.NewMem()
   479  	if writeRangeTombstoneToLowestLevel && levelCount != 1 {
   480  		panic("expect to write only 1 level")
   481  	}
   482  	files := make([][]vfs.File, levelCount)
   483  	for i := range files {
   484  		for j := 0; j < 2; j++ {
   485  			f, err := mem.Create(fmt.Sprintf("bench%d_%d", i, j))
   486  			if err != nil {
   487  				b.Fatal(err)
   488  			}
   489  			files[i] = append(files[i], f)
   490  		}
   491  	}
   492  
   493  	const targetL6FirstFileSize = 2 << 20
   494  	writers := make([][]*sstable.Writer, levelCount)
   495  	// A policy unlikely to have false positives.
   496  	filterPolicy := bloom.FilterPolicy(100)
   497  	for i := range files {
   498  		for j := range files[i] {
   499  			writerOptions := sstable.WriterOptions{
   500  				BlockRestartInterval: restartInterval,
   501  				BlockSize:            blockSize,
   502  				Compression:          NoCompression,
   503  			}
   504  			if writeBloomFilters {
   505  				writerOptions.FilterPolicy = filterPolicy
   506  				writerOptions.FilterType = base.TableFilter
   507  			}
   508  			if forceTwoLevelIndex {
   509  				if i == 0 && j == 0 {
   510  					// Ignoring compression, approximate number of blocks
   511  					numDataBlocks := targetL6FirstFileSize / blockSize
   512  					if numDataBlocks < 4 {
   513  						b.Fatalf("cannot produce two level index")
   514  					}
   515  					// Produce ~2 lower-level index blocks.
   516  					writerOptions.IndexBlockSize = (numDataBlocks / 2) * 8
   517  				} else if j == 0 {
   518  					// Only 2 keys in these files, so to produce two level indexes we
   519  					// set the block sizes to 1.
   520  					writerOptions.BlockSize = 1
   521  					writerOptions.IndexBlockSize = 1
   522  				}
   523  			}
   524  			writers[i] = append(writers[i], sstable.NewWriter(objstorageprovider.NewFileWritable(files[i][j]), writerOptions))
   525  		}
   526  	}
   527  
   528  	i := keyOffset
   529  	w := writers[0][0]
   530  	for ; w.EstimatedSize() < targetL6FirstFileSize; i++ {
   531  		key := []byte(fmt.Sprintf("%08d", i))
   532  		keys = append(keys, key)
   533  		ikey := base.MakeInternalKey(key, 0, InternalKeyKindSet)
   534  		w.Add(ikey, nil)
   535  	}
   536  	if writeRangeTombstoneToLowestLevel {
   537  		tombstoneKey := base.MakeInternalKey(keys[0], 1, InternalKeyKindRangeDelete)
   538  		w.Add(tombstoneKey, []byte(fmt.Sprintf("%08d", i)))
   539  	}
   540  	for j := 1; j < len(files); j++ {
   541  		for _, k := range []int{0, len(keys) - 1} {
   542  			ikey := base.MakeInternalKey(keys[k], uint64(j), InternalKeyKindSet)
   543  			writers[j][0].Add(ikey, nil)
   544  		}
   545  	}
   546  	lastKey := []byte(fmt.Sprintf("%08d", i))
   547  	keys = append(keys, lastKey)
   548  	for j := 0; j < len(files); j++ {
   549  		lastIKey := base.MakeInternalKey(lastKey, uint64(j), InternalKeyKindSet)
   550  		writers[j][1].Add(lastIKey, nil)
   551  	}
   552  	for _, levelWriters := range writers {
   553  		for j, w := range levelWriters {
   554  			if err := w.Close(); err != nil {
   555  				b.Fatal(err)
   556  			}
   557  			meta, err := w.Metadata()
   558  			require.NoError(b, err)
   559  			if forceTwoLevelIndex && j == 0 && meta.Properties.IndexType != 2 {
   560  				b.Fatalf("did not produce two level index")
   561  			}
   562  		}
   563  	}
   564  
   565  	opts := sstable.ReaderOptions{Cache: NewCache(128 << 20), Comparer: DefaultComparer}
   566  	if writeBloomFilters {
   567  		opts.Filters = make(map[string]FilterPolicy)
   568  		opts.Filters[filterPolicy.Name()] = filterPolicy
   569  	}
   570  	defer opts.Cache.Unref()
   571  
   572  	readers = make([][]*sstable.Reader, levelCount)
   573  	for i := range files {
   574  		for j := range files[i] {
   575  			f, err := mem.Open(fmt.Sprintf("bench%d_%d", i, j))
   576  			if err != nil {
   577  				b.Fatal(err)
   578  			}
   579  			readable, err := sstable.NewSimpleReadable(f)
   580  			if err != nil {
   581  				b.Fatal(err)
   582  			}
   583  			r, err := sstable.NewReader(readable, opts)
   584  			if err != nil {
   585  				b.Fatal(err)
   586  			}
   587  			readers[i] = append(readers[i], r)
   588  		}
   589  	}
   590  	levelSlices = make([]manifest.LevelSlice, levelCount)
   591  	for i := range readers {
   592  		meta := make([]*fileMetadata, len(readers[i]))
   593  		for j := range readers[i] {
   594  			iter, err := readers[i][j].NewIter(nil /* lower */, nil /* upper */)
   595  			require.NoError(b, err)
   596  			smallest, _ := iter.First()
   597  			meta[j] = &fileMetadata{}
   598  			// The same FileNum is being reused across different levels, which
   599  			// is harmless for the benchmark since each level has its own iterator
   600  			// creation func.
   601  			meta[j].FileNum = FileNum(j)
   602  			largest, _ := iter.Last()
   603  			meta[j].ExtendPointKeyBounds(opts.Comparer.Compare, smallest.Clone(), largest.Clone())
   604  			meta[j].InitPhysicalBacking()
   605  		}
   606  		levelSlices[i] = manifest.NewLevelSliceSpecificOrder(meta)
   607  	}
   608  	return readers, levelSlices, keys
   609  }
   610  
   611  func buildMergingIter(readers [][]*sstable.Reader, levelSlices []manifest.LevelSlice) *mergingIter {
   612  	mils := make([]mergingIterLevel, len(levelSlices))
   613  	for i := len(readers) - 1; i >= 0; i-- {
   614  		levelIndex := i
   615  		level := len(readers) - 1 - i
   616  		newIters := func(
   617  			_ context.Context, file *manifest.FileMetadata, opts *IterOptions, _ internalIterOpts,
   618  		) (internalIterator, keyspan.FragmentIterator, error) {
   619  			iter, err := readers[levelIndex][file.FileNum].NewIter(
   620  				opts.LowerBound, opts.UpperBound)
   621  			if err != nil {
   622  				return nil, nil, err
   623  			}
   624  			rdIter, err := readers[levelIndex][file.FileNum].NewRawRangeDelIter()
   625  			if err != nil {
   626  				iter.Close()
   627  				return nil, nil, err
   628  			}
   629  			return iter, rdIter, err
   630  		}
   631  		l := newLevelIter(IterOptions{}, testkeys.Comparer, newIters, levelSlices[i].Iter(),
   632  			manifest.Level(level), internalIterOpts{})
   633  		l.initRangeDel(&mils[level].rangeDelIter)
   634  		l.initBoundaryContext(&mils[level].levelIterBoundaryContext)
   635  		mils[level].iter = l
   636  	}
   637  	var stats base.InternalIteratorStats
   638  	m := &mergingIter{}
   639  	m.init(nil /* logger */, &stats, testkeys.Comparer.Compare,
   640  		func(a []byte) int { return len(a) }, mils...)
   641  	return m
   642  }
   643  
   644  // A benchmark that simulates the behavior of a mergingIter where
   645  // monotonically increasing narrow bounds are repeatedly set and used to Seek
   646  // and then iterate over the keys within the bounds. This resembles MVCC
   647  // scanning by CockroachDB when doing a lookup/index join with a large number
   648  // of left rows, that are batched and reuse the same iterator, and which can
   649  // have good locality of access. This results in the successive bounds being
   650  // in the same file.
   651  func BenchmarkMergingIterSeqSeekGEWithBounds(b *testing.B) {
   652  	const blockSize = 32 << 10
   653  
   654  	restartInterval := 16
   655  	for _, levelCount := range []int{5} {
   656  		b.Run(fmt.Sprintf("levelCount=%d", levelCount),
   657  			func(b *testing.B) {
   658  				readers, levelSlices, keys := buildLevelsForMergingIterSeqSeek(
   659  					b, blockSize, restartInterval, levelCount, 0 /* keyOffset */, false, false, false)
   660  				m := buildMergingIter(readers, levelSlices)
   661  				keyCount := len(keys)
   662  				b.ResetTimer()
   663  				for i := 0; i < b.N; i++ {
   664  					pos := i % (keyCount - 1)
   665  					m.SetBounds(keys[pos], keys[pos+1])
   666  					// SeekGE will return keys[pos].
   667  					k, _ := m.SeekGE(keys[pos], base.SeekGEFlagsNone)
   668  					for k != nil {
   669  						k, _ = m.Next()
   670  					}
   671  				}
   672  				m.Close()
   673  				for i := range readers {
   674  					for j := range readers[i] {
   675  						readers[i][j].Close()
   676  					}
   677  				}
   678  			})
   679  	}
   680  }
   681  
   682  func BenchmarkMergingIterSeqSeekPrefixGE(b *testing.B) {
   683  	const blockSize = 32 << 10
   684  	const restartInterval = 16
   685  	const levelCount = 5
   686  	readers, levelSlices, keys := buildLevelsForMergingIterSeqSeek(
   687  		b, blockSize, restartInterval, levelCount, 0 /* keyOffset */, false, false, false)
   688  
   689  	for _, skip := range []int{1, 2, 4, 8, 16} {
   690  		for _, useNext := range []bool{false, true} {
   691  			b.Run(fmt.Sprintf("skip=%d/use-next=%t", skip, useNext),
   692  				func(b *testing.B) {
   693  					m := buildMergingIter(readers, levelSlices)
   694  					keyCount := len(keys)
   695  					pos := 0
   696  
   697  					m.SeekPrefixGE(keys[pos], keys[pos], base.SeekGEFlagsNone)
   698  					b.ResetTimer()
   699  					for i := 0; i < b.N; i++ {
   700  						pos += skip
   701  						var flags base.SeekGEFlags
   702  						if useNext {
   703  							flags = flags.EnableTrySeekUsingNext()
   704  						}
   705  						if pos >= keyCount {
   706  							pos = 0
   707  							flags = flags.DisableTrySeekUsingNext()
   708  						}
   709  						// SeekPrefixGE will return keys[pos].
   710  						m.SeekPrefixGE(keys[pos], keys[pos], flags)
   711  					}
   712  					b.StopTimer()
   713  					m.Close()
   714  				})
   715  		}
   716  	}
   717  	for i := range readers {
   718  		for j := range readers[i] {
   719  			readers[i][j].Close()
   720  		}
   721  	}
   722  }