github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/external_iterator_test.go (about)

     1  // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"math"
    11  	"testing"
    12  	"time"
    13  
    14  	"github.com/cockroachdb/datadriven"
    15  	"github.com/cockroachdb/errors"
    16  	"github.com/cockroachdb/pebble/internal/base"
    17  	"github.com/cockroachdb/pebble/internal/cache"
    18  	"github.com/cockroachdb/pebble/internal/testkeys"
    19  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    20  	"github.com/cockroachdb/pebble/sstable"
    21  	"github.com/cockroachdb/pebble/vfs"
    22  	"github.com/stretchr/testify/require"
    23  	"golang.org/x/exp/rand"
    24  )
    25  
    26  func TestExternalIterator(t *testing.T) {
    27  	mem := vfs.NewMem()
    28  	o := &Options{
    29  		FS:                 mem,
    30  		Comparer:           testkeys.Comparer,
    31  		FormatMajorVersion: FormatRangeKeys,
    32  	}
    33  	o.EnsureDefaults()
    34  	d, err := Open("", o)
    35  	require.NoError(t, err)
    36  	defer func() { require.NoError(t, d.Close()) }()
    37  
    38  	datadriven.RunTest(t, "testdata/external_iterator", func(t *testing.T, td *datadriven.TestData) string {
    39  		switch td.Cmd {
    40  		case "reset":
    41  			mem = vfs.NewMem()
    42  			return ""
    43  		case "build":
    44  			if err := runBuildCmd(td, d, mem); err != nil {
    45  				return err.Error()
    46  			}
    47  			return ""
    48  		case "iter":
    49  			opts := IterOptions{KeyTypes: IterKeyTypePointsAndRanges}
    50  			var externalIterOpts []ExternalIterOption
    51  			var files [][]sstable.ReadableFile
    52  			for _, arg := range td.CmdArgs {
    53  				switch arg.Key {
    54  				case "fwd-only":
    55  					externalIterOpts = append(externalIterOpts, ExternalIterForwardOnly{})
    56  				case "mask-suffix":
    57  					opts.RangeKeyMasking.Suffix = []byte(arg.Vals[0])
    58  				case "lower":
    59  					opts.LowerBound = []byte(arg.Vals[0])
    60  				case "upper":
    61  					opts.UpperBound = []byte(arg.Vals[0])
    62  				case "files":
    63  					for _, v := range arg.Vals {
    64  						f, err := mem.Open(v)
    65  						require.NoError(t, err)
    66  						files = append(files, []sstable.ReadableFile{f})
    67  					}
    68  				}
    69  			}
    70  			it, err := NewExternalIter(o, &opts, files, externalIterOpts...)
    71  			require.NoError(t, err)
    72  			return runIterCmd(td, it, true /* close iter */)
    73  		default:
    74  			return fmt.Sprintf("unknown command: %s", td.Cmd)
    75  		}
    76  	})
    77  }
    78  
    79  func TestSimpleLevelIter(t *testing.T) {
    80  	mem := vfs.NewMem()
    81  	o := &Options{
    82  		FS:                 mem,
    83  		Comparer:           testkeys.Comparer,
    84  		FormatMajorVersion: FormatRangeKeys,
    85  	}
    86  	o.EnsureDefaults()
    87  	d, err := Open("", o)
    88  	require.NoError(t, err)
    89  	defer func() { require.NoError(t, d.Close()) }()
    90  
    91  	datadriven.RunTest(t, "testdata/simple_level_iter", func(t *testing.T, td *datadriven.TestData) string {
    92  		switch td.Cmd {
    93  		case "reset":
    94  			mem = vfs.NewMem()
    95  			return ""
    96  		case "build":
    97  			if err := runBuildCmd(td, d, mem); err != nil {
    98  				return err.Error()
    99  			}
   100  			return ""
   101  		case "iter":
   102  			var files []sstable.ReadableFile
   103  			var filenames []string
   104  			td.ScanArgs(t, "files", &filenames)
   105  			for _, name := range filenames {
   106  				f, err := mem.Open(name)
   107  				require.NoError(t, err)
   108  				files = append(files, f)
   109  			}
   110  			readers, err := openExternalTables(o, files, 0, o.MakeReaderOptions())
   111  			require.NoError(t, err)
   112  			defer func() {
   113  				for i := range readers {
   114  					_ = readers[i].Close()
   115  				}
   116  			}()
   117  			var internalIters []internalIterator
   118  			for i := range readers {
   119  				iter, err := readers[i].NewIter(nil, nil)
   120  				require.NoError(t, err)
   121  				internalIters = append(internalIters, iter)
   122  			}
   123  			it := &simpleLevelIter{cmp: o.Comparer.Compare, iters: internalIters}
   124  			it.init(IterOptions{})
   125  
   126  			response := runInternalIterCmd(t, td, it)
   127  			require.NoError(t, it.Close())
   128  			return response
   129  		default:
   130  			return fmt.Sprintf("unknown command: %s", td.Cmd)
   131  		}
   132  	})
   133  }
   134  
   135  func TestSimpleIterError(t *testing.T) {
   136  	s := simpleLevelIter{cmp: DefaultComparer.Compare, iters: []internalIterator{&errorIter{err: errors.New("injected")}}}
   137  	s.init(IterOptions{})
   138  	defer s.Close()
   139  
   140  	iterKey, _ := s.First()
   141  	require.Nil(t, iterKey)
   142  	require.Error(t, s.Error())
   143  }
   144  
   145  func TestIterRandomizedMaybeFilteredKeys(t *testing.T) {
   146  	mem := vfs.NewMem()
   147  
   148  	seed := *seed
   149  	if seed == 0 {
   150  		seed = uint64(time.Now().UnixNano())
   151  		t.Logf("seed: %d", seed)
   152  	}
   153  	rng := rand.New(rand.NewSource(seed))
   154  	numKeys := 100 + rng.Intn(5000)
   155  	// The block property filter will exclude keys with suffixes [0, tsSeparator-1].
   156  	// We use the first "part" of the keyspace below to write keys >= tsSeparator,
   157  	// and the second part to write keys < tsSeparator. Successive parts (if any)
   158  	// will contain keys at random before or after the separator.
   159  	tsSeparator := 10 + rng.Int63n(5000)
   160  	const keyLen = 5
   161  
   162  	// We split the keyspace into logical "parts" which are disjoint slices of the
   163  	// keyspace. That is, the keyspace a-z could be comprised of parts {a-k, l-z}.
   164  	// We rely on this partitioning when generating timestamps to give us some
   165  	// predictable clustering of timestamps in sstable blocks, however it is not
   166  	// strictly necessary for this test.
   167  	alpha := testkeys.Alpha(keyLen)
   168  	numParts := rng.Intn(3) + 2
   169  	blockSize := 16 + rng.Intn(64)
   170  
   171  	c := cache.New(128 << 20)
   172  	defer c.Unref()
   173  
   174  	for fileIdx, twoLevelIndex := range []bool{false, true} {
   175  		t.Run(fmt.Sprintf("twoLevelIndex=%v", twoLevelIndex), func(t *testing.T) {
   176  			keys := make([][]byte, 0, numKeys)
   177  
   178  			filename := fmt.Sprintf("test-%d", fileIdx)
   179  			f0, err := mem.Create(filename)
   180  			require.NoError(t, err)
   181  
   182  			indexBlockSize := 4096
   183  			if twoLevelIndex {
   184  				indexBlockSize = 1
   185  			}
   186  			w := sstable.NewWriter(objstorageprovider.NewFileWritable(f0), sstable.WriterOptions{
   187  				BlockSize:      blockSize,
   188  				Comparer:       testkeys.Comparer,
   189  				IndexBlockSize: indexBlockSize,
   190  				TableFormat:    sstable.TableFormatPebblev2,
   191  				BlockPropertyCollectors: []func() BlockPropertyCollector{
   192  					func() BlockPropertyCollector {
   193  						return sstable.NewTestKeysBlockPropertyCollector()
   194  					},
   195  				},
   196  			})
   197  			buf := make([]byte, alpha.MaxLen()+testkeys.MaxSuffixLen)
   198  			valBuf := make([]byte, 20)
   199  			keyIdx := int64(0)
   200  			for i := 0; i < numParts; i++ {
   201  				// The first two parts of the keyspace are special. The first one has
   202  				// all keys with timestamps greater than tsSeparator, while the second
   203  				// one has all keys with timestamps less than tsSeparator. Any additional
   204  				// keys could have timestamps at random before or after the tsSeparator.
   205  				maxKeysPerPart := numKeys / numParts
   206  				for j := 0; j < maxKeysPerPart; j++ {
   207  					var ts int64
   208  					if i == 0 {
   209  						ts = rng.Int63n(5000) + tsSeparator
   210  					} else if i == 1 {
   211  						ts = rng.Int63n(tsSeparator)
   212  					} else {
   213  						ts = rng.Int63n(tsSeparator + 5000)
   214  					}
   215  					n := testkeys.WriteKeyAt(buf, alpha, keyIdx*alpha.Count()/int64(numKeys), ts)
   216  					keys = append(keys, append([]byte(nil), buf[:n]...))
   217  					randStr(valBuf, rng)
   218  					require.NoError(t, w.Set(buf[:n], valBuf))
   219  					keyIdx++
   220  				}
   221  			}
   222  			require.NoError(t, w.Close())
   223  
   224  			// Re-open that filename for reading.
   225  			f1, err := mem.Open(filename)
   226  			require.NoError(t, err)
   227  
   228  			readable, err := sstable.NewSimpleReadable(f1)
   229  			require.NoError(t, err)
   230  
   231  			r, err := sstable.NewReader(readable, sstable.ReaderOptions{
   232  				Cache:    c,
   233  				Comparer: testkeys.Comparer,
   234  			})
   235  			require.NoError(t, err)
   236  			defer r.Close()
   237  
   238  			filter := sstable.NewTestKeysBlockPropertyFilter(uint64(tsSeparator), math.MaxUint64)
   239  			filterer, err := sstable.IntersectsTable([]BlockPropertyFilter{filter}, nil, r.Properties.UserProperties)
   240  			require.NoError(t, err)
   241  			require.NotNil(t, filterer)
   242  
   243  			var iter sstable.Iterator
   244  			iter, err = r.NewIterWithBlockPropertyFilters(
   245  				nil, nil, filterer, false /* useFilterBlock */, nil, /* stats */
   246  				sstable.TrivialReaderProvider{Reader: r})
   247  			require.NoError(t, err)
   248  			defer iter.Close()
   249  			var lastSeekKey, lowerBound, upperBound []byte
   250  			narrowBoundsMode := false
   251  
   252  			for i := 0; i < 10000; i++ {
   253  				if rng.Intn(8) == 0 {
   254  					// Toggle narrow bounds mode.
   255  					if narrowBoundsMode {
   256  						// Reset bounds.
   257  						lowerBound, upperBound = nil, nil
   258  						iter.SetBounds(nil /* lower */, nil /* upper */)
   259  					}
   260  					narrowBoundsMode = !narrowBoundsMode
   261  				}
   262  				keyIdx := rng.Intn(len(keys))
   263  				seekKey := keys[keyIdx]
   264  				if narrowBoundsMode {
   265  					// Case 1: We just entered narrow bounds mode, and both bounds
   266  					// are nil. Set a lower/upper bound.
   267  					//
   268  					// Case 2: The seek key is outside our last bounds.
   269  					//
   270  					// In either case, pick a narrow range of keys to set bounds on,
   271  					// let's say keys[keyIdx-5] and keys[keyIdx+5], before doing our
   272  					// seek operation. Picking narrow bounds increases the chance of
   273  					// monotonic bound changes.
   274  					cmp := testkeys.Comparer.Compare
   275  					case1 := lowerBound == nil && upperBound == nil
   276  					case2 := (lowerBound != nil && cmp(lowerBound, seekKey) > 0) || (upperBound != nil && cmp(upperBound, seekKey) <= 0)
   277  					if case1 || case2 {
   278  						lowerBound = nil
   279  						if keyIdx-5 >= 0 {
   280  							lowerBound = keys[keyIdx-5]
   281  						}
   282  						upperBound = nil
   283  						if keyIdx+5 < len(keys) {
   284  							upperBound = keys[keyIdx+5]
   285  						}
   286  						iter.SetBounds(lowerBound, upperBound)
   287  					}
   288  					// Case 3: The current seek key is within the previously-set bounds.
   289  					// No need to change bounds.
   290  				}
   291  				flags := base.SeekGEFlagsNone
   292  				if lastSeekKey != nil && bytes.Compare(seekKey, lastSeekKey) > 0 {
   293  					flags = flags.EnableTrySeekUsingNext()
   294  				}
   295  				lastSeekKey = append(lastSeekKey[:0], seekKey...)
   296  
   297  				newKey, _ := iter.SeekGE(seekKey, flags)
   298  				if newKey == nil || !bytes.Equal(newKey.UserKey, seekKey) {
   299  					// We skipped some keys. Check if maybeFilteredKeys is true.
   300  					formattedNewKey := "<nil>"
   301  					if newKey != nil {
   302  						formattedNewKey = fmt.Sprintf("%s", testkeys.Comparer.FormatKey(newKey.UserKey))
   303  					}
   304  					require.True(t, iter.MaybeFilteredKeys(), "seeked for key = %s, got key = %s indicating block property filtering but MaybeFilteredKeys = false", testkeys.Comparer.FormatKey(seekKey), formattedNewKey)
   305  				}
   306  			}
   307  		})
   308  	}
   309  }
   310  
   311  func BenchmarkExternalIter_NonOverlapping_SeekNextScan(b *testing.B) {
   312  	ks := testkeys.Alpha(6)
   313  	opts := (&Options{}).EnsureDefaults()
   314  	iterOpts := &IterOptions{
   315  		KeyTypes: IterKeyTypePointsAndRanges,
   316  	}
   317  	writeOpts := opts.MakeWriterOptions(6, sstable.TableFormatPebblev2)
   318  
   319  	for _, keyCount := range []int{100, 10_000, 100_000} {
   320  		b.Run(fmt.Sprintf("keys=%d", keyCount), func(b *testing.B) {
   321  			for _, fileCount := range []int{1, 10, 100} {
   322  				b.Run(fmt.Sprintf("files=%d", fileCount), func(b *testing.B) {
   323  					var fs vfs.FS = vfs.NewMem()
   324  					filenames := make([]string, fileCount)
   325  					var keys [][]byte
   326  					for i := 0; i < fileCount; i++ {
   327  						filename := fmt.Sprintf("%03d.sst", i)
   328  						wf, err := fs.Create(filename)
   329  						require.NoError(b, err)
   330  						w := sstable.NewWriter(objstorageprovider.NewFileWritable(wf), writeOpts)
   331  						for j := 0; j < keyCount/fileCount; j++ {
   332  							key := testkeys.Key(ks, int64(len(keys)))
   333  							keys = append(keys, key)
   334  							require.NoError(b, w.Set(key, key))
   335  						}
   336  						require.NoError(b, w.Close())
   337  						filenames[i] = filename
   338  					}
   339  
   340  					for _, forwardOnly := range []bool{false, true} {
   341  						b.Run(fmt.Sprintf("forward-only=%t", forwardOnly), func(b *testing.B) {
   342  							var externalIterOpts []ExternalIterOption
   343  							if forwardOnly {
   344  								externalIterOpts = append(externalIterOpts, ExternalIterForwardOnly{})
   345  							}
   346  
   347  							for i := 0; i < b.N; i++ {
   348  								func() {
   349  									files := make([][]sstable.ReadableFile, fileCount)
   350  									for i := 0; i < fileCount; i++ {
   351  										f, err := fs.Open(filenames[i])
   352  										require.NoError(b, err)
   353  										files[i] = []sstable.ReadableFile{f}
   354  									}
   355  
   356  									it, err := NewExternalIter(opts, iterOpts, files, externalIterOpts...)
   357  									require.NoError(b, err)
   358  									defer it.Close()
   359  
   360  									for k := 0; k+1 < len(keys); k += 2 {
   361  										if !it.SeekGE(keys[k]) {
   362  											b.Fatalf("key %q not found", keys[k])
   363  										}
   364  										if !it.Next() {
   365  											b.Fatalf("key %q not found", keys[k+1])
   366  										}
   367  										if !bytes.Equal(it.Key(), keys[k+1]) {
   368  											b.Fatalf("expected key %q, found %q", keys[k+1], it.Key())
   369  										}
   370  									}
   371  								}()
   372  							}
   373  						})
   374  					}
   375  				})
   376  			}
   377  		})
   378  	}
   379  }