github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/external_iterator_test.go (about)

     1  // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"math"
    11  	"testing"
    12  	"time"
    13  
    14  	"github.com/cockroachdb/datadriven"
    15  	"github.com/cockroachdb/errors"
    16  	"github.com/cockroachdb/pebble/internal/base"
    17  	"github.com/cockroachdb/pebble/internal/cache"
    18  	"github.com/cockroachdb/pebble/internal/itertest"
    19  	"github.com/cockroachdb/pebble/internal/testkeys"
    20  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    21  	"github.com/cockroachdb/pebble/sstable"
    22  	"github.com/cockroachdb/pebble/vfs"
    23  	"github.com/stretchr/testify/require"
    24  	"golang.org/x/exp/rand"
    25  )
    26  
    27  func TestExternalIterator(t *testing.T) {
    28  	mem := vfs.NewMem()
    29  	o := &Options{
    30  		FS:                 mem,
    31  		Comparer:           testkeys.Comparer,
    32  		FormatMajorVersion: FormatRangeKeys,
    33  	}
    34  	o.EnsureDefaults()
    35  	d, err := Open("", o)
    36  	require.NoError(t, err)
    37  	defer func() { require.NoError(t, d.Close()) }()
    38  
    39  	datadriven.RunTest(t, "testdata/external_iterator", func(t *testing.T, td *datadriven.TestData) string {
    40  		switch td.Cmd {
    41  		case "reset":
    42  			mem = vfs.NewMem()
    43  			return ""
    44  		case "build":
    45  			if err := runBuildCmd(td, d, mem); err != nil {
    46  				return err.Error()
    47  			}
    48  			return ""
    49  		case "iter":
    50  			opts := IterOptions{KeyTypes: IterKeyTypePointsAndRanges}
    51  			var externalIterOpts []ExternalIterOption
    52  			var files [][]sstable.ReadableFile
    53  			for _, arg := range td.CmdArgs {
    54  				switch arg.Key {
    55  				case "fwd-only":
    56  					externalIterOpts = append(externalIterOpts, ExternalIterForwardOnly{})
    57  				case "mask-suffix":
    58  					opts.RangeKeyMasking.Suffix = []byte(arg.Vals[0])
    59  				case "lower":
    60  					opts.LowerBound = []byte(arg.Vals[0])
    61  				case "upper":
    62  					opts.UpperBound = []byte(arg.Vals[0])
    63  				case "files":
    64  					for _, v := range arg.Vals {
    65  						f, err := mem.Open(v)
    66  						require.NoError(t, err)
    67  						files = append(files, []sstable.ReadableFile{f})
    68  					}
    69  				}
    70  			}
    71  			it, err := NewExternalIter(o, &opts, files, externalIterOpts...)
    72  			require.NoError(t, err)
    73  			return runIterCmd(td, it, true /* close iter */)
    74  		default:
    75  			return fmt.Sprintf("unknown command: %s", td.Cmd)
    76  		}
    77  	})
    78  }
    79  
    80  func TestSimpleLevelIter(t *testing.T) {
    81  	mem := vfs.NewMem()
    82  	o := &Options{
    83  		FS:                 mem,
    84  		Comparer:           testkeys.Comparer,
    85  		FormatMajorVersion: FormatRangeKeys,
    86  	}
    87  	o.EnsureDefaults()
    88  	d, err := Open("", o)
    89  	require.NoError(t, err)
    90  	defer func() { require.NoError(t, d.Close()) }()
    91  
    92  	datadriven.RunTest(t, "testdata/simple_level_iter", func(t *testing.T, td *datadriven.TestData) string {
    93  		switch td.Cmd {
    94  		case "reset":
    95  			mem = vfs.NewMem()
    96  			return ""
    97  		case "build":
    98  			if err := runBuildCmd(td, d, mem); err != nil {
    99  				return err.Error()
   100  			}
   101  			return ""
   102  		case "iter":
   103  			var files []sstable.ReadableFile
   104  			var filenames []string
   105  			td.ScanArgs(t, "files", &filenames)
   106  			for _, name := range filenames {
   107  				f, err := mem.Open(name)
   108  				require.NoError(t, err)
   109  				files = append(files, f)
   110  			}
   111  			readers, err := openExternalTables(o, files, 0, o.MakeReaderOptions())
   112  			require.NoError(t, err)
   113  			defer func() {
   114  				for i := range readers {
   115  					_ = readers[i].Close()
   116  				}
   117  			}()
   118  			var internalIters []internalIterator
   119  			for i := range readers {
   120  				iter, err := readers[i].NewIter(nil, nil)
   121  				require.NoError(t, err)
   122  				internalIters = append(internalIters, iter)
   123  			}
   124  			it := &simpleLevelIter{cmp: o.Comparer.Compare, iters: internalIters}
   125  			it.init(IterOptions{})
   126  
   127  			response := itertest.RunInternalIterCmd(t, td, it)
   128  			require.NoError(t, it.Close())
   129  			return response
   130  		default:
   131  			return fmt.Sprintf("unknown command: %s", td.Cmd)
   132  		}
   133  	})
   134  }
   135  
   136  func TestSimpleIterError(t *testing.T) {
   137  	s := simpleLevelIter{cmp: DefaultComparer.Compare, iters: []internalIterator{&errorIter{err: errors.New("injected")}}}
   138  	s.init(IterOptions{})
   139  	defer s.Close()
   140  
   141  	iterKey, _ := s.First()
   142  	require.Nil(t, iterKey)
   143  	require.Error(t, s.Error())
   144  }
   145  
   146  func TestIterRandomizedMaybeFilteredKeys(t *testing.T) {
   147  	mem := vfs.NewMem()
   148  
   149  	seed := *seed
   150  	if seed == 0 {
   151  		seed = uint64(time.Now().UnixNano())
   152  		t.Logf("seed: %d", seed)
   153  	}
   154  	rng := rand.New(rand.NewSource(seed))
   155  	numKeys := 100 + rng.Intn(5000)
   156  	// The block property filter will exclude keys with suffixes [0, tsSeparator-1].
   157  	// We use the first "part" of the keyspace below to write keys >= tsSeparator,
   158  	// and the second part to write keys < tsSeparator. Successive parts (if any)
   159  	// will contain keys at random before or after the separator.
   160  	tsSeparator := 10 + rng.Int63n(5000)
   161  	const keyLen = 5
   162  
   163  	// We split the keyspace into logical "parts" which are disjoint slices of the
   164  	// keyspace. That is, the keyspace a-z could be comprised of parts {a-k, l-z}.
   165  	// We rely on this partitioning when generating timestamps to give us some
   166  	// predictable clustering of timestamps in sstable blocks, however it is not
   167  	// strictly necessary for this test.
   168  	alpha := testkeys.Alpha(keyLen)
   169  	numParts := rng.Intn(3) + 2
   170  	blockSize := 16 + rng.Intn(64)
   171  
   172  	c := cache.New(128 << 20)
   173  	defer c.Unref()
   174  
   175  	for fileIdx, twoLevelIndex := range []bool{false, true} {
   176  		t.Run(fmt.Sprintf("twoLevelIndex=%v", twoLevelIndex), func(t *testing.T) {
   177  			keys := make([][]byte, 0, numKeys)
   178  
   179  			filename := fmt.Sprintf("test-%d", fileIdx)
   180  			f0, err := mem.Create(filename)
   181  			require.NoError(t, err)
   182  
   183  			indexBlockSize := 4096
   184  			if twoLevelIndex {
   185  				indexBlockSize = 1
   186  			}
   187  			w := sstable.NewWriter(objstorageprovider.NewFileWritable(f0), sstable.WriterOptions{
   188  				BlockSize:      blockSize,
   189  				Comparer:       testkeys.Comparer,
   190  				IndexBlockSize: indexBlockSize,
   191  				TableFormat:    sstable.TableFormatPebblev2,
   192  				BlockPropertyCollectors: []func() BlockPropertyCollector{
   193  					func() BlockPropertyCollector {
   194  						return sstable.NewTestKeysBlockPropertyCollector()
   195  					},
   196  				},
   197  			})
   198  			buf := make([]byte, alpha.MaxLen()+testkeys.MaxSuffixLen)
   199  			valBuf := make([]byte, 20)
   200  			keyIdx := int64(0)
   201  			for i := 0; i < numParts; i++ {
   202  				// The first two parts of the keyspace are special. The first one has
   203  				// all keys with timestamps greater than tsSeparator, while the second
   204  				// one has all keys with timestamps less than tsSeparator. Any additional
   205  				// keys could have timestamps at random before or after the tsSeparator.
   206  				maxKeysPerPart := numKeys / numParts
   207  				for j := 0; j < maxKeysPerPart; j++ {
   208  					var ts int64
   209  					if i == 0 {
   210  						ts = rng.Int63n(5000) + tsSeparator
   211  					} else if i == 1 {
   212  						ts = rng.Int63n(tsSeparator)
   213  					} else {
   214  						ts = rng.Int63n(tsSeparator + 5000)
   215  					}
   216  					n := testkeys.WriteKeyAt(buf, alpha, keyIdx*alpha.Count()/int64(numKeys), ts)
   217  					keys = append(keys, append([]byte(nil), buf[:n]...))
   218  					randStr(valBuf, rng)
   219  					require.NoError(t, w.Set(buf[:n], valBuf))
   220  					keyIdx++
   221  				}
   222  			}
   223  			require.NoError(t, w.Close())
   224  
   225  			// Re-open that filename for reading.
   226  			f1, err := mem.Open(filename)
   227  			require.NoError(t, err)
   228  
   229  			readable, err := sstable.NewSimpleReadable(f1)
   230  			require.NoError(t, err)
   231  
   232  			r, err := sstable.NewReader(readable, sstable.ReaderOptions{
   233  				Cache:    c,
   234  				Comparer: testkeys.Comparer,
   235  			})
   236  			require.NoError(t, err)
   237  			defer r.Close()
   238  
   239  			filter := sstable.NewTestKeysBlockPropertyFilter(uint64(tsSeparator), math.MaxUint64)
   240  			filterer, err := sstable.IntersectsTable([]BlockPropertyFilter{filter}, nil, r.Properties.UserProperties)
   241  			require.NoError(t, err)
   242  			require.NotNil(t, filterer)
   243  
   244  			var iter sstable.Iterator
   245  			iter, err = r.NewIterWithBlockPropertyFilters(
   246  				nil, nil, filterer, false /* useFilterBlock */, nil, /* stats */
   247  				sstable.CategoryAndQoS{}, nil, sstable.TrivialReaderProvider{Reader: r})
   248  			require.NoError(t, err)
   249  			defer iter.Close()
   250  			var lastSeekKey, lowerBound, upperBound []byte
   251  			narrowBoundsMode := false
   252  
   253  			for i := 0; i < 10000; i++ {
   254  				if rng.Intn(8) == 0 {
   255  					// Toggle narrow bounds mode.
   256  					if narrowBoundsMode {
   257  						// Reset bounds.
   258  						lowerBound, upperBound = nil, nil
   259  						iter.SetBounds(nil /* lower */, nil /* upper */)
   260  					}
   261  					narrowBoundsMode = !narrowBoundsMode
   262  				}
   263  				keyIdx := rng.Intn(len(keys))
   264  				seekKey := keys[keyIdx]
   265  				if narrowBoundsMode {
   266  					// Case 1: We just entered narrow bounds mode, and both bounds
   267  					// are nil. Set a lower/upper bound.
   268  					//
   269  					// Case 2: The seek key is outside our last bounds.
   270  					//
   271  					// In either case, pick a narrow range of keys to set bounds on,
   272  					// let's say keys[keyIdx-5] and keys[keyIdx+5], before doing our
   273  					// seek operation. Picking narrow bounds increases the chance of
   274  					// monotonic bound changes.
   275  					cmp := testkeys.Comparer.Compare
   276  					case1 := lowerBound == nil && upperBound == nil
   277  					case2 := (lowerBound != nil && cmp(lowerBound, seekKey) > 0) || (upperBound != nil && cmp(upperBound, seekKey) <= 0)
   278  					if case1 || case2 {
   279  						lowerBound = nil
   280  						if keyIdx-5 >= 0 {
   281  							lowerBound = keys[keyIdx-5]
   282  						}
   283  						upperBound = nil
   284  						if keyIdx+5 < len(keys) {
   285  							upperBound = keys[keyIdx+5]
   286  						}
   287  						iter.SetBounds(lowerBound, upperBound)
   288  					}
   289  					// Case 3: The current seek key is within the previously-set bounds.
   290  					// No need to change bounds.
   291  				}
   292  				flags := base.SeekGEFlagsNone
   293  				if lastSeekKey != nil && bytes.Compare(seekKey, lastSeekKey) > 0 {
   294  					flags = flags.EnableTrySeekUsingNext()
   295  				}
   296  				lastSeekKey = append(lastSeekKey[:0], seekKey...)
   297  
   298  				newKey, _ := iter.SeekGE(seekKey, flags)
   299  				if newKey == nil || !bytes.Equal(newKey.UserKey, seekKey) {
   300  					// We skipped some keys. Check if maybeFilteredKeys is true.
   301  					formattedNewKey := "<nil>"
   302  					if newKey != nil {
   303  						formattedNewKey = fmt.Sprintf("%s", testkeys.Comparer.FormatKey(newKey.UserKey))
   304  					}
   305  					require.True(t, iter.MaybeFilteredKeys(), "seeked for key = %s, got key = %s indicating block property filtering but MaybeFilteredKeys = false", testkeys.Comparer.FormatKey(seekKey), formattedNewKey)
   306  				}
   307  			}
   308  		})
   309  	}
   310  }
   311  
   312  func BenchmarkExternalIter_NonOverlapping_SeekNextScan(b *testing.B) {
   313  	ks := testkeys.Alpha(6)
   314  	opts := (&Options{}).EnsureDefaults()
   315  	iterOpts := &IterOptions{
   316  		KeyTypes: IterKeyTypePointsAndRanges,
   317  	}
   318  	writeOpts := opts.MakeWriterOptions(6, sstable.TableFormatPebblev2)
   319  
   320  	for _, keyCount := range []int{100, 10_000, 100_000} {
   321  		b.Run(fmt.Sprintf("keys=%d", keyCount), func(b *testing.B) {
   322  			for _, fileCount := range []int{1, 10, 100} {
   323  				b.Run(fmt.Sprintf("files=%d", fileCount), func(b *testing.B) {
   324  					var fs vfs.FS = vfs.NewMem()
   325  					filenames := make([]string, fileCount)
   326  					var keys [][]byte
   327  					for i := 0; i < fileCount; i++ {
   328  						filename := fmt.Sprintf("%03d.sst", i)
   329  						wf, err := fs.Create(filename)
   330  						require.NoError(b, err)
   331  						w := sstable.NewWriter(objstorageprovider.NewFileWritable(wf), writeOpts)
   332  						for j := 0; j < keyCount/fileCount; j++ {
   333  							key := testkeys.Key(ks, int64(len(keys)))
   334  							keys = append(keys, key)
   335  							require.NoError(b, w.Set(key, key))
   336  						}
   337  						require.NoError(b, w.Close())
   338  						filenames[i] = filename
   339  					}
   340  
   341  					for _, forwardOnly := range []bool{false, true} {
   342  						b.Run(fmt.Sprintf("forward-only=%t", forwardOnly), func(b *testing.B) {
   343  							var externalIterOpts []ExternalIterOption
   344  							if forwardOnly {
   345  								externalIterOpts = append(externalIterOpts, ExternalIterForwardOnly{})
   346  							}
   347  
   348  							for i := 0; i < b.N; i++ {
   349  								func() {
   350  									files := make([][]sstable.ReadableFile, fileCount)
   351  									for i := 0; i < fileCount; i++ {
   352  										f, err := fs.Open(filenames[i])
   353  										require.NoError(b, err)
   354  										files[i] = []sstable.ReadableFile{f}
   355  									}
   356  
   357  									it, err := NewExternalIter(opts, iterOpts, files, externalIterOpts...)
   358  									require.NoError(b, err)
   359  									defer it.Close()
   360  
   361  									for k := 0; k+1 < len(keys); k += 2 {
   362  										if !it.SeekGE(keys[k]) {
   363  											b.Fatalf("key %q not found", keys[k])
   364  										}
   365  										if !it.Next() {
   366  											b.Fatalf("key %q not found", keys[k+1])
   367  										}
   368  										if !bytes.Equal(it.Key(), keys[k+1]) {
   369  											b.Fatalf("expected key %q, found %q", keys[k+1], it.Key())
   370  										}
   371  									}
   372  								}()
   373  							}
   374  						})
   375  					}
   376  				})
   377  			}
   378  		})
   379  	}
   380  }