github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/scan_internal_test.go (about)

     1  // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"math"
    11  	"strconv"
    12  	"strings"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/cockroachdb/datadriven"
    17  	"github.com/cockroachdb/errors"
    18  	"github.com/cockroachdb/pebble/bloom"
    19  	"github.com/cockroachdb/pebble/internal/base"
    20  	"github.com/cockroachdb/pebble/internal/keyspan"
    21  	"github.com/cockroachdb/pebble/internal/rangekey"
    22  	"github.com/cockroachdb/pebble/internal/testkeys"
    23  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    24  	"github.com/cockroachdb/pebble/objstorage/remote"
    25  	"github.com/cockroachdb/pebble/sstable"
    26  	"github.com/cockroachdb/pebble/vfs"
    27  	"github.com/stretchr/testify/require"
    28  )
    29  
         // TestScanStatistics is a datadriven test for DB.ScanStatistics. The
         // commands build LSM state (reset, batch, commit, flush, compact,
         // snapshot) and the scan-statistics command prints selected counters
         // from the returned LSMKeyStatistics.
     30  func TestScanStatistics(t *testing.T) {
     31  	var d *DB
     32  	type scanInternalReader interface {
     33  		ScanStatistics(
     34  			ctx context.Context,
     35  			lower, upper []byte,
     36  			opts ScanStatisticsOptions,
     37  		) (LSMKeyStatistics, error)
     38  	}
     39  	batches := map[string]*Batch{}
     40  	snaps := map[string]*Snapshot{}
     41  	ctx := context.TODO()
     42  
         	// getOpts returns a fresh in-memory DB configuration with shared
         	// (remote) storage enabled and automatic compactions disabled, so
         	// the test controls LSM shape explicitly via flush/compact commands.
     43  	getOpts := func() *Options {
     44  		opts := &Options{
     45  			FS:                 vfs.NewMem(),
     46  			Logger:             testLogger{t: t},
     47  			Comparer:           testkeys.Comparer,
     48  			FormatMajorVersion: FormatRangeKeys,
     49  			BlockPropertyCollectors: []func() BlockPropertyCollector{
     50  				sstable.NewTestKeysBlockPropertyCollector,
     51  			},
     52  		}
     53  		opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{
     54  			"": remote.NewInMem(),
     55  		})
     56  		opts.Experimental.CreateOnShared = remote.CreateOnSharedAll
     57  		opts.Experimental.CreateOnSharedLocator = ""
     58  		opts.DisableAutomaticCompactions = true
     59  		opts.EnsureDefaults()
     60  		opts.WithFSDefaults()
     61  		return opts
     62  	}
         	// cleanup closes every open batch and snapshot and then the DB,
         	// returning the first error seen; it is reused by the "reset" command
         	// and deferred so state never leaks across datadriven runs.
     63  	cleanup := func() (err error) {
     64  		for key, batch := range batches {
     65  			err = firstError(err, batch.Close())
     66  			delete(batches, key)
     67  		}
     68  		for key, snap := range snaps {
     69  			err = firstError(err, snap.Close())
     70  			delete(snaps, key)
     71  		}
     72  		if d != nil {
     73  			err = firstError(err, d.Close())
     74  			d = nil
     75  		}
     76  		return err
     77  	}
     78  	defer cleanup()
     79  
     80  	datadriven.RunTest(t, "testdata/scan_statistics", func(t *testing.T, td *datadriven.TestData) string {
     81  		switch td.Cmd {
     82  		case "reset":
     83  			if err := cleanup(); err != nil {
     84  				t.Fatal(err)
     85  				return err.Error()
     86  			}
     87  			var err error
     88  			d, err = Open("", getOpts())
     89  			require.NoError(t, err)
     90  			require.NoError(t, d.SetCreatorID(1))
     91  			return ""
     92  		case "snapshot":
     93  			s := d.NewSnapshot()
     94  			var name string
     95  			td.ScanArgs(t, "name", &name)
     96  			snaps[name] = s
     97  			return ""
     98  		case "batch":
     99  			var name string
    100  			td.MaybeScanArgs(t, "name", &name)
    101  			commit := td.HasArg("commit")
    102  			b := d.NewIndexedBatch()
    103  			require.NoError(t, runBatchDefineCmd(td, b))
    104  			var err error
         			// Commit under a recover so that a panicking commit is reported
         			// as datadriven output rather than aborting the whole test.
    105  			if commit {
    106  				func() {
    107  					defer func() {
    108  						if r := recover(); r != nil {
    109  							err = errors.New(r.(string))
    110  						}
    111  					}()
    112  					err = b.Commit(nil)
    113  				}()
    114  			} else if name != "" {
    115  				batches[name] = b
    116  			}
    117  			if err != nil {
    118  				return err.Error()
    119  			}
    120  			count := b.Count()
    121  			if commit {
    122  				return fmt.Sprintf("committed %d keys\n", count)
    123  			}
    124  			return fmt.Sprintf("wrote %d keys to batch %q\n", count, name)
    125  		case "compact":
    126  			if err := runCompactCmd(td, d); err != nil {
    127  				return err.Error()
    128  			}
    129  			return runLSMCmd(td, d)
    130  		case "flush":
    131  			err := d.Flush()
    132  			if err != nil {
    133  				return err.Error()
    134  			}
    135  			return ""
    136  		case "commit":
    137  			name := pluckStringCmdArg(td, "batch")
    138  			b := batches[name]
    139  			defer b.Close()
    140  			count := b.Count()
    141  			require.NoError(t, d.Apply(b, nil))
    142  			delete(batches, name)
    143  			return fmt.Sprintf("committed %d keys\n", count)
    144  		case "scan-statistics":
    145  			var lower, upper []byte
    146  			var reader scanInternalReader = d
    147  			var b strings.Builder
    148  			var showSnapshotPinned = false
    149  			var keyKindsToDisplay []InternalKeyKind
    150  			var showLevels []string
    151  
    152  			for _, arg := range td.CmdArgs {
    153  				switch arg.Key {
    154  				case "lower":
    155  					lower = []byte(arg.Vals[0])
    156  				case "upper":
    157  					upper = []byte(arg.Vals[0])
    158  				case "show-snapshot-pinned":
    159  					showSnapshotPinned = true
    160  				case "keys":
    161  					for _, key := range arg.Vals {
    162  						keyKindsToDisplay = append(keyKindsToDisplay, base.ParseKind(key))
    163  					}
    164  				case "levels":
    165  					showLevels = append(showLevels, arg.Vals...)
    166  				default:
    167  				}
    168  			}
         			// The parsed args above only filter what is printed below; the
         			// scan itself always runs with a zero ScanStatisticsOptions.
    169  			stats, err := reader.ScanStatistics(ctx, lower, upper, ScanStatisticsOptions{})
    170  			if err != nil {
    171  				return err.Error()
    172  			}
    173  
    174  			for _, level := range showLevels {
    175  				lvl, err := strconv.Atoi(level)
         				// NOTE(review): only the upper bound is validated; a negative
         				// parsed level would panic on the Levels index below. Assumed
         				// absent from testdata — confirm.
    176  				if err != nil || lvl >= numLevels {
    177  					return fmt.Sprintf("invalid level %s", level)
    178  				}
    179  
    180  				fmt.Fprintf(&b, "Level %d:\n", lvl)
    181  				if showSnapshotPinned {
    182  					fmt.Fprintf(&b, "  compaction pinned count: %d\n", stats.Levels[lvl].SnapshotPinnedKeys)
    183  				}
    184  				for _, kind := range keyKindsToDisplay {
    185  					fmt.Fprintf(&b, "  %s key count: %d\n", kind.String(), stats.Levels[lvl].KindsCount[kind])
    186  					if stats.Levels[lvl].LatestKindsCount[kind] > 0 {
    187  						fmt.Fprintf(&b, "  %s latest count: %d\n", kind.String(), stats.Levels[lvl].LatestKindsCount[kind])
    188  					}
    189  				}
    190  			}
    191  
    192  			fmt.Fprintf(&b, "Aggregate:\n")
    193  			if showSnapshotPinned {
    194  				fmt.Fprintf(&b, "  snapshot pinned count: %d\n", stats.Accumulated.SnapshotPinnedKeys)
    195  			}
    196  			for _, kind := range keyKindsToDisplay {
    197  				fmt.Fprintf(&b, "  %s key count: %d\n", kind.String(), stats.Accumulated.KindsCount[kind])
    198  				if stats.Accumulated.LatestKindsCount[kind] > 0 {
    199  					fmt.Fprintf(&b, "  %s latest count: %d\n", kind.String(), stats.Accumulated.LatestKindsCount[kind])
    200  				}
    201  			}
    202  			return b.String()
    203  		default:
    204  			return fmt.Sprintf("unknown command %q", td.Cmd)
    205  		}
    206  	})
    207  }
   208  
         // TestScanInternal is a datadriven test for ScanInternal on a DB, a
         // Snapshot, or an EventuallyFileOnlySnapshot. Commands build LSM state
         // (define, reset, batch with commit/ingest, compact, flush, snapshots)
         // and scan-internal prints every point key, rangedel, range key, and —
         // with skip-shared — shared file visited by the scan.
    209  func TestScanInternal(t *testing.T) {
    210  	var d *DB
    211  	type scanInternalReader interface {
    212  		ScanInternal(
    213  			ctx context.Context,
    214  			lower, upper []byte,
    215  			visitPointKey func(key *InternalKey, value LazyValue, iterInfo IteratorLevel) error,
    216  			visitRangeDel func(start, end []byte, seqNum uint64) error,
    217  			visitRangeKey func(start, end []byte, keys []keyspan.Key) error,
    218  			visitSharedFile func(sst *SharedSSTMeta) error,
    219  		) error
    220  	}
    221  	batches := map[string]*Batch{}
    222  	snaps := map[string]*Snapshot{}
    223  	efos := map[string]*EventuallyFileOnlySnapshot{}
         	// parseOpts builds an in-memory DB configuration (shared storage on,
         	// automatic compactions off) and applies any per-command overrides:
         	// format-major-version, block/index-block/target-file sizes,
         	// bloom-bits-per-key, and merger.
    224  	parseOpts := func(td *datadriven.TestData) (*Options, error) {
    225  		opts := &Options{
    226  			FS:                 vfs.NewMem(),
    227  			Logger:             testLogger{t: t},
    228  			Comparer:           testkeys.Comparer,
    229  			FormatMajorVersion: FormatVirtualSSTables,
    230  			BlockPropertyCollectors: []func() BlockPropertyCollector{
    231  				sstable.NewTestKeysBlockPropertyCollector,
    232  			},
    233  		}
    234  		opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{
    235  			"": remote.NewInMem(),
    236  		})
    237  		opts.Experimental.CreateOnShared = remote.CreateOnSharedAll
    238  		opts.Experimental.CreateOnSharedLocator = ""
    239  		opts.DisableAutomaticCompactions = true
    240  		opts.EnsureDefaults()
    241  		opts.WithFSDefaults()
    242  
    243  		for _, cmdArg := range td.CmdArgs {
    244  			switch cmdArg.Key {
    245  			case "format-major-version":
    246  				v, err := strconv.Atoi(cmdArg.Vals[0])
    247  				if err != nil {
    248  					return nil, err
    249  				}
    250  				// Override the DB version.
    251  				opts.FormatMajorVersion = FormatMajorVersion(v)
    252  			case "block-size":
    253  				v, err := strconv.Atoi(cmdArg.Vals[0])
    254  				if err != nil {
    255  					return nil, err
    256  				}
    257  				for i := range opts.Levels {
    258  					opts.Levels[i].BlockSize = v
    259  				}
    260  			case "index-block-size":
    261  				v, err := strconv.Atoi(cmdArg.Vals[0])
    262  				if err != nil {
    263  					return nil, err
    264  				}
    265  				for i := range opts.Levels {
    266  					opts.Levels[i].IndexBlockSize = v
    267  				}
    268  			case "target-file-size":
    269  				v, err := strconv.Atoi(cmdArg.Vals[0])
    270  				if err != nil {
    271  					return nil, err
    272  				}
    273  				for i := range opts.Levels {
    274  					opts.Levels[i].TargetFileSize = int64(v)
    275  				}
    276  			case "bloom-bits-per-key":
    277  				v, err := strconv.Atoi(cmdArg.Vals[0])
    278  				if err != nil {
    279  					return nil, err
    280  				}
    281  				fp := bloom.FilterPolicy(v)
    282  				opts.Filters = map[string]FilterPolicy{fp.Name(): fp}
    283  				for i := range opts.Levels {
    284  					opts.Levels[i].FilterPolicy = fp
    285  				}
    286  			case "merger":
    287  				switch cmdArg.Vals[0] {
    288  				case "appender":
    289  					opts.Merger = base.DefaultMerger
    290  				default:
    291  					return nil, errors.Newf("unrecognized Merger %q\n", cmdArg.Vals[0])
    292  				}
    293  			}
    294  		}
    295  		return opts, nil
    296  	}
         	// cleanup closes every open batch, snapshot, and file-only snapshot
         	// and then the DB, returning the first error seen; it is reused by
         	// the "define" and "reset" commands and deferred at test end.
    297  	cleanup := func() (err error) {
    298  		for key, batch := range batches {
    299  			err = firstError(err, batch.Close())
    300  			delete(batches, key)
    301  		}
    302  		for key, snap := range snaps {
    303  			err = firstError(err, snap.Close())
    304  			delete(snaps, key)
    305  		}
    306  		for key, es := range efos {
    307  			err = firstError(err, es.Close())
    308  			delete(efos, key)
    309  		}
    310  		if d != nil {
    311  			err = firstError(err, d.Close())
    312  			d = nil
    313  		}
    314  		return err
    315  	}
    316  	defer cleanup()
    317  
    318  	datadriven.RunTest(t, "testdata/scan_internal", func(t *testing.T, td *datadriven.TestData) string {
    319  		switch td.Cmd {
    320  		case "define":
    321  			if err := cleanup(); err != nil {
    322  				return err.Error()
    323  			}
    324  			opts, err := parseOpts(td)
    325  			if err != nil {
    326  				return err.Error()
    327  			}
    328  			d, err = runDBDefineCmd(td, opts)
    329  			if err != nil {
    330  				return err.Error()
    331  			}
    332  			return runLSMCmd(td, d)
    333  
    334  		case "reset":
    335  			if err := cleanup(); err != nil {
    336  				t.Fatal(err)
    337  				return err.Error()
    338  			}
    339  			opts, err := parseOpts(td)
    340  			if err != nil {
    341  				t.Fatal(err)
    342  				return err.Error()
    343  			}
    344  
    345  			d, err = Open("", opts)
    346  			require.NoError(t, err)
    347  			require.NoError(t, d.SetCreatorID(1))
    348  			return ""
    349  		case "snapshot":
    350  			s := d.NewSnapshot()
    351  			var name string
    352  			td.ScanArgs(t, "name", &name)
    353  			snaps[name] = s
    354  			return ""
    355  		case "wait-for-file-only-snapshot":
    356  			if len(td.CmdArgs) != 1 {
    357  				panic("insufficient args for file-only-snapshot command")
    358  			}
    359  			name := td.CmdArgs[0].Key
    360  			es := efos[name]
    361  			if err := es.WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond); err != nil {
    362  				return err.Error()
    363  			}
    364  			return "ok"
    365  		case "file-only-snapshot":
    366  			if len(td.CmdArgs) != 1 {
    367  				panic("insufficient args for file-only-snapshot command")
    368  			}
    369  			name := td.CmdArgs[0].Key
         			// Each input line is "<start> <end>", the protected key ranges
         			// for the eventually-file-only snapshot.
    370  			var keyRanges []KeyRange
    371  			for _, line := range strings.Split(td.Input, "\n") {
    372  				fields := strings.Fields(line)
    373  				if len(fields) != 2 {
    374  					return "expected two fields for file-only snapshot KeyRanges"
    375  				}
    376  				kr := KeyRange{Start: []byte(fields[0]), End: []byte(fields[1])}
    377  				keyRanges = append(keyRanges, kr)
    378  			}
    379  
    380  			s := d.NewEventuallyFileOnlySnapshot(keyRanges)
    381  			efos[name] = s
    382  			return "ok"
    383  		case "batch":
    384  			var name string
    385  			td.MaybeScanArgs(t, "name", &name)
    386  			commit := td.HasArg("commit")
    387  			ingest := td.HasArg("ingest")
    388  			b := d.NewIndexedBatch()
    389  			require.NoError(t, runBatchDefineCmd(td, b))
    390  			var err error
         			// Commit under a recover so that a panicking commit is reported
         			// as datadriven output rather than aborting the whole test.
    391  			if commit {
    392  				func() {
    393  					defer func() {
    394  						if r := recover(); r != nil {
    395  							err = errors.New(r.(string))
    396  						}
    397  					}()
    398  					err = b.Commit(nil)
    399  				}()
    400  			} else if ingest {
         				// Build a temp sstable from the batch contents — rangedels,
         				// range keys re-stamped to seqnum 0, then point keys — and
         				// ingest it into the DB.
    401  				points, rangeDels, rangeKeys := batchSort(b)
         				// NOTE(review): := redeclares err here, shadowing the err
         				// checked after this if/else chain; failures in this branch
         				// are surfaced via require.NoError instead.
    402  				file, err := d.opts.FS.Create("temp0.sst")
    403  				require.NoError(t, err)
    404  				w := sstable.NewWriter(objstorageprovider.NewFileWritable(file), d.opts.MakeWriterOptions(0, sstable.TableFormatPebblev4))
    405  				for span := rangeDels.First(); span != nil; span = rangeDels.Next() {
    406  					require.NoError(t, w.DeleteRange(span.Start, span.End))
    407  				}
    408  				rangeDels.Close()
    409  				for span := rangeKeys.First(); span != nil; span = rangeKeys.Next() {
    410  					keys := []keyspan.Key{}
    411  					for i := range span.Keys {
    412  						keys = append(keys, span.Keys[i])
    413  						keys[i].Trailer = base.MakeTrailer(0, keys[i].Kind())
    414  					}
    415  					keyspan.SortKeysByTrailer(&keys)
    416  					newSpan := &keyspan.Span{Start: span.Start, End: span.End, Keys: keys}
    417  					rangekey.Encode(newSpan, w.AddRangeKey)
    418  				}
    419  				rangeKeys.Close()
    420  				for key, val := points.First(); key != nil; key, val = points.Next() {
    421  					var value []byte
    422  					value, _, err = val.Value(value)
    423  					require.NoError(t, err)
    424  					require.NoError(t, w.Add(*key, value))
    425  				}
    426  				points.Close()
    427  				require.NoError(t, w.Close())
    428  				require.NoError(t, d.Ingest([]string{"temp0.sst"}))
    429  			} else if name != "" {
    430  				batches[name] = b
    431  			}
    432  			if err != nil {
    433  				return err.Error()
    434  			}
    435  			count := b.Count()
    436  			if commit {
    437  				return fmt.Sprintf("committed %d keys\n", count)
    438  			}
    439  			return fmt.Sprintf("wrote %d keys to batch %q\n", count, name)
    440  		case "compact":
    441  			if err := runCompactCmd(td, d); err != nil {
    442  				return err.Error()
    443  			}
    444  			return runLSMCmd(td, d)
    445  		case "flush":
    446  			err := d.Flush()
    447  			if err != nil {
    448  				return err.Error()
    449  			}
    450  			return ""
    451  		case "lsm":
    452  			return runLSMCmd(td, d)
    453  		case "commit":
    454  			name := pluckStringCmdArg(td, "batch")
    455  			b := batches[name]
    456  			defer b.Close()
    457  			count := b.Count()
    458  			require.NoError(t, d.Apply(b, nil))
    459  			delete(batches, name)
    460  			return fmt.Sprintf("committed %d keys\n", count)
    461  		case "scan-internal":
    462  			var lower, upper []byte
    463  			var reader scanInternalReader = d
    464  			var b strings.Builder
         			// fileVisitor stays nil unless skip-shared is given; a nil
         			// visitor means shared files are not skipped by the scan.
    465  			var fileVisitor func(sst *SharedSSTMeta) error
    466  			for _, arg := range td.CmdArgs {
    467  				switch arg.Key {
    468  				case "lower":
    469  					lower = []byte(arg.Vals[0])
    470  				case "upper":
    471  					upper = []byte(arg.Vals[0])
    472  				case "snapshot":
    473  					name := arg.Vals[0]
    474  					snap, ok := snaps[name]
    475  					if !ok {
    476  						return fmt.Sprintf("no snapshot found for name %s", name)
    477  					}
    478  					reader = snap
    479  				case "file-only-snapshot":
    480  					name := arg.Vals[0]
    481  					efos, ok := efos[name]
    482  					if !ok {
    483  						return fmt.Sprintf("no snapshot found for name %s", name)
    484  					}
    485  					reader = efos
    486  				case "skip-shared":
    487  					fileVisitor = func(sst *SharedSSTMeta) error {
    488  						fmt.Fprintf(&b, "shared file: %s [%s-%s] [point=%s-%s] [range=%s-%s]\n", sst.fileNum, sst.Smallest.String(), sst.Largest.String(), sst.SmallestPointKey.String(), sst.LargestPointKey.String(), sst.SmallestRangeKey.String(), sst.LargestRangeKey.String())
    489  						return nil
    490  					}
    491  				}
    492  			}
         			// Run the scan; each visitor appends one line per visited
         			// point key, rangedel, or range-key span to the output.
    493  			err := reader.ScanInternal(context.TODO(), lower, upper,
    494  				func(key *InternalKey, value LazyValue, _ IteratorLevel) error {
    495  					v := value.InPlaceValue()
    496  					fmt.Fprintf(&b, "%s (%s)\n", key, v)
    497  					return nil
    498  				},
    499  				func(start, end []byte, seqNum uint64) error {
    500  					fmt.Fprintf(&b, "%s-%s#%d,RANGEDEL\n", start, end, seqNum)
    501  					return nil
    502  				},
    503  				func(start, end []byte, keys []keyspan.Key) error {
    504  					s := keyspan.Span{Start: start, End: end, Keys: keys}
    505  					fmt.Fprintf(&b, "%s\n", s.String())
    506  					return nil
    507  				},
    508  				fileVisitor,
    509  			)
    510  			if err != nil {
    511  				return err.Error()
    512  			}
    513  			return b.String()
    514  		default:
    515  			return fmt.Sprintf("unknown command %q", td.Cmd)
    516  		}
    517  	})
    518  }
   519  
         // TestPointCollapsingIter is a datadriven test for
         // pointCollapsingIterator. "define" records a block of internal keys;
         // "iter" parses them into a fakeIter (RANGEDELs become keyspan spans),
         // interleaves the two, and runs iterator commands against the
         // collapsing iterator.
    520  func TestPointCollapsingIter(t *testing.T) {
    521  	var def string
    522  	datadriven.RunTest(t, "testdata/point_collapsing_iter", func(t *testing.T, d *datadriven.TestData) string {
    523  		switch d.Cmd {
    524  		case "define":
    525  			def = d.Input
    526  			return ""
    527  
    528  		case "iter":
         			// Each whitespace-separated token is "<internal-key>:<value>".
    529  			f := &fakeIter{}
    530  			var spans []keyspan.Span
    531  			for _, line := range strings.Split(def, "\n") {
    532  				for _, key := range strings.Fields(line) {
    533  					j := strings.Index(key, ":")
    534  					k := base.ParseInternalKey(key[:j])
    535  					v := []byte(key[j+1:])
         					// RANGEDELs are routed to a separate keyspan iterator;
         					// their "value" is the span's exclusive end key.
    536  					if k.Kind() == InternalKeyKindRangeDelete {
    537  						spans = append(spans, keyspan.Span{
    538  							Start:     k.UserKey,
    539  							End:       v,
    540  							Keys:      []keyspan.Key{{Trailer: k.Trailer}},
    541  							KeysOrder: 0,
    542  						})
    543  						continue
    544  					}
    545  					f.keys = append(f.keys, k)
    546  					f.vals = append(f.vals, v)
    547  				}
    548  			}
    549  
         			// seqNum MaxUint64 makes every parsed key visible to the
         			// collapsing iterator.
    550  			ksIter := keyspan.NewIter(base.DefaultComparer.Compare, spans)
    551  			pcIter := &pointCollapsingIterator{
    552  				comparer: base.DefaultComparer,
    553  				merge:    base.DefaultMerger.Merge,
    554  				seqNum:   math.MaxUint64,
    555  			}
    556  			pcIter.iter.Init(base.DefaultComparer, f, ksIter, keyspan.InterleavingIterOpts{})
    557  			defer pcIter.Close()
    558  
    559  			return runInternalIterCmd(t, d, pcIter, iterCmdVerboseKey)
    560  
    561  		default:
    562  			return fmt.Sprintf("unknown command: %s", d.Cmd)
    563  		}
    564  	})
    565  }