github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/scan_internal_test.go (about)

     1  // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"math"
    11  	"strconv"
    12  	"strings"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/cockroachdb/datadriven"
    17  	"github.com/cockroachdb/errors"
    18  	"github.com/cockroachdb/pebble/bloom"
    19  	"github.com/cockroachdb/pebble/internal/base"
    20  	"github.com/cockroachdb/pebble/internal/itertest"
    21  	"github.com/cockroachdb/pebble/internal/keyspan"
    22  	"github.com/cockroachdb/pebble/internal/rangekey"
    23  	"github.com/cockroachdb/pebble/internal/testkeys"
    24  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    25  	"github.com/cockroachdb/pebble/objstorage/remote"
    26  	"github.com/cockroachdb/pebble/sstable"
    27  	"github.com/cockroachdb/pebble/vfs"
    28  	"github.com/stretchr/testify/require"
    29  )
    30  
    31  func TestScanStatistics(t *testing.T) {
    32  	var d *DB
    33  	type scanInternalReader interface {
    34  		ScanStatistics(
    35  			ctx context.Context,
    36  			lower, upper []byte,
    37  			opts ScanStatisticsOptions,
    38  		) (LSMKeyStatistics, error)
    39  	}
    40  	batches := map[string]*Batch{}
    41  	snaps := map[string]*Snapshot{}
    42  	ctx := context.TODO()
    43  
    44  	getOpts := func() *Options {
    45  		opts := &Options{
    46  			FS:                 vfs.NewMem(),
    47  			Logger:             testLogger{t: t},
    48  			Comparer:           testkeys.Comparer,
    49  			FormatMajorVersion: FormatRangeKeys,
    50  			BlockPropertyCollectors: []func() BlockPropertyCollector{
    51  				sstable.NewTestKeysBlockPropertyCollector,
    52  			},
    53  		}
    54  		opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{
    55  			"": remote.NewInMem(),
    56  		})
    57  		opts.Experimental.CreateOnShared = remote.CreateOnSharedAll
    58  		opts.Experimental.CreateOnSharedLocator = ""
    59  		opts.DisableAutomaticCompactions = true
    60  		opts.EnsureDefaults()
    61  		opts.WithFSDefaults()
    62  		return opts
    63  	}
    64  	cleanup := func() (err error) {
    65  		for key, batch := range batches {
    66  			err = firstError(err, batch.Close())
    67  			delete(batches, key)
    68  		}
    69  		for key, snap := range snaps {
    70  			err = firstError(err, snap.Close())
    71  			delete(snaps, key)
    72  		}
    73  		if d != nil {
    74  			err = firstError(err, d.Close())
    75  			d = nil
    76  		}
    77  		return err
    78  	}
    79  	defer cleanup()
    80  
    81  	datadriven.RunTest(t, "testdata/scan_statistics", func(t *testing.T, td *datadriven.TestData) string {
    82  		switch td.Cmd {
    83  		case "reset":
    84  			if err := cleanup(); err != nil {
    85  				t.Fatal(err)
    86  				return err.Error()
    87  			}
    88  			var err error
    89  			d, err = Open("", getOpts())
    90  			require.NoError(t, err)
    91  			require.NoError(t, d.SetCreatorID(1))
    92  			return ""
    93  		case "snapshot":
    94  			s := d.NewSnapshot()
    95  			var name string
    96  			td.ScanArgs(t, "name", &name)
    97  			snaps[name] = s
    98  			return ""
    99  		case "batch":
   100  			var name string
   101  			td.MaybeScanArgs(t, "name", &name)
   102  			commit := td.HasArg("commit")
   103  			b := d.NewIndexedBatch()
   104  			require.NoError(t, runBatchDefineCmd(td, b))
   105  			var err error
   106  			if commit {
   107  				func() {
   108  					defer func() {
   109  						if r := recover(); r != nil {
   110  							err = errors.New(r.(string))
   111  						}
   112  					}()
   113  					err = b.Commit(nil)
   114  				}()
   115  			} else if name != "" {
   116  				batches[name] = b
   117  			}
   118  			if err != nil {
   119  				return err.Error()
   120  			}
   121  			count := b.Count()
   122  			if commit {
   123  				return fmt.Sprintf("committed %d keys\n", count)
   124  			}
   125  			return fmt.Sprintf("wrote %d keys to batch %q\n", count, name)
   126  		case "compact":
   127  			if err := runCompactCmd(td, d); err != nil {
   128  				return err.Error()
   129  			}
   130  			return runLSMCmd(td, d)
   131  		case "flush":
   132  			err := d.Flush()
   133  			if err != nil {
   134  				return err.Error()
   135  			}
   136  			return ""
   137  		case "commit":
   138  			name := pluckStringCmdArg(td, "batch")
   139  			b := batches[name]
   140  			defer b.Close()
   141  			count := b.Count()
   142  			require.NoError(t, d.Apply(b, nil))
   143  			delete(batches, name)
   144  			return fmt.Sprintf("committed %d keys\n", count)
   145  		case "scan-statistics":
   146  			var lower, upper []byte
   147  			var reader scanInternalReader = d
   148  			var b strings.Builder
   149  			var showSnapshotPinned = false
   150  			var keyKindsToDisplay []InternalKeyKind
   151  			var showLevels []string
   152  
   153  			for _, arg := range td.CmdArgs {
   154  				switch arg.Key {
   155  				case "lower":
   156  					lower = []byte(arg.Vals[0])
   157  				case "upper":
   158  					upper = []byte(arg.Vals[0])
   159  				case "show-snapshot-pinned":
   160  					showSnapshotPinned = true
   161  				case "keys":
   162  					for _, key := range arg.Vals {
   163  						keyKindsToDisplay = append(keyKindsToDisplay, base.ParseKind(key))
   164  					}
   165  				case "levels":
   166  					showLevels = append(showLevels, arg.Vals...)
   167  				default:
   168  				}
   169  			}
   170  			stats, err := reader.ScanStatistics(ctx, lower, upper, ScanStatisticsOptions{})
   171  			if err != nil {
   172  				return err.Error()
   173  			}
   174  
   175  			for _, level := range showLevels {
   176  				lvl, err := strconv.Atoi(level)
   177  				if err != nil || lvl >= numLevels {
   178  					return fmt.Sprintf("invalid level %s", level)
   179  				}
   180  
   181  				fmt.Fprintf(&b, "Level %d:\n", lvl)
   182  				if showSnapshotPinned {
   183  					fmt.Fprintf(&b, "  compaction pinned count: %d\n", stats.Levels[lvl].SnapshotPinnedKeys)
   184  				}
   185  				for _, kind := range keyKindsToDisplay {
   186  					fmt.Fprintf(&b, "  %s key count: %d\n", kind.String(), stats.Levels[lvl].KindsCount[kind])
   187  					if stats.Levels[lvl].LatestKindsCount[kind] > 0 {
   188  						fmt.Fprintf(&b, "  %s latest count: %d\n", kind.String(), stats.Levels[lvl].LatestKindsCount[kind])
   189  					}
   190  				}
   191  			}
   192  
   193  			fmt.Fprintf(&b, "Aggregate:\n")
   194  			if showSnapshotPinned {
   195  				fmt.Fprintf(&b, "  snapshot pinned count: %d\n", stats.Accumulated.SnapshotPinnedKeys)
   196  			}
   197  			for _, kind := range keyKindsToDisplay {
   198  				fmt.Fprintf(&b, "  %s key count: %d\n", kind.String(), stats.Accumulated.KindsCount[kind])
   199  				if stats.Accumulated.LatestKindsCount[kind] > 0 {
   200  					fmt.Fprintf(&b, "  %s latest count: %d\n", kind.String(), stats.Accumulated.LatestKindsCount[kind])
   201  				}
   202  			}
   203  			return b.String()
   204  		default:
   205  			return fmt.Sprintf("unknown command %q", td.Cmd)
   206  		}
   207  	})
   208  }
   209  
   210  func TestScanInternal(t *testing.T) {
   211  	var d *DB
   212  	type scanInternalReader interface {
   213  		ScanInternal(
   214  			ctx context.Context,
   215  			categoryAndQoS sstable.CategoryAndQoS,
   216  			lower, upper []byte,
   217  			visitPointKey func(key *InternalKey, value LazyValue, iterInfo IteratorLevel) error,
   218  			visitRangeDel func(start, end []byte, seqNum uint64) error,
   219  			visitRangeKey func(start, end []byte, keys []keyspan.Key) error,
   220  			visitSharedFile func(sst *SharedSSTMeta) error,
   221  		) error
   222  	}
   223  	batches := map[string]*Batch{}
   224  	snaps := map[string]*Snapshot{}
   225  	efos := map[string]*EventuallyFileOnlySnapshot{}
   226  	parseOpts := func(td *datadriven.TestData) (*Options, error) {
   227  		opts := &Options{
   228  			FS:                 vfs.NewMem(),
   229  			Logger:             testLogger{t: t},
   230  			Comparer:           testkeys.Comparer,
   231  			FormatMajorVersion: FormatVirtualSSTables,
   232  			BlockPropertyCollectors: []func() BlockPropertyCollector{
   233  				sstable.NewTestKeysBlockPropertyCollector,
   234  			},
   235  		}
   236  		opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{
   237  			"": remote.NewInMem(),
   238  		})
   239  		opts.Experimental.CreateOnShared = remote.CreateOnSharedAll
   240  		opts.Experimental.CreateOnSharedLocator = ""
   241  		opts.DisableAutomaticCompactions = true
   242  		opts.EnsureDefaults()
   243  		opts.WithFSDefaults()
   244  
   245  		for _, cmdArg := range td.CmdArgs {
   246  			switch cmdArg.Key {
   247  			case "format-major-version":
   248  				v, err := strconv.Atoi(cmdArg.Vals[0])
   249  				if err != nil {
   250  					return nil, err
   251  				}
   252  				// Override the DB version.
   253  				opts.FormatMajorVersion = FormatMajorVersion(v)
   254  			case "block-size":
   255  				v, err := strconv.Atoi(cmdArg.Vals[0])
   256  				if err != nil {
   257  					return nil, err
   258  				}
   259  				for i := range opts.Levels {
   260  					opts.Levels[i].BlockSize = v
   261  				}
   262  			case "index-block-size":
   263  				v, err := strconv.Atoi(cmdArg.Vals[0])
   264  				if err != nil {
   265  					return nil, err
   266  				}
   267  				for i := range opts.Levels {
   268  					opts.Levels[i].IndexBlockSize = v
   269  				}
   270  			case "target-file-size":
   271  				v, err := strconv.Atoi(cmdArg.Vals[0])
   272  				if err != nil {
   273  					return nil, err
   274  				}
   275  				for i := range opts.Levels {
   276  					opts.Levels[i].TargetFileSize = int64(v)
   277  				}
   278  			case "bloom-bits-per-key":
   279  				v, err := strconv.Atoi(cmdArg.Vals[0])
   280  				if err != nil {
   281  					return nil, err
   282  				}
   283  				fp := bloom.FilterPolicy(v)
   284  				opts.Filters = map[string]FilterPolicy{fp.Name(): fp}
   285  				for i := range opts.Levels {
   286  					opts.Levels[i].FilterPolicy = fp
   287  				}
   288  			case "merger":
   289  				switch cmdArg.Vals[0] {
   290  				case "appender":
   291  					opts.Merger = base.DefaultMerger
   292  				default:
   293  					return nil, errors.Newf("unrecognized Merger %q\n", cmdArg.Vals[0])
   294  				}
   295  			}
   296  		}
   297  		return opts, nil
   298  	}
   299  	cleanup := func() (err error) {
   300  		for key, batch := range batches {
   301  			err = firstError(err, batch.Close())
   302  			delete(batches, key)
   303  		}
   304  		for key, snap := range snaps {
   305  			err = firstError(err, snap.Close())
   306  			delete(snaps, key)
   307  		}
   308  		for key, es := range efos {
   309  			err = firstError(err, es.Close())
   310  			delete(efos, key)
   311  		}
   312  		if d != nil {
   313  			err = firstError(err, d.Close())
   314  			d = nil
   315  		}
   316  		return err
   317  	}
   318  	defer cleanup()
   319  
   320  	datadriven.RunTest(t, "testdata/scan_internal", func(t *testing.T, td *datadriven.TestData) string {
   321  		switch td.Cmd {
   322  		case "define":
   323  			if err := cleanup(); err != nil {
   324  				return err.Error()
   325  			}
   326  			opts, err := parseOpts(td)
   327  			if err != nil {
   328  				return err.Error()
   329  			}
   330  			d, err = runDBDefineCmd(td, opts)
   331  			if err != nil {
   332  				return err.Error()
   333  			}
   334  			return runLSMCmd(td, d)
   335  
   336  		case "reset":
   337  			if err := cleanup(); err != nil {
   338  				t.Fatal(err)
   339  				return err.Error()
   340  			}
   341  			opts, err := parseOpts(td)
   342  			if err != nil {
   343  				t.Fatal(err)
   344  				return err.Error()
   345  			}
   346  
   347  			d, err = Open("", opts)
   348  			require.NoError(t, err)
   349  			require.NoError(t, d.SetCreatorID(1))
   350  			return ""
   351  		case "snapshot":
   352  			s := d.NewSnapshot()
   353  			var name string
   354  			td.ScanArgs(t, "name", &name)
   355  			snaps[name] = s
   356  			return ""
   357  		case "wait-for-file-only-snapshot":
   358  			if len(td.CmdArgs) != 1 {
   359  				panic("insufficient args for file-only-snapshot command")
   360  			}
   361  			name := td.CmdArgs[0].Key
   362  			es := efos[name]
   363  			if err := es.WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond); err != nil {
   364  				return err.Error()
   365  			}
   366  			return "ok"
   367  		case "file-only-snapshot":
   368  			if len(td.CmdArgs) != 1 {
   369  				panic("insufficient args for file-only-snapshot command")
   370  			}
   371  			name := td.CmdArgs[0].Key
   372  			var keyRanges []KeyRange
   373  			for _, line := range strings.Split(td.Input, "\n") {
   374  				fields := strings.Fields(line)
   375  				if len(fields) != 2 {
   376  					return "expected two fields for file-only snapshot KeyRanges"
   377  				}
   378  				kr := KeyRange{Start: []byte(fields[0]), End: []byte(fields[1])}
   379  				keyRanges = append(keyRanges, kr)
   380  			}
   381  
   382  			s := d.NewEventuallyFileOnlySnapshot(keyRanges)
   383  			efos[name] = s
   384  			return "ok"
   385  		case "batch":
   386  			var name string
   387  			td.MaybeScanArgs(t, "name", &name)
   388  			commit := td.HasArg("commit")
   389  			ingest := td.HasArg("ingest")
   390  			b := d.NewIndexedBatch()
   391  			require.NoError(t, runBatchDefineCmd(td, b))
   392  			var err error
   393  			if commit {
   394  				func() {
   395  					defer func() {
   396  						if r := recover(); r != nil {
   397  							err = errors.New(r.(string))
   398  						}
   399  					}()
   400  					err = b.Commit(nil)
   401  				}()
   402  			} else if ingest {
   403  				points, rangeDels, rangeKeys := batchSort(b)
   404  				file, err := d.opts.FS.Create("temp0.sst")
   405  				require.NoError(t, err)
   406  				w := sstable.NewWriter(objstorageprovider.NewFileWritable(file), d.opts.MakeWriterOptions(0, sstable.TableFormatPebblev4))
   407  				for span := rangeDels.First(); span != nil; span = rangeDels.Next() {
   408  					require.NoError(t, w.DeleteRange(span.Start, span.End))
   409  				}
   410  				rangeDels.Close()
   411  				for span := rangeKeys.First(); span != nil; span = rangeKeys.Next() {
   412  					keys := []keyspan.Key{}
   413  					for i := range span.Keys {
   414  						keys = append(keys, span.Keys[i])
   415  						keys[i].Trailer = base.MakeTrailer(0, keys[i].Kind())
   416  					}
   417  					keyspan.SortKeysByTrailer(&keys)
   418  					newSpan := &keyspan.Span{Start: span.Start, End: span.End, Keys: keys}
   419  					rangekey.Encode(newSpan, w.AddRangeKey)
   420  				}
   421  				rangeKeys.Close()
   422  				for key, val := points.First(); key != nil; key, val = points.Next() {
   423  					var value []byte
   424  					value, _, err = val.Value(value)
   425  					require.NoError(t, err)
   426  					require.NoError(t, w.Add(*key, value))
   427  				}
   428  				points.Close()
   429  				require.NoError(t, w.Close())
   430  				require.NoError(t, d.Ingest([]string{"temp0.sst"}))
   431  			} else if name != "" {
   432  				batches[name] = b
   433  			}
   434  			if err != nil {
   435  				return err.Error()
   436  			}
   437  			count := b.Count()
   438  			if commit {
   439  				return fmt.Sprintf("committed %d keys\n", count)
   440  			}
   441  			return fmt.Sprintf("wrote %d keys to batch %q\n", count, name)
   442  		case "compact":
   443  			if err := runCompactCmd(td, d); err != nil {
   444  				return err.Error()
   445  			}
   446  			return runLSMCmd(td, d)
   447  		case "flush":
   448  			err := d.Flush()
   449  			if err != nil {
   450  				return err.Error()
   451  			}
   452  			return ""
   453  		case "lsm":
   454  			return runLSMCmd(td, d)
   455  		case "commit":
   456  			name := pluckStringCmdArg(td, "batch")
   457  			b := batches[name]
   458  			defer b.Close()
   459  			count := b.Count()
   460  			require.NoError(t, d.Apply(b, nil))
   461  			delete(batches, name)
   462  			return fmt.Sprintf("committed %d keys\n", count)
   463  		case "scan-internal":
   464  			var lower, upper []byte
   465  			var reader scanInternalReader = d
   466  			var b strings.Builder
   467  			var fileVisitor func(sst *SharedSSTMeta) error
   468  			for _, arg := range td.CmdArgs {
   469  				switch arg.Key {
   470  				case "lower":
   471  					lower = []byte(arg.Vals[0])
   472  				case "upper":
   473  					upper = []byte(arg.Vals[0])
   474  				case "snapshot":
   475  					name := arg.Vals[0]
   476  					snap, ok := snaps[name]
   477  					if !ok {
   478  						return fmt.Sprintf("no snapshot found for name %s", name)
   479  					}
   480  					reader = snap
   481  				case "file-only-snapshot":
   482  					name := arg.Vals[0]
   483  					efos, ok := efos[name]
   484  					if !ok {
   485  						return fmt.Sprintf("no snapshot found for name %s", name)
   486  					}
   487  					reader = efos
   488  				case "skip-shared":
   489  					fileVisitor = func(sst *SharedSSTMeta) error {
   490  						fmt.Fprintf(&b, "shared file: %s [%s-%s] [point=%s-%s] [range=%s-%s]\n", sst.fileNum, sst.Smallest.String(), sst.Largest.String(), sst.SmallestPointKey.String(), sst.LargestPointKey.String(), sst.SmallestRangeKey.String(), sst.LargestRangeKey.String())
   491  						return nil
   492  					}
   493  				}
   494  			}
   495  			err := reader.ScanInternal(context.TODO(), sstable.CategoryAndQoS{}, lower, upper,
   496  				func(key *InternalKey, value LazyValue, _ IteratorLevel) error {
   497  					v := value.InPlaceValue()
   498  					fmt.Fprintf(&b, "%s (%s)\n", key, v)
   499  					return nil
   500  				},
   501  				func(start, end []byte, seqNum uint64) error {
   502  					fmt.Fprintf(&b, "%s-%s#%d,RANGEDEL\n", start, end, seqNum)
   503  					return nil
   504  				},
   505  				func(start, end []byte, keys []keyspan.Key) error {
   506  					s := keyspan.Span{Start: start, End: end, Keys: keys}
   507  					fmt.Fprintf(&b, "%s\n", s.String())
   508  					return nil
   509  				},
   510  				fileVisitor,
   511  			)
   512  			if err != nil {
   513  				return err.Error()
   514  			}
   515  			return b.String()
   516  		default:
   517  			return fmt.Sprintf("unknown command %q", td.Cmd)
   518  		}
   519  	})
   520  }
   521  
   522  func TestPointCollapsingIter(t *testing.T) {
   523  	var def string
   524  	datadriven.RunTest(t, "testdata/point_collapsing_iter", func(t *testing.T, d *datadriven.TestData) string {
   525  		switch d.Cmd {
   526  		case "define":
   527  			def = d.Input
   528  			return ""
   529  
   530  		case "iter":
   531  			f := &fakeIter{}
   532  			var spans []keyspan.Span
   533  			for _, line := range strings.Split(def, "\n") {
   534  				for _, key := range strings.Fields(line) {
   535  					j := strings.Index(key, ":")
   536  					k := base.ParseInternalKey(key[:j])
   537  					v := []byte(key[j+1:])
   538  					if k.Kind() == InternalKeyKindRangeDelete {
   539  						spans = append(spans, keyspan.Span{
   540  							Start:     k.UserKey,
   541  							End:       v,
   542  							Keys:      []keyspan.Key{{Trailer: k.Trailer}},
   543  							KeysOrder: 0,
   544  						})
   545  						continue
   546  					}
   547  					f.keys = append(f.keys, k)
   548  					f.vals = append(f.vals, v)
   549  				}
   550  			}
   551  
   552  			ksIter := keyspan.NewIter(base.DefaultComparer.Compare, spans)
   553  			pcIter := &pointCollapsingIterator{
   554  				comparer: base.DefaultComparer,
   555  				merge:    base.DefaultMerger.Merge,
   556  				seqNum:   math.MaxUint64,
   557  			}
   558  			pcIter.iter.Init(base.DefaultComparer, f, ksIter, keyspan.InterleavingIterOpts{})
   559  			defer pcIter.Close()
   560  			return itertest.RunInternalIterCmd(t, d, pcIter, itertest.Verbose)
   561  
   562  		default:
   563  			return fmt.Sprintf("unknown command: %s", d.Cmd)
   564  		}
   565  	})
   566  }