github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/data_test.go (about)

     1  // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"fmt"
    11  	"io"
    12  	"strconv"
    13  	"strings"
    14  
    15  	"github.com/cockroachdb/datadriven"
    16  	"github.com/cockroachdb/errors"
    17  	"github.com/cockroachdb/pebble/bloom"
    18  	"github.com/cockroachdb/pebble/internal/base"
    19  	"github.com/cockroachdb/pebble/internal/cache"
    20  	"github.com/cockroachdb/pebble/internal/keyspan"
    21  	"github.com/cockroachdb/pebble/internal/testkeys"
    22  	"github.com/cockroachdb/pebble/objstorage"
    23  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    24  	"github.com/cockroachdb/pebble/vfs"
    25  )
    26  
    27  func optsFromArgs(td *datadriven.TestData, writerOpts *WriterOptions) error {
    28  	for _, arg := range td.CmdArgs {
    29  		switch arg.Key {
    30  		case "leveldb":
    31  			if len(arg.Vals) != 0 {
    32  				return errors.Errorf("%s: arg %s expects 0 values", td.Cmd, arg.Key)
    33  			}
    34  			writerOpts.TableFormat = TableFormatLevelDB
    35  		case "block-size":
    36  			if len(arg.Vals) != 1 {
    37  				return errors.Errorf("%s: arg %s expects 1 value", td.Cmd, arg.Key)
    38  			}
    39  			var err error
    40  			writerOpts.BlockSize, err = strconv.Atoi(arg.Vals[0])
    41  			if err != nil {
    42  				return err
    43  			}
    44  		case "index-block-size":
    45  			if len(arg.Vals) != 1 {
    46  				return errors.Errorf("%s: arg %s expects 1 value", td.Cmd, arg.Key)
    47  			}
    48  			var err error
    49  			writerOpts.IndexBlockSize, err = strconv.Atoi(arg.Vals[0])
    50  			if err != nil {
    51  				return err
    52  			}
    53  		case "filter":
    54  			writerOpts.FilterPolicy = bloom.FilterPolicy(10)
    55  		case "comparer-split-4b-suffix":
    56  			writerOpts.Comparer = test4bSuffixComparer
    57  		case "writing-to-lowest-level":
    58  			writerOpts.WritingToLowestLevel = true
    59  		case "is-strict-obsolete":
    60  			writerOpts.IsStrictObsolete = true
    61  		}
    62  	}
    63  	return nil
    64  }
    65  
    66  func runBuildCmd(
    67  	td *datadriven.TestData, writerOpts *WriterOptions, cacheSize int,
    68  ) (*WriterMetadata, *Reader, error) {
    69  
    70  	f0 := &memFile{}
    71  	if err := optsFromArgs(td, writerOpts); err != nil {
    72  		return nil, nil, err
    73  	}
    74  
    75  	w := NewWriter(f0, *writerOpts)
    76  	var rangeDels []keyspan.Span
    77  	rangeDelFrag := keyspan.Fragmenter{
    78  		Cmp:    DefaultComparer.Compare,
    79  		Format: DefaultComparer.FormatKey,
    80  		Emit: func(s keyspan.Span) {
    81  			rangeDels = append(rangeDels, s)
    82  		},
    83  	}
    84  	var rangeKeys []keyspan.Span
    85  	rangeKeyFrag := keyspan.Fragmenter{
    86  		Cmp:    DefaultComparer.Compare,
    87  		Format: DefaultComparer.FormatKey,
    88  		Emit: func(s keyspan.Span) {
    89  			rangeKeys = append(rangeKeys, s)
    90  		},
    91  	}
    92  	for _, data := range strings.Split(td.Input, "\n") {
    93  		if strings.HasPrefix(data, "rangekey:") {
    94  			var err error
    95  			func() {
    96  				defer func() {
    97  					if r := recover(); r != nil {
    98  						err = errors.Errorf("%v", r)
    99  					}
   100  				}()
   101  				rangeKeyFrag.Add(keyspan.ParseSpan(strings.TrimPrefix(data, "rangekey:")))
   102  			}()
   103  			if err != nil {
   104  				return nil, nil, err
   105  			}
   106  			continue
   107  		}
   108  
   109  		forceObsolete := false
   110  		if strings.HasPrefix(data, "force-obsolete:") {
   111  			data = strings.TrimSpace(strings.TrimPrefix(data, "force-obsolete:"))
   112  			forceObsolete = true
   113  		}
   114  		j := strings.Index(data, ":")
   115  		key := base.ParseInternalKey(data[:j])
   116  		value := []byte(data[j+1:])
   117  		switch key.Kind() {
   118  		case InternalKeyKindRangeDelete:
   119  			if forceObsolete {
   120  				return nil, nil, errors.Errorf("force-obsolete is not allowed for RANGEDEL")
   121  			}
   122  			var err error
   123  			func() {
   124  				defer func() {
   125  					if r := recover(); r != nil {
   126  						err = errors.Errorf("%v", r)
   127  					}
   128  				}()
   129  				rangeDelFrag.Add(keyspan.Span{
   130  					Start: key.UserKey,
   131  					End:   value,
   132  					Keys:  []keyspan.Key{{Trailer: key.Trailer}},
   133  				})
   134  			}()
   135  			if err != nil {
   136  				return nil, nil, err
   137  			}
   138  		default:
   139  			if err := w.AddWithForceObsolete(key, value, forceObsolete); err != nil {
   140  				return nil, nil, err
   141  			}
   142  		}
   143  	}
   144  	rangeDelFrag.Finish()
   145  	for _, v := range rangeDels {
   146  		for _, k := range v.Keys {
   147  			ik := base.InternalKey{UserKey: v.Start, Trailer: k.Trailer}
   148  			if err := w.Add(ik, v.End); err != nil {
   149  				return nil, nil, err
   150  			}
   151  		}
   152  	}
   153  	rangeKeyFrag.Finish()
   154  	for _, s := range rangeKeys {
   155  		if err := w.addRangeKeySpan(s); err != nil {
   156  			return nil, nil, err
   157  		}
   158  	}
   159  	if err := w.Close(); err != nil {
   160  		return nil, nil, err
   161  	}
   162  	meta, err := w.Metadata()
   163  	if err != nil {
   164  		return nil, nil, err
   165  	}
   166  
   167  	readerOpts := ReaderOptions{Comparer: writerOpts.Comparer}
   168  	if writerOpts.FilterPolicy != nil {
   169  		readerOpts.Filters = map[string]FilterPolicy{
   170  			writerOpts.FilterPolicy.Name(): writerOpts.FilterPolicy,
   171  		}
   172  	}
   173  	if cacheSize > 0 {
   174  		readerOpts.Cache = cache.New(int64(cacheSize))
   175  		defer readerOpts.Cache.Unref()
   176  	}
   177  	r, err := NewMemReader(f0.Data(), readerOpts)
   178  	if err != nil {
   179  		return nil, nil, err
   180  	}
   181  	return meta, r, nil
   182  }
   183  
   184  func runBuildRawCmd(
   185  	td *datadriven.TestData, opts *WriterOptions,
   186  ) (*WriterMetadata, *Reader, error) {
   187  	mem := vfs.NewMem()
   188  	provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, "" /* dirName */))
   189  	if err != nil {
   190  		return nil, nil, err
   191  	}
   192  	defer provider.Close()
   193  
   194  	f0, _, err := provider.Create(context.Background(), base.FileTypeTable, base.FileNum(0).DiskFileNum(), objstorage.CreateOptions{})
   195  	if err != nil {
   196  		return nil, nil, err
   197  	}
   198  
   199  	w := NewWriter(f0, *opts)
   200  	for i := range td.CmdArgs {
   201  		arg := &td.CmdArgs[i]
   202  		if arg.Key == "range-del-v1" {
   203  			w.rangeDelV1Format = true
   204  			break
   205  		}
   206  	}
   207  
   208  	for _, data := range strings.Split(td.Input, "\n") {
   209  		if strings.HasPrefix(data, "rangekey:") {
   210  			data = strings.TrimPrefix(data, "rangekey:")
   211  			if err := w.addRangeKeySpan(keyspan.ParseSpan(data)); err != nil {
   212  				return nil, nil, err
   213  			}
   214  			continue
   215  		}
   216  
   217  		j := strings.Index(data, ":")
   218  		key := base.ParseInternalKey(data[:j])
   219  		value := []byte(data[j+1:])
   220  		switch key.Kind() {
   221  		case base.InternalKeyKindRangeKeyDelete,
   222  			base.InternalKeyKindRangeKeyUnset,
   223  			base.InternalKeyKindRangeKeySet:
   224  			if err := w.AddRangeKey(key, value); err != nil {
   225  				return nil, nil, err
   226  			}
   227  		default:
   228  			if err := w.Add(key, value); err != nil {
   229  				return nil, nil, err
   230  			}
   231  		}
   232  	}
   233  	if err := w.Close(); err != nil {
   234  		return nil, nil, err
   235  	}
   236  	meta, err := w.Metadata()
   237  	if err != nil {
   238  		return nil, nil, err
   239  	}
   240  
   241  	f1, err := provider.OpenForReading(context.Background(), base.FileTypeTable, base.FileNum(0).DiskFileNum(), objstorage.OpenOptions{})
   242  	if err != nil {
   243  		return nil, nil, err
   244  	}
   245  	r, err := NewReader(f1, ReaderOptions{})
   246  	if err != nil {
   247  		return nil, nil, err
   248  	}
   249  	return meta, r, nil
   250  }
   251  
   252  func scanGlobalSeqNum(td *datadriven.TestData) (uint64, error) {
   253  	for _, arg := range td.CmdArgs {
   254  		switch arg.Key {
   255  		case "globalSeqNum":
   256  			if len(arg.Vals) != 1 {
   257  				return 0, errors.Errorf("%s: arg %s expects 1 value", td.Cmd, arg.Key)
   258  			}
   259  			v, err := strconv.Atoi(arg.Vals[0])
   260  			if err != nil {
   261  				return 0, err
   262  			}
   263  			return uint64(v), nil
   264  		}
   265  	}
   266  	return 0, nil
   267  }
   268  
// runIterCmdOption is a functional option for runIterCmd: a function that
// mutates the runIterCmdOptions it is given.
type runIterCmdOption func(*runIterCmdOptions)
   270  
// runIterCmdOptions collects the optional hooks and state consulted by
// runIterCmd. The zero value leaves every hook unset.
type runIterCmdOptions struct {
	// everyOp, if non-nil, is called with the output buffer after each
	// iterator operation, before the iterator's position is printed.
	everyOp      func(io.Writer)
	// everyOpAfter, if non-nil, is called with the output buffer after the
	// iterator's position is printed and before the trailing newline.
	everyOpAfter func(io.Writer)
	// stats is the stats value printed by the "stats" command and zeroed by
	// the "reset-stats" command.
	stats        *base.InternalIteratorStats
}
   276  
   277  func runIterCmdEveryOp(everyOp func(io.Writer)) runIterCmdOption {
   278  	return func(opts *runIterCmdOptions) { opts.everyOp = everyOp }
   279  }
   280  
   281  func runIterCmdEveryOpAfter(everyOp func(io.Writer)) runIterCmdOption {
   282  	return func(opts *runIterCmdOptions) { opts.everyOpAfter = everyOp }
   283  }
   284  
   285  func runIterCmdStats(stats *base.InternalIteratorStats) runIterCmdOption {
   286  	return func(opts *runIterCmdOptions) { opts.stats = stats }
   287  }
   288  
   289  func runIterCmd(
   290  	td *datadriven.TestData, origIter Iterator, printValue bool, opt ...runIterCmdOption,
   291  ) string {
   292  	var opts runIterCmdOptions
   293  	for _, o := range opt {
   294  		o(&opts)
   295  	}
   296  
   297  	iter := newIterAdapter(origIter)
   298  	defer iter.Close()
   299  
   300  	var b bytes.Buffer
   301  	var prefix []byte
   302  	for _, line := range strings.Split(td.Input, "\n") {
   303  		parts := strings.Fields(line)
   304  		if len(parts) == 0 {
   305  			continue
   306  		}
   307  		switch parts[0] {
   308  		case "seek-ge":
   309  			if len(parts) < 2 || len(parts) > 3 {
   310  				return "seek-ge <key> [<try-seek-using-next]\n"
   311  			}
   312  			prefix = nil
   313  			var flags base.SeekGEFlags
   314  			if len(parts) == 3 {
   315  				if trySeekUsingNext, err := strconv.ParseBool(parts[2]); err != nil {
   316  					return err.Error()
   317  				} else if trySeekUsingNext {
   318  					flags = flags.EnableTrySeekUsingNext()
   319  				}
   320  			}
   321  			iter.SeekGE([]byte(strings.TrimSpace(parts[1])), flags)
   322  		case "seek-prefix-ge":
   323  			if len(parts) != 2 && len(parts) != 3 {
   324  				return "seek-prefix-ge <key> [<try-seek-using-next>]\n"
   325  			}
   326  			prefix = []byte(strings.TrimSpace(parts[1]))
   327  			var flags base.SeekGEFlags
   328  			if len(parts) == 3 {
   329  				if trySeekUsingNext, err := strconv.ParseBool(parts[2]); err != nil {
   330  					return err.Error()
   331  				} else if trySeekUsingNext {
   332  					flags = flags.EnableTrySeekUsingNext()
   333  				}
   334  			}
   335  			iter.SeekPrefixGE(prefix, prefix /* key */, flags)
   336  		case "seek-lt":
   337  			if len(parts) != 2 {
   338  				return "seek-lt <key>\n"
   339  			}
   340  			prefix = nil
   341  			iter.SeekLT([]byte(strings.TrimSpace(parts[1])), base.SeekLTFlagsNone)
   342  		case "first":
   343  			prefix = nil
   344  			iter.First()
   345  		case "last":
   346  			prefix = nil
   347  			iter.Last()
   348  		case "next":
   349  			iter.Next()
   350  		case "next-ignore-result":
   351  			iter.NextIgnoreResult()
   352  		case "prev":
   353  			iter.Prev()
   354  		case "next-prefix":
   355  			if len(parts) != 1 {
   356  				return "next-prefix should have no parameter\n"
   357  			}
   358  			if iter.Key() == nil {
   359  				return "next-prefix cannot be called on exhauster iterator\n"
   360  			}
   361  			k := iter.Key().UserKey
   362  			prefixLen := testkeys.Comparer.Split(k)
   363  			k = k[:prefixLen]
   364  			kSucc := testkeys.Comparer.ImmediateSuccessor(nil, k)
   365  			iter.NextPrefix(kSucc)
   366  		case "set-bounds":
   367  			if len(parts) <= 1 || len(parts) > 3 {
   368  				return "set-bounds lower=<lower> upper=<upper>\n"
   369  			}
   370  			var lower []byte
   371  			var upper []byte
   372  			for _, part := range parts[1:] {
   373  				arg := strings.Split(strings.TrimSpace(part), "=")
   374  				switch arg[0] {
   375  				case "lower":
   376  					lower = []byte(arg[1])
   377  					if len(lower) == 0 {
   378  						lower = nil
   379  					}
   380  				case "upper":
   381  					upper = []byte(arg[1])
   382  					if len(upper) == 0 {
   383  						upper = nil
   384  					}
   385  				default:
   386  					return fmt.Sprintf("set-bounds: unknown arg: %s", arg)
   387  				}
   388  			}
   389  			iter.SetBounds(lower, upper)
   390  		case "stats":
   391  			// The timing is non-deterministic, so set to 0.
   392  			opts.stats.BlockReadDuration = 0
   393  			fmt.Fprintf(&b, "%+v\n", *opts.stats)
   394  			continue
   395  		case "reset-stats":
   396  			*opts.stats = base.InternalIteratorStats{}
   397  			continue
   398  		case "internal-iter-state":
   399  			fmt.Fprintf(&b, "| %T:\n", origIter)
   400  			si, _ := origIter.(*singleLevelIterator)
   401  			if twoLevelIter, ok := origIter.(*twoLevelIterator); ok {
   402  				si = &twoLevelIter.singleLevelIterator
   403  				if twoLevelIter.topLevelIndex.valid() {
   404  					fmt.Fprintf(&b, "|  topLevelIndex.Key() = %q\n", twoLevelIter.topLevelIndex.Key())
   405  					v := twoLevelIter.topLevelIndex.value()
   406  					bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
   407  					if err != nil {
   408  						fmt.Fprintf(&b, "|  topLevelIndex.InPlaceValue() failed to decode as BHP: %s\n", err)
   409  					} else {
   410  						fmt.Fprintf(&b, "|  topLevelIndex.InPlaceValue() = (Offset: %d, Length: %d, Props: %x)\n",
   411  							bhp.Offset, bhp.Length, bhp.Props)
   412  					}
   413  				} else {
   414  					fmt.Fprintf(&b, "|  topLevelIndex iter invalid\n")
   415  				}
   416  				fmt.Fprintf(&b, "|  topLevelIndex.isDataInvalidated()=%t\n", twoLevelIter.topLevelIndex.isDataInvalidated())
   417  			}
   418  			if si.index.valid() {
   419  				fmt.Fprintf(&b, "|  index.Key() = %q\n", si.index.Key())
   420  				v := si.index.value()
   421  				bhp, err := decodeBlockHandleWithProperties(v.InPlaceValue())
   422  				if err != nil {
   423  					fmt.Fprintf(&b, "|  index.InPlaceValue() failed to decode as BHP: %s\n", err)
   424  				} else {
   425  					fmt.Fprintf(&b, "|  index.InPlaceValue() = (Offset: %d, Length: %d, Props: %x)\n",
   426  						bhp.Offset, bhp.Length, bhp.Props)
   427  				}
   428  			} else {
   429  				fmt.Fprintf(&b, "|  index iter invalid\n")
   430  			}
   431  			fmt.Fprintf(&b, "|  index.isDataInvalidated()=%t\n", si.index.isDataInvalidated())
   432  			fmt.Fprintf(&b, "|  data.isDataInvalidated()=%t\n", si.data.isDataInvalidated())
   433  			fmt.Fprintf(&b, "|  hideObsoletePoints = %t\n", si.hideObsoletePoints)
   434  			fmt.Fprintf(&b, "|  dataBH = (Offset: %d, Length: %d)\n", si.dataBH.Offset, si.dataBH.Length)
   435  			fmt.Fprintf(&b, "|  (boundsCmp,positionedUsingLatestBounds) = (%d,%t)\n", si.boundsCmp, si.positionedUsingLatestBounds)
   436  			fmt.Fprintf(&b, "|  exhaustedBounds = %d\n", si.exhaustedBounds)
   437  
   438  			continue
   439  		}
   440  		if opts.everyOp != nil {
   441  			opts.everyOp(&b)
   442  		}
   443  		if iter.Valid() && checkValidPrefix(prefix, iter.Key().UserKey) {
   444  			fmt.Fprintf(&b, "<%s:%d>", iter.Key().UserKey, iter.Key().SeqNum())
   445  			if printValue {
   446  				fmt.Fprintf(&b, ":%s", string(iter.Value()))
   447  			}
   448  		} else if err := iter.Error(); err != nil {
   449  			fmt.Fprintf(&b, "<err=%v>", err)
   450  		} else {
   451  			fmt.Fprintf(&b, ".")
   452  		}
   453  		if opts.everyOpAfter != nil {
   454  			opts.everyOpAfter(&b)
   455  		}
   456  		b.WriteString("\n")
   457  	}
   458  	return b.String()
   459  }
   460  
   461  func runRewriteCmd(
   462  	td *datadriven.TestData, r *Reader, writerOpts WriterOptions,
   463  ) (*WriterMetadata, *Reader, error) {
   464  	var from, to []byte
   465  	for _, arg := range td.CmdArgs {
   466  		switch arg.Key {
   467  		case "from":
   468  			from = []byte(arg.Vals[0])
   469  		case "to":
   470  			to = []byte(arg.Vals[0])
   471  		}
   472  	}
   473  	if from == nil || to == nil {
   474  		return nil, r, errors.New("missing from/to")
   475  	}
   476  
   477  	opts := writerOpts
   478  	if err := optsFromArgs(td, &opts); err != nil {
   479  		return nil, r, err
   480  	}
   481  
   482  	f := &memFile{}
   483  	meta, _, err := rewriteKeySuffixesInBlocks(r, f, opts, from, to, 2)
   484  	if err != nil {
   485  		return nil, r, errors.Wrap(err, "rewrite failed")
   486  	}
   487  	readerOpts := ReaderOptions{Comparer: opts.Comparer}
   488  	if opts.FilterPolicy != nil {
   489  		readerOpts.Filters = map[string]FilterPolicy{
   490  			opts.FilterPolicy.Name(): opts.FilterPolicy,
   491  		}
   492  	}
   493  	r.Close()
   494  
   495  	r, err = NewMemReader(f.Data(), readerOpts)
   496  	if err != nil {
   497  		return nil, nil, err
   498  	}
   499  	return meta, r, nil
   500  }