github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/compaction_iter_test.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"io"
    12  	"slices"
    13  	"strconv"
    14  	"strings"
    15  	"testing"
    16  
    17  	"github.com/cockroachdb/datadriven"
    18  	"github.com/cockroachdb/pebble/internal/base"
    19  	"github.com/cockroachdb/pebble/internal/invalidating"
    20  	"github.com/cockroachdb/pebble/internal/keyspan"
    21  	"github.com/cockroachdb/pebble/internal/rangekey"
    22  	"github.com/cockroachdb/pebble/internal/testkeys"
    23  	"github.com/stretchr/testify/require"
    24  )
    25  
    26  func TestSnapshotIndex(t *testing.T) {
    27  	testCases := []struct {
    28  		snapshots      []uint64
    29  		seq            uint64
    30  		expectedIndex  int
    31  		expectedSeqNum uint64
    32  	}{
    33  		{[]uint64{}, 1, 0, InternalKeySeqNumMax},
    34  		{[]uint64{1}, 0, 0, 1},
    35  		{[]uint64{1}, 1, 1, InternalKeySeqNumMax},
    36  		{[]uint64{1}, 2, 1, InternalKeySeqNumMax},
    37  		{[]uint64{1, 3}, 1, 1, 3},
    38  		{[]uint64{1, 3}, 2, 1, 3},
    39  		{[]uint64{1, 3}, 3, 2, InternalKeySeqNumMax},
    40  		{[]uint64{1, 3}, 4, 2, InternalKeySeqNumMax},
    41  		{[]uint64{1, 3, 3}, 2, 1, 3},
    42  	}
    43  	for _, c := range testCases {
    44  		t.Run("", func(t *testing.T) {
    45  			idx, seqNum := snapshotIndex(c.seq, c.snapshots)
    46  			if c.expectedIndex != idx {
    47  				t.Fatalf("expected %d, but got %d", c.expectedIndex, idx)
    48  			}
    49  			if c.expectedSeqNum != seqNum {
    50  				t.Fatalf("expected %d, but got %d", c.expectedSeqNum, seqNum)
    51  			}
    52  		})
    53  	}
    54  }
    55  
    56  type debugMerger struct {
    57  	buf []byte
    58  }
    59  
    60  func (m *debugMerger) MergeNewer(value []byte) error {
    61  	m.buf = append(m.buf, value...)
    62  	return nil
    63  }
    64  
    65  func (m *debugMerger) MergeOlder(value []byte) error {
    66  	buf := make([]byte, 0, len(m.buf)+len(value))
    67  	buf = append(buf, value...)
    68  	buf = append(buf, m.buf...)
    69  	m.buf = buf
    70  	return nil
    71  }
    72  
    73  func (m *debugMerger) Finish(includesBase bool) ([]byte, io.Closer, error) {
    74  	if includesBase {
    75  		m.buf = append(m.buf, []byte("[base]")...)
    76  	}
    77  	return m.buf, nil, nil
    78  }
    79  
    80  func TestCompactionIter(t *testing.T) {
    81  	var merge Merge
    82  	var keys []InternalKey
    83  	var rangeKeys []keyspan.Span
    84  	var vals [][]byte
    85  	var snapshots []uint64
    86  	var elideTombstones bool
    87  	var allowZeroSeqnum bool
    88  	var interleavingIter *keyspan.InterleavingIter
    89  
    90  	// The input to the data-driven test is dependent on the format major
    91  	// version we are testing against.
    92  	fileFunc := func(formatVersion FormatMajorVersion) string {
    93  		if formatVersion < FormatSetWithDelete {
    94  			return "testdata/compaction_iter"
    95  		}
    96  		if formatVersion < FormatDeleteSizedAndObsolete {
    97  			return "testdata/compaction_iter_set_with_del"
    98  		}
    99  		return "testdata/compaction_iter_delete_sized"
   100  	}
   101  
   102  	var ineffectualSingleDeleteKeys []string
   103  	var invariantViolationSingleDeleteKeys []string
   104  	resetSingleDelStats := func() {
   105  		ineffectualSingleDeleteKeys = ineffectualSingleDeleteKeys[:0]
   106  		invariantViolationSingleDeleteKeys = invariantViolationSingleDeleteKeys[:0]
   107  	}
   108  	newIter := func(formatVersion FormatMajorVersion) *compactionIter {
   109  		// To adhere to the existing assumption that range deletion blocks in
   110  		// SSTables are not released while iterating, and therefore not
   111  		// susceptible to use-after-free bugs, we skip the zeroing of
   112  		// RangeDelete keys.
   113  		fi := &fakeIter{keys: keys, vals: vals}
   114  		interleavingIter = &keyspan.InterleavingIter{}
   115  		interleavingIter.Init(
   116  			base.DefaultComparer,
   117  			fi,
   118  			keyspan.NewIter(base.DefaultComparer.Compare, rangeKeys),
   119  			keyspan.InterleavingIterOpts{})
   120  		iter := invalidating.NewIter(interleavingIter, invalidating.IgnoreKinds(InternalKeyKindRangeDelete))
   121  		if merge == nil {
   122  			merge = func(key, value []byte) (base.ValueMerger, error) {
   123  				m := &debugMerger{}
   124  				m.buf = append(m.buf, value...)
   125  				return m, nil
   126  			}
   127  		}
   128  		resetSingleDelStats()
   129  		return newCompactionIter(
   130  			DefaultComparer.Compare,
   131  			DefaultComparer.Equal,
   132  			DefaultComparer.FormatKey,
   133  			merge,
   134  			iter,
   135  			snapshots,
   136  			&keyspan.Fragmenter{},
   137  			&keyspan.Fragmenter{},
   138  			allowZeroSeqnum,
   139  			func([]byte) bool {
   140  				return elideTombstones
   141  			},
   142  			func(_, _ []byte) bool {
   143  				return elideTombstones
   144  			},
   145  			func(userKey []byte) {
   146  				ineffectualSingleDeleteKeys = append(ineffectualSingleDeleteKeys, string(userKey))
   147  			},
   148  			func(userKey []byte) {
   149  				invariantViolationSingleDeleteKeys = append(invariantViolationSingleDeleteKeys, string(userKey))
   150  			},
   151  			formatVersion,
   152  		)
   153  	}
   154  
   155  	runTest := func(t *testing.T, formatVersion FormatMajorVersion) {
   156  		datadriven.RunTest(t, fileFunc(formatVersion), func(t *testing.T, d *datadriven.TestData) string {
   157  			switch d.Cmd {
   158  			case "define":
   159  				merge = nil
   160  				if len(d.CmdArgs) > 0 && d.CmdArgs[0].Key == "merger" &&
   161  					len(d.CmdArgs[0].Vals) > 0 && d.CmdArgs[0].Vals[0] == "deletable" {
   162  					merge = newDeletableSumValueMerger
   163  				}
   164  				keys = keys[:0]
   165  				vals = vals[:0]
   166  				rangeKeys = rangeKeys[:0]
   167  				for _, key := range strings.Split(d.Input, "\n") {
   168  					j := strings.Index(key, ":")
   169  					keys = append(keys, base.ParseInternalKey(key[:j]))
   170  
   171  					if strings.HasPrefix(key[j+1:], "varint(") {
   172  						valueStr := strings.TrimSuffix(strings.TrimPrefix(key[j+1:], "varint("), ")")
   173  						v, err := strconv.ParseUint(valueStr, 10, 64)
   174  						require.NoError(t, err)
   175  						encodedValue := binary.AppendUvarint([]byte(nil), v)
   176  						vals = append(vals, encodedValue)
   177  					} else {
   178  						vals = append(vals, []byte(key[j+1:]))
   179  					}
   180  				}
   181  				return ""
   182  
   183  			case "define-range-keys":
   184  				for _, key := range strings.Split(d.Input, "\n") {
   185  					s := keyspan.ParseSpan(strings.TrimSpace(key))
   186  					rangeKeys = append(rangeKeys, s)
   187  				}
   188  				return ""
   189  
   190  			case "iter":
   191  				snapshots = snapshots[:0]
   192  				elideTombstones = false
   193  				allowZeroSeqnum = false
   194  				printSnapshotPinned := false
   195  				printMissizedDels := false
   196  				printForceObsolete := false
   197  				for _, arg := range d.CmdArgs {
   198  					switch arg.Key {
   199  					case "snapshots":
   200  						for _, val := range arg.Vals {
   201  							seqNum, err := strconv.Atoi(val)
   202  							if err != nil {
   203  								return err.Error()
   204  							}
   205  							snapshots = append(snapshots, uint64(seqNum))
   206  						}
   207  					case "elide-tombstones":
   208  						var err error
   209  						elideTombstones, err = strconv.ParseBool(arg.Vals[0])
   210  						if err != nil {
   211  							return err.Error()
   212  						}
   213  					case "allow-zero-seqnum":
   214  						var err error
   215  						allowZeroSeqnum, err = strconv.ParseBool(arg.Vals[0])
   216  						if err != nil {
   217  							return err.Error()
   218  						}
   219  					case "print-snapshot-pinned":
   220  						printSnapshotPinned = true
   221  					case "print-missized-dels":
   222  						printMissizedDels = true
   223  					case "print-force-obsolete":
   224  						printForceObsolete = true
   225  					default:
   226  						return fmt.Sprintf("%s: unknown arg: %s", d.Cmd, arg.Key)
   227  					}
   228  				}
   229  				slices.Sort(snapshots)
   230  
   231  				iter := newIter(formatVersion)
   232  				var b bytes.Buffer
   233  				for _, line := range strings.Split(d.Input, "\n") {
   234  					parts := strings.Fields(line)
   235  					if len(parts) == 0 {
   236  						continue
   237  					}
   238  					switch parts[0] {
   239  					case "first":
   240  						iter.First()
   241  					case "next":
   242  						iter.Next()
   243  					case "tombstones":
   244  						var key []byte
   245  						if len(parts) == 2 {
   246  							key = []byte(parts[1])
   247  						}
   248  						for _, v := range iter.Tombstones(key) {
   249  							for _, k := range v.Keys {
   250  								fmt.Fprintf(&b, "%s-%s#%d\n", v.Start, v.End, k.SeqNum())
   251  							}
   252  						}
   253  						fmt.Fprintf(&b, ".\n")
   254  						continue
   255  					case "range-keys":
   256  						var key []byte
   257  						if len(parts) == 2 {
   258  							key = []byte(parts[1])
   259  						}
   260  						for _, v := range iter.RangeKeys(key) {
   261  							fmt.Fprintf(&b, "%s\n", v)
   262  						}
   263  						fmt.Fprintf(&b, ".\n")
   264  						continue
   265  					default:
   266  						return fmt.Sprintf("unknown op: %s", parts[0])
   267  					}
   268  					if iter.Valid() {
   269  						snapshotPinned := ""
   270  						if printSnapshotPinned {
   271  							snapshotPinned = " (not pinned)"
   272  							if iter.snapshotPinned {
   273  								snapshotPinned = " (pinned)"
   274  							}
   275  						}
   276  						forceObsolete := ""
   277  						if printForceObsolete {
   278  							forceObsolete = " (not force obsolete)"
   279  							if iter.forceObsoleteDueToRangeDel {
   280  								forceObsolete = " (force obsolete)"
   281  							}
   282  						}
   283  						v := string(iter.Value())
   284  						if iter.Key().Kind() == base.InternalKeyKindDeleteSized && len(iter.Value()) > 0 {
   285  							vn, n := binary.Uvarint(iter.Value())
   286  							if n != len(iter.Value()) {
   287  								v = fmt.Sprintf("err: %0x value not a uvarint", iter.Value())
   288  							} else {
   289  								v = fmt.Sprintf("varint(%d)", vn)
   290  							}
   291  						}
   292  						fmt.Fprintf(&b, "%s:%s%s%s\n", iter.Key(), v, snapshotPinned, forceObsolete)
   293  						if iter.Key().Kind() == InternalKeyKindRangeDelete {
   294  							iter.rangeDelFrag.Add(keyspan.Span{
   295  								Start: append([]byte{}, iter.Key().UserKey...),
   296  								End:   append([]byte{}, iter.Value()...),
   297  								Keys: []keyspan.Key{
   298  									{Trailer: iter.Key().Trailer},
   299  								},
   300  							})
   301  						}
   302  						if rangekey.IsRangeKey(iter.Key().Kind()) {
   303  							iter.rangeKeyFrag.Add(*interleavingIter.Span())
   304  						}
   305  					} else if err := iter.Error(); err != nil {
   306  						fmt.Fprintf(&b, "err=%v\n", err)
   307  					} else {
   308  						fmt.Fprintf(&b, ".\n")
   309  					}
   310  				}
   311  				if printMissizedDels {
   312  					fmt.Fprintf(&b, "missized-dels=%d\n", iter.stats.countMissizedDels)
   313  				}
   314  				if len(ineffectualSingleDeleteKeys) > 0 {
   315  					fmt.Fprintf(&b, "ineffectual-single-deletes: %s\n",
   316  						strings.Join(ineffectualSingleDeleteKeys, ","))
   317  				}
   318  				if len(invariantViolationSingleDeleteKeys) > 0 {
   319  					fmt.Fprintf(&b, "invariant-violation-single-deletes: %s\n",
   320  						strings.Join(invariantViolationSingleDeleteKeys, ","))
   321  				}
   322  				return b.String()
   323  
   324  			default:
   325  				return fmt.Sprintf("unknown command: %s", d.Cmd)
   326  			}
   327  		})
   328  	}
   329  
   330  	// Rather than testing against all format version, we test against the
   331  	// significant boundaries.
   332  	formatVersions := []FormatMajorVersion{
   333  		FormatMostCompatible,
   334  		FormatSetWithDelete - 1,
   335  		FormatSetWithDelete,
   336  		internalFormatNewest,
   337  	}
   338  	for _, formatVersion := range formatVersions {
   339  		t.Run(fmt.Sprintf("version-%s", formatVersion), func(t *testing.T) {
   340  			runTest(t, formatVersion)
   341  		})
   342  	}
   343  }
   344  
   345  func TestFrontiers(t *testing.T) {
   346  	cmp := testkeys.Comparer.Compare
   347  	var keySets [][][]byte
   348  	datadriven.RunTest(t, "testdata/frontiers", func(t *testing.T, td *datadriven.TestData) string {
   349  		switch td.Cmd {
   350  		case "init":
   351  			// Init configures a frontier per line of input. Each line should
   352  			// contain a sorted whitespace-separated list of keys that the
   353  			// frontier will use.
   354  			//
   355  			// For example, the following input creates two separate monitored
   356  			// frontiers: one that sets its key successively to 'd', 'e', 'j'
   357  			// and one that sets its key to 'a', 'p', 'n', 'z':
   358  			//
   359  			//    init
   360  			//    b e j
   361  			//    a p n z
   362  
   363  			keySets = keySets[:0]
   364  			for _, line := range strings.Split(td.Input, "\n") {
   365  				keySets = append(keySets, bytes.Fields([]byte(line)))
   366  			}
   367  			return ""
   368  		case "scan":
   369  			f := &frontiers{cmp: cmp}
   370  			for _, keys := range keySets {
   371  				initTestFrontier(f, keys...)
   372  			}
   373  			var buf bytes.Buffer
   374  			for _, kStr := range strings.Fields(td.Input) {
   375  				k := []byte(kStr)
   376  				f.Advance(k)
   377  				fmt.Fprintf(&buf, "%s : { %s }\n", kStr, f.String())
   378  			}
   379  			return buf.String()
   380  		default:
   381  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
   382  		}
   383  	})
   384  }
   385  
   386  // initTestFrontiers adds a new frontier to f that iterates through the provided
   387  // keys. The keys slice must be sorted.
   388  func initTestFrontier(f *frontiers, keys ...[]byte) *frontier {
   389  	ff := &frontier{}
   390  	var key []byte
   391  	if len(keys) > 0 {
   392  		key, keys = keys[0], keys[1:]
   393  	}
   394  	reached := func(k []byte) (nextKey []byte) {
   395  		if len(keys) > 0 {
   396  			nextKey, keys = keys[0], keys[1:]
   397  		}
   398  		return nextKey
   399  	}
   400  	ff.Init(f, key, reached)
   401  	return ff
   402  }