github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/compaction_iter_test.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"io"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  	"testing"
    16  
    17  	"github.com/cockroachdb/datadriven"
    18  	"github.com/cockroachdb/pebble/internal/base"
    19  	"github.com/cockroachdb/pebble/internal/invalidating"
    20  	"github.com/cockroachdb/pebble/internal/keyspan"
    21  	"github.com/cockroachdb/pebble/internal/rangekey"
    22  	"github.com/cockroachdb/pebble/internal/testkeys"
    23  	"github.com/stretchr/testify/require"
    24  )
    25  
    26  func TestSnapshotIndex(t *testing.T) {
    27  	testCases := []struct {
    28  		snapshots      []uint64
    29  		seq            uint64
    30  		expectedIndex  int
    31  		expectedSeqNum uint64
    32  	}{
    33  		{[]uint64{}, 1, 0, InternalKeySeqNumMax},
    34  		{[]uint64{1}, 0, 0, 1},
    35  		{[]uint64{1}, 1, 1, InternalKeySeqNumMax},
    36  		{[]uint64{1}, 2, 1, InternalKeySeqNumMax},
    37  		{[]uint64{1, 3}, 1, 1, 3},
    38  		{[]uint64{1, 3}, 2, 1, 3},
    39  		{[]uint64{1, 3}, 3, 2, InternalKeySeqNumMax},
    40  		{[]uint64{1, 3}, 4, 2, InternalKeySeqNumMax},
    41  		{[]uint64{1, 3, 3}, 2, 1, 3},
    42  	}
    43  	for _, c := range testCases {
    44  		t.Run("", func(t *testing.T) {
    45  			idx, seqNum := snapshotIndex(c.seq, c.snapshots)
    46  			if c.expectedIndex != idx {
    47  				t.Fatalf("expected %d, but got %d", c.expectedIndex, idx)
    48  			}
    49  			if c.expectedSeqNum != seqNum {
    50  				t.Fatalf("expected %d, but got %d", c.expectedSeqNum, seqNum)
    51  			}
    52  		})
    53  	}
    54  }
    55  
    56  type debugMerger struct {
    57  	buf []byte
    58  }
    59  
    60  func (m *debugMerger) MergeNewer(value []byte) error {
    61  	m.buf = append(m.buf, value...)
    62  	return nil
    63  }
    64  
    65  func (m *debugMerger) MergeOlder(value []byte) error {
    66  	buf := make([]byte, 0, len(m.buf)+len(value))
    67  	buf = append(buf, value...)
    68  	buf = append(buf, m.buf...)
    69  	m.buf = buf
    70  	return nil
    71  }
    72  
    73  func (m *debugMerger) Finish(includesBase bool) ([]byte, io.Closer, error) {
    74  	if includesBase {
    75  		m.buf = append(m.buf, []byte("[base]")...)
    76  	}
    77  	return m.buf, nil, nil
    78  }
    79  
// TestCompactionIter runs the data-driven compaction iterator tests once per
// entry in formatVersions; the testdata file is chosen from the format
// version by fileFunc.
//
// Supported commands:
//
//   - define: resets the point keys, values and range keys, then parses one
//     "<internal-key>:<value>" entry per input line. A value written as
//     "varint(N)" is stored uvarint-encoded. The argument merger=deletable
//     selects newDeletableSumValueMerger; otherwise a debugMerger is used.
//   - define-range-keys: parses one span per input line and appends it to the
//     range keys.
//   - iter: builds a fresh compactionIter from the defined state and the
//     command arguments (snapshots, elide-tombstones, allow-zero-seqnum,
//     print-snapshot-pinned, print-missized-dels, print-force-obsolete), then
//     executes one operation per input line (first, next, tombstones,
//     range-keys) and returns the accumulated output.
func TestCompactionIter(t *testing.T) {
	// State shared between commands: "define" and "define-range-keys"
	// populate it, "iter" configures the remaining knobs and consumes it
	// through newIter.
	var merge Merge
	var keys []InternalKey
	var rangeKeys []keyspan.Span
	var vals [][]byte
	var snapshots []uint64
	var elideTombstones bool
	var allowZeroSeqnum bool
	var interleavingIter *keyspan.InterleavingIter

	// The input to the data-driven test is dependent on the format major
	// version we are testing against.
	fileFunc := func(formatVersion FormatMajorVersion) string {
		if formatVersion < FormatSetWithDelete {
			return "testdata/compaction_iter"
		}
		if formatVersion < FormatDeleteSizedAndObsolete {
			return "testdata/compaction_iter_set_with_del"
		}
		return "testdata/compaction_iter_delete_sized"
	}

	// User keys reported through the two single-delete callbacks passed to
	// newCompactionIter below. Both slices are truncated every time a new
	// iterator is built.
	var ineffectualSingleDeleteKeys []string
	var invariantViolationSingleDeleteKeys []string
	resetSingleDelStats := func() {
		ineffectualSingleDeleteKeys = ineffectualSingleDeleteKeys[:0]
		invariantViolationSingleDeleteKeys = invariantViolationSingleDeleteKeys[:0]
	}
	// newIter builds a compactionIter over the currently defined point keys
	// and range keys, using the current values of merge, snapshots,
	// allowZeroSeqnum and elideTombstones.
	newIter := func(formatVersion FormatMajorVersion) *compactionIter {
		// To adhere to the existing assumption that range deletion blocks in
		// SSTables are not released while iterating, and therefore not
		// susceptible to use-after-free bugs, we skip the zeroing of
		// RangeDelete keys.
		fi := &fakeIter{keys: keys, vals: vals}
		// The interleaving iterator is kept in the enclosing scope because
		// the "iter" command reads its current span when it encounters a
		// range key.
		interleavingIter = &keyspan.InterleavingIter{}
		interleavingIter.Init(
			base.DefaultComparer,
			fi,
			keyspan.NewIter(base.DefaultComparer.Compare, rangeKeys),
			keyspan.InterleavingIterOpts{})
		iter := invalidating.NewIter(interleavingIter, invalidating.IgnoreKinds(InternalKeyKindRangeDelete))
		// Fall back to a debugMerger seeded with the first value when the
		// "define" command did not select a merger.
		if merge == nil {
			merge = func(key, value []byte) (base.ValueMerger, error) {
				m := &debugMerger{}
				m.buf = append(m.buf, value...)
				return m, nil
			}
		}
		resetSingleDelStats()
		return newCompactionIter(
			DefaultComparer.Compare,
			DefaultComparer.Equal,
			DefaultComparer.FormatKey,
			merge,
			iter,
			snapshots,
			&keyspan.Fragmenter{},
			&keyspan.Fragmenter{},
			allowZeroSeqnum,
			// Both of the following callbacks return the current
			// elideTombstones setting regardless of the key(s) passed.
			func([]byte) bool {
				return elideTombstones
			},
			func(_, _ []byte) bool {
				return elideTombstones
			},
			func(userKey []byte) {
				ineffectualSingleDeleteKeys = append(ineffectualSingleDeleteKeys, string(userKey))
			},
			func(userKey []byte) {
				invariantViolationSingleDeleteKeys = append(invariantViolationSingleDeleteKeys, string(userKey))
			},
			formatVersion,
		)
	}

	// runTest executes every command of the testdata file selected for
	// formatVersion.
	runTest := func(t *testing.T, formatVersion FormatMajorVersion) {
		datadriven.RunTest(t, fileFunc(formatVersion), func(t *testing.T, d *datadriven.TestData) string {
			switch d.Cmd {
			case "define":
				// Only a first argument of the form merger=deletable changes
				// the merger; anything else leaves merge nil so that newIter
				// installs the debugMerger.
				merge = nil
				if len(d.CmdArgs) > 0 && d.CmdArgs[0].Key == "merger" &&
					len(d.CmdArgs[0].Vals) > 0 && d.CmdArgs[0].Vals[0] == "deletable" {
					merge = newDeletableSumValueMerger
				}
				keys = keys[:0]
				vals = vals[:0]
				rangeKeys = rangeKeys[:0]
				for _, key := range strings.Split(d.Input, "\n") {
					// Each line is "<internal-key>:<value>", split at the
					// first ':'.
					// NOTE(review): a line without ':' makes j == -1 and the
					// slice expression below panics.
					j := strings.Index(key, ":")
					keys = append(keys, base.ParseInternalKey(key[:j]))

					if strings.HasPrefix(key[j+1:], "varint(") {
						// "varint(N)" is stored as the uvarint encoding of N.
						valueStr := strings.TrimSuffix(strings.TrimPrefix(key[j+1:], "varint("), ")")
						v, err := strconv.ParseUint(valueStr, 10, 64)
						require.NoError(t, err)
						encodedValue := binary.AppendUvarint([]byte(nil), v)
						vals = append(vals, encodedValue)
					} else {
						vals = append(vals, []byte(key[j+1:]))
					}
				}
				return ""

			case "define-range-keys":
				// Spans are appended; rangeKeys is only reset by "define".
				for _, key := range strings.Split(d.Input, "\n") {
					s := keyspan.ParseSpan(strings.TrimSpace(key))
					rangeKeys = append(rangeKeys, s)
				}
				return ""

			case "iter":
				// Every "iter" command starts from default settings; only the
				// arguments given on this command apply.
				snapshots = snapshots[:0]
				elideTombstones = false
				allowZeroSeqnum = false
				printSnapshotPinned := false
				printMissizedDels := false
				printForceObsolete := false
				for _, arg := range d.CmdArgs {
					switch arg.Key {
					case "snapshots":
						for _, val := range arg.Vals {
							seqNum, err := strconv.Atoi(val)
							if err != nil {
								return err.Error()
							}
							snapshots = append(snapshots, uint64(seqNum))
						}
					case "elide-tombstones":
						var err error
						elideTombstones, err = strconv.ParseBool(arg.Vals[0])
						if err != nil {
							return err.Error()
						}
					case "allow-zero-seqnum":
						var err error
						allowZeroSeqnum, err = strconv.ParseBool(arg.Vals[0])
						if err != nil {
							return err.Error()
						}
					case "print-snapshot-pinned":
						printSnapshotPinned = true
					case "print-missized-dels":
						printMissizedDels = true
					case "print-force-obsolete":
						printForceObsolete = true
					default:
						return fmt.Sprintf("%s: unknown arg: %s", d.Cmd, arg.Key)
					}
				}
				// Snapshots are sorted ascending before the iterator is
				// built, so the testdata may list them in any order.
				sort.Slice(snapshots, func(i, j int) bool {
					return snapshots[i] < snapshots[j]
				})

				iter := newIter(formatVersion)
				var b bytes.Buffer
				for _, line := range strings.Split(d.Input, "\n") {
					parts := strings.Fields(line)
					if len(parts) == 0 {
						continue
					}
					switch parts[0] {
					case "first":
						iter.First()
					case "next":
						iter.Next()
					case "tombstones":
						// Prints one "start-end#seqnum" line per key of each
						// span returned by iter.Tombstones, then ".". The
						// optional operand is passed through as the key;
						// without it a nil key is passed. The iterator
						// position is not printed for this op.
						var key []byte
						if len(parts) == 2 {
							key = []byte(parts[1])
						}
						for _, v := range iter.Tombstones(key) {
							for _, k := range v.Keys {
								fmt.Fprintf(&b, "%s-%s#%d\n", v.Start, v.End, k.SeqNum())
							}
						}
						fmt.Fprintf(&b, ".\n")
						continue
					case "range-keys":
						// Same shape as "tombstones", but prints each span
						// returned by iter.RangeKeys on its own line.
						var key []byte
						if len(parts) == 2 {
							key = []byte(parts[1])
						}
						for _, v := range iter.RangeKeys(key) {
							fmt.Fprintf(&b, "%s\n", v)
						}
						fmt.Fprintf(&b, ".\n")
						continue
					default:
						return fmt.Sprintf("unknown op: %s", parts[0])
					}
					// After a positioning op, print the current entry, the
					// iterator error, or "." when the iterator is exhausted
					// without error.
					if iter.Valid() {
						snapshotPinned := ""
						if printSnapshotPinned {
							snapshotPinned = " (not pinned)"
							if iter.snapshotPinned {
								snapshotPinned = " (pinned)"
							}
						}
						forceObsolete := ""
						if printForceObsolete {
							forceObsolete = " (not force obsolete)"
							if iter.forceObsoleteDueToRangeDel {
								forceObsolete = " (force obsolete)"
							}
						}
						v := string(iter.Value())
						// A non-empty DeleteSized value is rendered as
						// "varint(N)", mirroring the notation accepted by
						// "define"; a value that is not exactly one uvarint
						// is reported inline as an error string.
						if iter.Key().Kind() == base.InternalKeyKindDeleteSized && len(iter.Value()) > 0 {
							vn, n := binary.Uvarint(iter.Value())
							if n != len(iter.Value()) {
								v = fmt.Sprintf("err: %0x value not a uvarint", iter.Value())
							} else {
								v = fmt.Sprintf("varint(%d)", vn)
							}
						}
						fmt.Fprintf(&b, "%s:%s%s%s\n", iter.Key(), v, snapshotPinned, forceObsolete)
						// The test itself feeds emitted range deletions and
						// range keys into the iterator's fragmenters, which
						// the "tombstones" and "range-keys" ops later read
						// back through the iterator. Start and end keys of a
						// range deletion are copied before being added.
						if iter.Key().Kind() == InternalKeyKindRangeDelete {
							iter.rangeDelFrag.Add(keyspan.Span{
								Start: append([]byte{}, iter.Key().UserKey...),
								End:   append([]byte{}, iter.Value()...),
								Keys: []keyspan.Key{
									{Trailer: iter.Key().Trailer},
								},
							})
						}
						if rangekey.IsRangeKey(iter.Key().Kind()) {
							iter.rangeKeyFrag.Add(*interleavingIter.Span())
						}
					} else if err := iter.Error(); err != nil {
						fmt.Fprintf(&b, "err=%v\n", err)
					} else {
						fmt.Fprintf(&b, ".\n")
					}
				}
				// Trailing summary lines: the missized-delete count is
				// printed only on request, the single-delete key lists only
				// when non-empty.
				if printMissizedDels {
					fmt.Fprintf(&b, "missized-dels=%d\n", iter.stats.countMissizedDels)
				}
				if len(ineffectualSingleDeleteKeys) > 0 {
					fmt.Fprintf(&b, "ineffectual-single-deletes: %s\n",
						strings.Join(ineffectualSingleDeleteKeys, ","))
				}
				if len(invariantViolationSingleDeleteKeys) > 0 {
					fmt.Fprintf(&b, "invariant-violation-single-deletes: %s\n",
						strings.Join(invariantViolationSingleDeleteKeys, ","))
				}
				return b.String()

			default:
				return fmt.Sprintf("unknown command: %s", d.Cmd)
			}
		})
	}

	// Rather than testing against every format version, we test against the
	// significant boundaries.
	formatVersions := []FormatMajorVersion{
		FormatMostCompatible,
		FormatSetWithDelete - 1,
		FormatSetWithDelete,
		internalFormatNewest,
	}
	for _, formatVersion := range formatVersions {
		t.Run(fmt.Sprintf("version-%s", formatVersion), func(t *testing.T) {
			runTest(t, formatVersion)
		})
	}
}
   346  
   347  func TestFrontiers(t *testing.T) {
   348  	cmp := testkeys.Comparer.Compare
   349  	var keySets [][][]byte
   350  	datadriven.RunTest(t, "testdata/frontiers", func(t *testing.T, td *datadriven.TestData) string {
   351  		switch td.Cmd {
   352  		case "init":
   353  			// Init configures a frontier per line of input. Each line should
   354  			// contain a sorted whitespace-separated list of keys that the
   355  			// frontier will use.
   356  			//
   357  			// For example, the following input creates two separate monitored
   358  			// frontiers: one that sets its key successively to 'd', 'e', 'j'
   359  			// and one that sets its key to 'a', 'p', 'n', 'z':
   360  			//
   361  			//    init
   362  			//    b e j
   363  			//    a p n z
   364  
   365  			keySets = keySets[:0]
   366  			for _, line := range strings.Split(td.Input, "\n") {
   367  				keySets = append(keySets, bytes.Fields([]byte(line)))
   368  			}
   369  			return ""
   370  		case "scan":
   371  			f := &frontiers{cmp: cmp}
   372  			for _, keys := range keySets {
   373  				initTestFrontier(f, keys...)
   374  			}
   375  			var buf bytes.Buffer
   376  			for _, kStr := range strings.Fields(td.Input) {
   377  				k := []byte(kStr)
   378  				f.Advance(k)
   379  				fmt.Fprintf(&buf, "%s : { %s }\n", kStr, f.String())
   380  			}
   381  			return buf.String()
   382  		default:
   383  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
   384  		}
   385  	})
   386  }
   387  
   388  // initTestFrontiers adds a new frontier to f that iterates through the provided
   389  // keys. The keys slice must be sorted.
   390  func initTestFrontier(f *frontiers, keys ...[]byte) *frontier {
   391  	ff := &frontier{}
   392  	var key []byte
   393  	if len(keys) > 0 {
   394  		key, keys = keys[0], keys[1:]
   395  	}
   396  	reached := func(k []byte) (nextKey []byte) {
   397  		if len(keys) > 0 {
   398  			nextKey, keys = keys[0], keys[1:]
   399  		}
   400  		return nextKey
   401  	}
   402  	ff.Init(f, key, reached)
   403  	return ff
   404  }