github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/keyspan/merging_iter_test.go (about)

     1  // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package keyspan
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"math/rand"
    11  	"strconv"
    12  	"strings"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/cockroachdb/datadriven"
    17  	"github.com/cockroachdb/pebble/internal/base"
    18  	"github.com/cockroachdb/pebble/internal/testkeys"
    19  	"github.com/stretchr/testify/require"
    20  )
    21  
    22  func TestMergingIter(t *testing.T) {
    23  	cmp := base.DefaultComparer.Compare
    24  	var iter MergingIter
    25  
    26  	datadriven.RunTest(t, "testdata/merging_iter", func(t *testing.T, td *datadriven.TestData) string {
    27  		switch td.Cmd {
    28  		case "define":
    29  			snapshot := base.InternalKeySeqNumMax
    30  			for _, cmdArg := range td.CmdArgs {
    31  				switch cmdArg.Key {
    32  				case "snapshot":
    33  					var err error
    34  					snapshot, err = strconv.ParseUint(cmdArg.Vals[0], 10, 64)
    35  					require.NoError(t, err)
    36  				default:
    37  					return fmt.Sprintf("unrecognized arg %q", cmdArg.Key)
    38  				}
    39  			}
    40  
    41  			var iters []FragmentIterator
    42  			var spans []Span
    43  			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
    44  			for _, line := range lines {
    45  				if line == "--" {
    46  					iters = append(iters, NewIter(cmp, spans))
    47  					spans = nil
    48  					continue
    49  				}
    50  				spans = append(spans, ParseSpan(line))
    51  			}
    52  			if len(spans) > 0 {
    53  				iters = append(iters, &invalidatingIter{iter: NewIter(cmp, spans)})
    54  			}
    55  			iter.Init(cmp, VisibleTransform(snapshot), new(MergingBuffers), iters...)
    56  			return fmt.Sprintf("%d levels", len(iters))
    57  		case "iter":
    58  			return runIterCmd(t, td, &iter)
    59  
    60  		default:
    61  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
    62  		}
    63  	})
    64  }
    65  
    66  // TestMergingIter_FragmenterEquivalence tests for equivalence between the
    67  // fragmentation performed on-the-fly by the MergingIter and the fragmentation
    68  // performed by the Fragmenter.
    69  //
    70  // It does this by producing 1-10 levels of well-formed fragments. Generated
    71  // fragments may overlap other levels arbitrarily, but within their level
    72  // generated fragments may only overlap other fragments that share the same user
    73  // key bounds.
    74  //
    75  // The test then feeds all the fragments, across all levels, into a Fragmenter
    76  // and produces a Iter over those fragments. The test also constructs a
    77  // MergingIter with a separate Iter for each level. It runs a random
    78  // series of operations, applying each operation to both. It asserts that each
    79  // operation has identical results on both iterators.
    80  func TestMergingIter_FragmenterEquivalence(t *testing.T) {
    81  	seed := time.Now().UnixNano()
    82  	for i := int64(0); i < 10; i++ {
    83  		testFragmenterEquivalenceOnce(t, seed+i)
    84  	}
    85  }
    86  
    87  func TestMergingIter_FragmenterEquivalence_Seed(t *testing.T) {
    88  	// This test uses a fixed seed. It's useful to manually edit its seed when
    89  	// debugging a test failure of the variable-seed test.
    90  	const seed = 1644517830186873000
    91  	testFragmenterEquivalenceOnce(t, seed)
    92  }
    93  
    94  func testFragmenterEquivalenceOnce(t *testing.T, seed int64) {
    95  	cmp := testkeys.Comparer.Compare
    96  	rng := rand.New(rand.NewSource(seed))
    97  	t.Logf("seed = %d", seed)
    98  
    99  	// Use a key space of alphanumeric strings, with a random max length between
   100  	// 1-3. Repeat keys are more common at the lower max lengths.
   101  	ks := testkeys.Alpha(rng.Intn(3) + 1)
   102  
   103  	// Generate between 1 and 10 levels of fragment iterators.
   104  	levels := make([][]Span, rng.Intn(10)+1)
   105  	iters := make([]FragmentIterator, len(levels))
   106  	var allSpans []Span
   107  	var buf bytes.Buffer
   108  	for l := 0; l < len(levels); l++ {
   109  		fmt.Fprintf(&buf, "level %d: ", l)
   110  		for keyspaceStartIdx := int64(0); keyspaceStartIdx < ks.Count(); {
   111  			// Generate spans of lengths of up to a third of the keyspace.
   112  			spanStartIdx := keyspaceStartIdx + rng.Int63n(ks.Count()/3)
   113  			spanEndIdx := spanStartIdx + rng.Int63n(ks.Count()/3) + 1
   114  
   115  			if spanEndIdx < ks.Count() {
   116  				keyCount := uint64(rng.Intn(3) + 1)
   117  				s := Span{
   118  					Start: testkeys.Key(ks, spanStartIdx),
   119  					End:   testkeys.Key(ks, spanEndIdx),
   120  					Keys:  make([]Key, 0, keyCount),
   121  				}
   122  				for k := keyCount; k > 0; k-- {
   123  					seqNum := uint64((len(levels)-l)*3) + k
   124  					s.Keys = append(s.Keys, Key{
   125  						Trailer: base.MakeTrailer(seqNum, base.InternalKeyKindRangeKeySet),
   126  					})
   127  				}
   128  				if len(levels[l]) > 0 {
   129  					fmt.Fprint(&buf, ", ")
   130  				}
   131  				fmt.Fprintf(&buf, "%s", s)
   132  
   133  				levels[l] = append(levels[l], s)
   134  				allSpans = append(allSpans, s)
   135  			}
   136  			keyspaceStartIdx = spanEndIdx
   137  		}
   138  		iters[l] = &invalidatingIter{iter: NewIter(cmp, levels[l])}
   139  		fmt.Fprintln(&buf)
   140  	}
   141  
   142  	// Fragment the spans across the levels.
   143  	var allFragmented []Span
   144  	f := Fragmenter{
   145  		Cmp:    cmp,
   146  		Format: testkeys.Comparer.FormatKey,
   147  		Emit: func(span Span) {
   148  			allFragmented = append(allFragmented, span)
   149  		},
   150  	}
   151  	Sort(f.Cmp, allSpans)
   152  	for _, s := range allSpans {
   153  		f.Add(s)
   154  	}
   155  	f.Finish()
   156  
   157  	// Log all the levels and their fragments, as well as the fully-fragmented
   158  	// spans produced by the Fragmenter.
   159  	fmt.Fprintln(&buf, "Fragmenter fragments:")
   160  	for i, s := range allFragmented {
   161  		if i > 0 {
   162  			fmt.Fprint(&buf, ", ")
   163  		}
   164  		fmt.Fprint(&buf, s)
   165  	}
   166  	t.Logf("%d levels:\n%s\n", len(levels), buf.String())
   167  
   168  	fragmenterIter := NewIter(f.Cmp, allFragmented)
   169  	mergingIter := &MergingIter{}
   170  	mergingIter.Init(f.Cmp, VisibleTransform(base.InternalKeySeqNumMax), new(MergingBuffers), iters...)
   171  
   172  	// Position both so that it's okay to perform relative positioning
   173  	// operations immediately.
   174  	mergingIter.First()
   175  	fragmenterIter.First()
   176  
   177  	type opKind struct {
   178  		weight int
   179  		fn     func() (str string, f *Span, m *Span)
   180  	}
   181  	ops := []opKind{
   182  		{weight: 2, fn: func() (string, *Span, *Span) {
   183  			return "First()", fragmenterIter.First(), mergingIter.First()
   184  		}},
   185  		{weight: 2, fn: func() (string, *Span, *Span) {
   186  			return "Last()", fragmenterIter.Last(), mergingIter.Last()
   187  		}},
   188  		{weight: 5, fn: func() (string, *Span, *Span) {
   189  			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
   190  			return fmt.Sprintf("SeekGE(%q)", k),
   191  				fragmenterIter.SeekGE(k),
   192  				mergingIter.SeekGE(k)
   193  		}},
   194  		{weight: 5, fn: func() (string, *Span, *Span) {
   195  			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
   196  			return fmt.Sprintf("SeekLT(%q)", k),
   197  				fragmenterIter.SeekLT(k),
   198  				mergingIter.SeekLT(k)
   199  		}},
   200  		{weight: 50, fn: func() (string, *Span, *Span) {
   201  			return "Next()", fragmenterIter.Next(), mergingIter.Next()
   202  		}},
   203  		{weight: 50, fn: func() (string, *Span, *Span) {
   204  			return "Prev()", fragmenterIter.Prev(), mergingIter.Prev()
   205  		}},
   206  	}
   207  	var totalWeight int
   208  	for _, op := range ops {
   209  		totalWeight += op.weight
   210  	}
   211  
   212  	var fragmenterBuf bytes.Buffer
   213  	var mergingBuf bytes.Buffer
   214  	opCount := rng.Intn(200) + 50
   215  	for i := 0; i < opCount; i++ {
   216  		p := rng.Intn(totalWeight)
   217  		opIndex := 0
   218  		for i, op := range ops {
   219  			if p < op.weight {
   220  				opIndex = i
   221  				break
   222  			}
   223  			p -= op.weight
   224  		}
   225  
   226  		opString, fs, ms := ops[opIndex].fn()
   227  
   228  		fragmenterBuf.Reset()
   229  		mergingBuf.Reset()
   230  		fmt.Fprint(&fragmenterBuf, fs)
   231  		fmt.Fprint(&mergingBuf, ms)
   232  		if fragmenterBuf.String() != mergingBuf.String() {
   233  			t.Fatalf("seed %d, op %d: %s = %s, fragmenter iterator returned %s",
   234  				seed, i, opString, mergingBuf.String(), fragmenterBuf.String())
   235  		}
   236  		t.Logf("op %d: %s = %s", i, opString, fragmenterBuf.String())
   237  	}
   238  }