github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/internal/keyspan/merging_iter_test.go (about)

     1  // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package keyspan
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"math/rand"
    11  	"slices"
    12  	"strconv"
    13  	"strings"
    14  	"testing"
    15  	"time"
    16  
    17  	"github.com/cockroachdb/datadriven"
    18  	"github.com/cockroachdb/pebble/internal/base"
    19  	"github.com/cockroachdb/pebble/internal/testkeys"
    20  	"github.com/stretchr/testify/require"
    21  )
    22  
    23  func TestMergingIter(t *testing.T) {
    24  	cmp := base.DefaultComparer.Compare
    25  
    26  	var definedIters []FragmentIterator
    27  	var buf bytes.Buffer
    28  	datadriven.RunTest(t, "testdata/merging_iter", func(t *testing.T, td *datadriven.TestData) string {
    29  		switch td.Cmd {
    30  		case "define":
    31  			definedIters = definedIters[:0]
    32  			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
    33  			var spans []Span
    34  			for _, line := range lines {
    35  				if line == "--" {
    36  					definedIters = append(definedIters, &invalidatingIter{iter: NewIter(cmp, spans)})
    37  					spans = nil
    38  					continue
    39  				}
    40  				spans = append(spans, ParseSpan(line))
    41  			}
    42  			if len(spans) > 0 {
    43  				definedIters = append(definedIters, &invalidatingIter{iter: NewIter(cmp, spans)})
    44  			}
    45  			return fmt.Sprintf("%d levels", len(definedIters))
    46  		case "iter":
    47  			buf.Reset()
    48  			pctx := probeContext{log: &buf}
    49  			snapshot := base.InternalKeySeqNumMax
    50  			iters := slices.Clone(definedIters)
    51  			for _, cmdArg := range td.CmdArgs {
    52  				switch cmdArg.Key {
    53  				case "snapshot":
    54  					var err error
    55  					snapshot, err = strconv.ParseUint(cmdArg.Vals[0], 10, 64)
    56  					require.NoError(t, err)
    57  				case "probes":
    58  					// The first value indicates which of the merging iterator's
    59  					// child iterators is the target.
    60  					i, err := strconv.Atoi(cmdArg.Vals[0])
    61  					if err != nil {
    62  						return err.Error()
    63  					}
    64  					// The remaining values define probes to attach.
    65  					iters[i] = attachProbes(iters[i], pctx, parseProbes(cmdArg.Vals[1:]...)...)
    66  				default:
    67  					return fmt.Sprintf("unrecognized arg %q", cmdArg.Key)
    68  				}
    69  			}
    70  			var iter MergingIter
    71  			iter.Init(cmp, VisibleTransform(snapshot), new(MergingBuffers), iters...)
    72  			runIterCmd(t, td, &iter, &buf)
    73  			return buf.String()
    74  		default:
    75  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
    76  		}
    77  	})
    78  }
    79  
    80  // TestMergingIter_FragmenterEquivalence tests for equivalence between the
    81  // fragmentation performed on-the-fly by the MergingIter and the fragmentation
    82  // performed by the Fragmenter.
    83  //
    84  // It does this by producing 1-10 levels of well-formed fragments. Generated
    85  // fragments may overlap other levels arbitrarily, but within their level
    86  // generated fragments may only overlap other fragments that share the same user
    87  // key bounds.
    88  //
    89  // The test then feeds all the fragments, across all levels, into a Fragmenter
    90  // and produces a Iter over those fragments. The test also constructs a
    91  // MergingIter with a separate Iter for each level. It runs a random
    92  // series of operations, applying each operation to both. It asserts that each
    93  // operation has identical results on both iterators.
    94  func TestMergingIter_FragmenterEquivalence(t *testing.T) {
    95  	seed := time.Now().UnixNano()
    96  	for i := int64(0); i < 10; i++ {
    97  		testFragmenterEquivalenceOnce(t, seed+i)
    98  	}
    99  }
   100  
   101  func TestMergingIter_FragmenterEquivalence_Seed(t *testing.T) {
   102  	// This test uses a fixed seed. It's useful to manually edit its seed when
   103  	// debugging a test failure of the variable-seed test.
   104  	const seed = 1644517830186873000
   105  	testFragmenterEquivalenceOnce(t, seed)
   106  }
   107  
   108  func testFragmenterEquivalenceOnce(t *testing.T, seed int64) {
   109  	cmp := testkeys.Comparer.Compare
   110  	rng := rand.New(rand.NewSource(seed))
   111  	t.Logf("seed = %d", seed)
   112  
   113  	// Use a key space of alphanumeric strings, with a random max length between
   114  	// 1-3. Repeat keys are more common at the lower max lengths.
   115  	ks := testkeys.Alpha(rng.Intn(3) + 1)
   116  
   117  	// Generate between 1 and 10 levels of fragment iterators.
   118  	levels := make([][]Span, rng.Intn(10)+1)
   119  	iters := make([]FragmentIterator, len(levels))
   120  	var allSpans []Span
   121  	var buf bytes.Buffer
   122  	for l := 0; l < len(levels); l++ {
   123  		fmt.Fprintf(&buf, "level %d: ", l)
   124  		for keyspaceStartIdx := int64(0); keyspaceStartIdx < ks.Count(); {
   125  			// Generate spans of lengths of up to a third of the keyspace.
   126  			spanStartIdx := keyspaceStartIdx + rng.Int63n(ks.Count()/3)
   127  			spanEndIdx := spanStartIdx + rng.Int63n(ks.Count()/3) + 1
   128  
   129  			if spanEndIdx < ks.Count() {
   130  				keyCount := uint64(rng.Intn(3) + 1)
   131  				s := Span{
   132  					Start: testkeys.Key(ks, spanStartIdx),
   133  					End:   testkeys.Key(ks, spanEndIdx),
   134  					Keys:  make([]Key, 0, keyCount),
   135  				}
   136  				for k := keyCount; k > 0; k-- {
   137  					seqNum := uint64((len(levels)-l)*3) + k
   138  					s.Keys = append(s.Keys, Key{
   139  						Trailer: base.MakeTrailer(seqNum, base.InternalKeyKindRangeKeySet),
   140  					})
   141  				}
   142  				if len(levels[l]) > 0 {
   143  					fmt.Fprint(&buf, ", ")
   144  				}
   145  				fmt.Fprintf(&buf, "%s", s)
   146  
   147  				levels[l] = append(levels[l], s)
   148  				allSpans = append(allSpans, s)
   149  			}
   150  			keyspaceStartIdx = spanEndIdx
   151  		}
   152  		iters[l] = &invalidatingIter{iter: NewIter(cmp, levels[l])}
   153  		fmt.Fprintln(&buf)
   154  	}
   155  
   156  	// Fragment the spans across the levels.
   157  	var allFragmented []Span
   158  	f := Fragmenter{
   159  		Cmp:    cmp,
   160  		Format: testkeys.Comparer.FormatKey,
   161  		Emit: func(span Span) {
   162  			allFragmented = append(allFragmented, span)
   163  		},
   164  	}
   165  	Sort(f.Cmp, allSpans)
   166  	for _, s := range allSpans {
   167  		f.Add(s)
   168  	}
   169  	f.Finish()
   170  
   171  	// Log all the levels and their fragments, as well as the fully-fragmented
   172  	// spans produced by the Fragmenter.
   173  	fmt.Fprintln(&buf, "Fragmenter fragments:")
   174  	for i, s := range allFragmented {
   175  		if i > 0 {
   176  			fmt.Fprint(&buf, ", ")
   177  		}
   178  		fmt.Fprint(&buf, s)
   179  	}
   180  	t.Logf("%d levels:\n%s\n", len(levels), buf.String())
   181  
   182  	fragmenterIter := NewIter(f.Cmp, allFragmented)
   183  	mergingIter := &MergingIter{}
   184  	mergingIter.Init(f.Cmp, VisibleTransform(base.InternalKeySeqNumMax), new(MergingBuffers), iters...)
   185  
   186  	// Position both so that it's okay to perform relative positioning
   187  	// operations immediately.
   188  	mergingIter.First()
   189  	fragmenterIter.First()
   190  
   191  	type opKind struct {
   192  		weight int
   193  		fn     func() (str string, f *Span, m *Span)
   194  	}
   195  	ops := []opKind{
   196  		{weight: 2, fn: func() (string, *Span, *Span) {
   197  			return "First()", fragmenterIter.First(), mergingIter.First()
   198  		}},
   199  		{weight: 2, fn: func() (string, *Span, *Span) {
   200  			return "Last()", fragmenterIter.Last(), mergingIter.Last()
   201  		}},
   202  		{weight: 5, fn: func() (string, *Span, *Span) {
   203  			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
   204  			return fmt.Sprintf("SeekGE(%q)", k),
   205  				fragmenterIter.SeekGE(k),
   206  				mergingIter.SeekGE(k)
   207  		}},
   208  		{weight: 5, fn: func() (string, *Span, *Span) {
   209  			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
   210  			return fmt.Sprintf("SeekLT(%q)", k),
   211  				fragmenterIter.SeekLT(k),
   212  				mergingIter.SeekLT(k)
   213  		}},
   214  		{weight: 50, fn: func() (string, *Span, *Span) {
   215  			return "Next()", fragmenterIter.Next(), mergingIter.Next()
   216  		}},
   217  		{weight: 50, fn: func() (string, *Span, *Span) {
   218  			return "Prev()", fragmenterIter.Prev(), mergingIter.Prev()
   219  		}},
   220  	}
   221  	var totalWeight int
   222  	for _, op := range ops {
   223  		totalWeight += op.weight
   224  	}
   225  
   226  	var fragmenterBuf bytes.Buffer
   227  	var mergingBuf bytes.Buffer
   228  	opCount := rng.Intn(200) + 50
   229  	for i := 0; i < opCount; i++ {
   230  		p := rng.Intn(totalWeight)
   231  		opIndex := 0
   232  		for i, op := range ops {
   233  			if p < op.weight {
   234  				opIndex = i
   235  				break
   236  			}
   237  			p -= op.weight
   238  		}
   239  
   240  		opString, fs, ms := ops[opIndex].fn()
   241  
   242  		fragmenterBuf.Reset()
   243  		mergingBuf.Reset()
   244  		fmt.Fprint(&fragmenterBuf, fs)
   245  		fmt.Fprint(&mergingBuf, ms)
   246  		if fragmenterBuf.String() != mergingBuf.String() {
   247  			t.Fatalf("seed %d, op %d: %s = %s, fragmenter iterator returned %s",
   248  				seed, i, opString, mergingBuf.String(), fragmenterBuf.String())
   249  		}
   250  		t.Logf("op %d: %s = %s", i, opString, fragmenterBuf.String())
   251  	}
   252  }