github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/keyspan/defragment_test.go

// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package keyspan

import (
	"bytes"
	"fmt"
	"math/rand"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/cockroachdb/datadriven"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/testkeys"
	"github.com/pmezard/go-difflib/difflib"
)

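// TestDefragmentingIter is a datadriven test of DefragmentingIter. The
// "define" command populates a set of spans, and the "iter" command runs a
// sequence of iterator operations over a defragmenting iterator configured by
// the command's equal, reducer and probes arguments.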
func TestDefragmentingIter(t *testing.T) {
	comparer := testkeys.Comparer
	cmp := comparer.Compare
	internalEqual := DefragmentInternal
	alwaysEqual := DefragmentMethodFunc(func(_ base.Equal, _, _ *Span) bool { return true })
	staticReducer := StaticDefragmentReducer
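	// collectReducer merges the keys of two abutting spans and sorts the
	// combined slice by (seqnum, kind).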
	collectReducer := func(cur, next []Key) []Key {
		c := keysBySeqNumKind(append(cur, next...))
		sort.Sort(&c)
		return c
	}

	var buf bytes.Buffer
	var spans []Span
	datadriven.RunTest(t, "testdata/defragmenting_iter", func(t *testing.T, td *datadriven.TestData) string {
		buf.Reset()
		switch td.Cmd {
		case "define":
			spans = spans[:0]
			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
			for _, line := range lines {
				spans = append(spans, ParseSpan(line))
			}
			return ""
		case "iter":
			equal := internalEqual
			reducer := staticReducer
			var probes []probe
			for _, cmdArg := range td.CmdArgs {
				switch cmd := cmdArg.Key; cmd {
				case "equal":
					if len(cmdArg.Vals) != 1 {
						return fmt.Sprintf("only one equal func expected; got %d", len(cmdArg.Vals))
					}
					switch val := cmdArg.Vals[0]; val {
					case "internal":
						equal = internalEqual
					case "always":
						equal = alwaysEqual
					default:
						return fmt.Sprintf("unknown equal func %s", val)
					}
				case "reducer":
					if len(cmdArg.Vals) != 1 {
						return fmt.Sprintf("only one reducer expected; got %d", len(cmdArg.Vals))
					}
					switch val := cmdArg.Vals[0]; val {
					case "collect":
						reducer = collectReducer
					case "static":
						reducer = staticReducer
					default:
						return fmt.Sprintf("unknown reducer %s", val)
					}
				case "probes":
					probes = parseProbes(cmdArg.Vals...)
				default:
					return fmt.Sprintf("unknown command: %s", cmd)
				}
			}
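			// Build the iterator stack under test: a MergingIter over the
			// defined spans, optionally wrapped with probes that log to buf,
			// and a DefragmentingIter configured with the requested equality
			// method and reducer.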
			var miter MergingIter
			miter.Init(cmp, noopTransform, new(MergingBuffers), NewIter(cmp, spans))
			innerIter := attachProbes(&miter, probeContext{log: &buf}, probes...)
			var iter DefragmentingIter
			iter.Init(comparer, innerIter, equal, reducer, new(DefragmentingBuffers))
			for _, line := range strings.Split(td.Input, "\n") {
				runIterOp(&buf, &iter, line)
				fmt.Fprintln(&buf)
			}
			return strings.TrimSpace(buf.String())
		default:
			return fmt.Sprintf("unrecognized command %q", td.Cmd)
		}
	})
}

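// TestDefragmentingIter_Randomized runs the randomized defragmenting iterator
// test 100 times, each with a distinct seed derived from the current time.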
func TestDefragmentingIter_Randomized(t *testing.T) {
	seed := time.Now().UnixNano()
	for i := int64(0); i < 100; i++ {
		testDefragmentingIterRandomizedOnce(t, seed+i)
	}
}

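// TestDefragmentingIter_RandomizedFixedSeed runs the randomized test with a
// fixed seed so that a particular sequence of spans and operations is
// reproducible.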
func TestDefragmentingIter_RandomizedFixedSeed(t *testing.T) {
	const seed = 1648173101214881000
	testDefragmentingIterRandomizedOnce(t, seed)
}

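// testDefragmentingIterRandomizedOnce constructs a random set of range keys
// and a deliberately over-fragmented copy of the same keyspace, then checks
// that defragmenting iterators over the two produce identical output for a
// random sequence of iterator operations.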
func testDefragmentingIterRandomizedOnce(t *testing.T, seed int64) {
	comparer := testkeys.Comparer
	cmp := comparer.Compare
	formatKey := comparer.FormatKey

	rng := rand.New(rand.NewSource(seed))
	t.Logf("seed = %d", seed)

	// Use a key space of alphabetic strings with a random max length of 1 or 2.
	// Repeat keys are more common at the lower max length.
	ks := testkeys.Alpha(rng.Intn(2) + 1)

	// Generate between 1-15 range keys.
	const maxRangeKeys = 15
	var original, fragmented []Span
	numRangeKeys := 1 + rng.Intn(maxRangeKeys)
	for i := 0; i < numRangeKeys; i++ {
		startIdx := rng.Int63n(ks.Count())
		endIdx := rng.Int63n(ks.Count())
		for startIdx == endIdx {
			endIdx = rng.Int63n(ks.Count())
		}
		if startIdx > endIdx {
			startIdx, endIdx = endIdx, startIdx
		}

		key := Key{
			Trailer: base.MakeTrailer(uint64(i), base.InternalKeyKindRangeKeySet),
			Value:   []byte(fmt.Sprintf("v%d", rng.Intn(3))),
		}
		// Generate suffixes 0, 1, 2, or 3 with 0 indicating none.
		if suffix := rng.Int63n(4); suffix > 0 {
			key.Suffix = testkeys.Suffix(suffix)
		}
		original = append(original, Span{
			Start: testkeys.Key(ks, startIdx),
			End:   testkeys.Key(ks, endIdx),
			Keys:  []Key{key},
		})

		for startIdx < endIdx {
			width := rng.Int63n(endIdx-startIdx) + 1
			fragmented = append(fragmented, Span{
				Start: testkeys.Key(ks, startIdx),
				End:   testkeys.Key(ks, startIdx+width),
				Keys:  []Key{key},
			})
			startIdx += width
		}
	}

	// Both the original and the deliberately fragmented spans may contain
	// overlaps, so we need to sort and fragment them.
	original = fragment(cmp, formatKey, original)
	fragmented = fragment(cmp, formatKey, fragmented)

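	// Build a merging iterator over each of the original and fragmented span
	// sets and wrap both in defragmenting iterators; after defragmentation the
	// two should behave identically.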
	var originalInner MergingIter
	originalInner.Init(cmp, noopTransform, new(MergingBuffers), NewIter(cmp, original))
	var fragmentedInner MergingIter
	fragmentedInner.Init(cmp, noopTransform, new(MergingBuffers), NewIter(cmp, fragmented))

	var referenceIter, fragmentedIter DefragmentingIter
	referenceIter.Init(comparer, &originalInner, DefragmentInternal, StaticDefragmentReducer, new(DefragmentingBuffers))
	fragmentedIter.Init(comparer, &fragmentedInner, DefragmentInternal, StaticDefragmentReducer, new(DefragmentingBuffers))

	// Generate 100 random operations and run them against both iterators.
	const numIterOps = 100
	type opKind struct {
		weight int
		fn     func() string
	}
	ops := []opKind{
		{weight: 2, fn: func() string { return "first" }},
		{weight: 2, fn: func() string { return "last" }},
		{weight: 50, fn: func() string { return "next" }},
		{weight: 50, fn: func() string { return "prev" }},
		{weight: 5, fn: func() string {
			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
			return fmt.Sprintf("seekge(%s)", k)
		}},
		{weight: 5, fn: func() string {
			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
			return fmt.Sprintf("seeklt(%s)", k)
		}},
	}
	var totalWeight int
	for _, op := range ops {
		totalWeight += op.weight
	}
	var referenceHistory, fragmentedHistory bytes.Buffer
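	// Each iteration picks an operation with probability proportional to its
	// weight (except the very first, which is always a First), applies it to
	// both iterators, and fails with a debugging diff as soon as the two
	// histories diverge.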
	for i := 0; i < numIterOps; i++ {
		p := rng.Intn(totalWeight)
		opIndex := 0
		if i == 0 {
			// First op is always a First().
		} else {
			for j, op := range ops {
				if p < op.weight {
					opIndex = j
					break
				}
				p -= op.weight
			}
		}
		op := ops[opIndex].fn()
		runIterOp(&referenceHistory, &referenceIter, op)
		runIterOp(&fragmentedHistory, &fragmentedIter, op)
		if !bytes.Equal(referenceHistory.Bytes(), fragmentedHistory.Bytes()) {
			t.Fatal(debugContext(cmp, formatKey, original, fragmented,
				referenceHistory.String(), fragmentedHistory.String()))
		}
		fmt.Fprintln(&referenceHistory)
		fmt.Fprintln(&fragmentedHistory)
	}
}

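// fragment sorts the provided spans and passes them through a Fragmenter,
// returning the resulting non-overlapping fragments.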
func fragment(cmp base.Compare, formatKey base.FormatKey, spans []Span) []Span {
	Sort(cmp, spans)
	var fragments []Span
	f := Fragmenter{
		Cmp:    cmp,
		Format: formatKey,
		Emit: func(f Span) {
			fragments = append(fragments, f)
		},
	}
	for _, s := range spans {
		f.Add(s)
	}
	f.Finish()
	return fragments
}

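// debugContext formats the original and fragmented spans together with a
// unified diff of the two iterators' operation histories, for inclusion in
// test failure output.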
func debugContext(
	cmp base.Compare,
	formatKey base.FormatKey,
	original, fragmented []Span,
	refHistory, fragHistory string,
) string {
	var buf bytes.Buffer
	fmt.Fprintln(&buf, "Reference:")
	for _, s := range original {
		fmt.Fprintln(&buf, s)
	}
	fmt.Fprintln(&buf)
	fmt.Fprintln(&buf, "Fragmented:")
	for _, s := range fragmented {
		fmt.Fprintln(&buf, s)
	}
	fmt.Fprintln(&buf)
	fmt.Fprintln(&buf, "\nOperations diff:")
	diff, err := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{
		A:       difflib.SplitLines(refHistory),
		B:       difflib.SplitLines(fragHistory),
		Context: 5,
	})
	if err != nil {
		panic(err)
	}
	fmt.Fprintln(&buf, diff)
	return buf.String()
}