github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/rangekey/coalesce_test.go (about)

     1  // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package rangekey
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"math"
    12  	"math/rand"
    13  	"strconv"
    14  	"strings"
    15  	"testing"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/datadriven"
    19  	"github.com/cockroachdb/pebble/internal/base"
    20  	"github.com/cockroachdb/pebble/internal/keyspan"
    21  	"github.com/cockroachdb/pebble/internal/testkeys"
    22  	"github.com/pmezard/go-difflib/difflib"
    23  	"github.com/stretchr/testify/require"
    24  )
    25  
    26  func TestCoalesce(t *testing.T) {
    27  	var buf bytes.Buffer
    28  	eq := testkeys.Comparer.Equal
    29  	cmp := testkeys.Comparer.Compare
    30  
    31  	datadriven.RunTest(t, "testdata/coalesce", func(t *testing.T, td *datadriven.TestData) string {
    32  		switch td.Cmd {
    33  		case "coalesce":
    34  			buf.Reset()
    35  			span := keyspan.ParseSpan(td.Input)
    36  			coalesced := keyspan.Span{
    37  				Start: span.Start,
    38  				End:   span.End,
    39  			}
    40  			if err := Coalesce(cmp, eq, span.Keys, &coalesced.Keys); err != nil {
    41  				return err.Error()
    42  			}
    43  			fmt.Fprintln(&buf, coalesced)
    44  			return buf.String()
    45  		default:
    46  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
    47  		}
    48  	})
    49  }
    50  
    51  func TestIter(t *testing.T) {
    52  	eq := testkeys.Comparer.Equal
    53  	cmp := testkeys.Comparer.Compare
    54  	var iter keyspan.MergingIter
    55  	var buf bytes.Buffer
    56  
    57  	datadriven.RunTest(t, "testdata/iter", func(t *testing.T, td *datadriven.TestData) string {
    58  		buf.Reset()
    59  		switch td.Cmd {
    60  		case "define":
    61  			visibleSeqNum := base.InternalKeySeqNumMax
    62  			for _, arg := range td.CmdArgs {
    63  				if arg.Key == "visible-seq-num" {
    64  					var err error
    65  					visibleSeqNum, err = strconv.ParseUint(arg.Vals[0], 10, 64)
    66  					require.NoError(t, err)
    67  				}
    68  			}
    69  
    70  			var spans []keyspan.Span
    71  			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
    72  			for _, line := range lines {
    73  				spans = append(spans, keyspan.ParseSpan(line))
    74  			}
    75  			transform := keyspan.TransformerFunc(func(cmp base.Compare, s keyspan.Span, dst *keyspan.Span) error {
    76  				keysBySuffix := keyspan.KeysBySuffix{
    77  					Cmp:  cmp,
    78  					Keys: dst.Keys[:0],
    79  				}
    80  				if err := coalesce(eq, &keysBySuffix, visibleSeqNum, s.Keys); err != nil {
    81  					return err
    82  				}
    83  				// Update the span with the (potentially reduced) keys slice.  coalesce left
    84  				// the keys in *dst sorted by suffix. Re-sort them by trailer.
    85  				dst.Keys = keysBySuffix.Keys
    86  				keyspan.SortKeysByTrailer(&dst.Keys)
    87  				dst.Start = s.Start
    88  				dst.End = s.End
    89  				return nil
    90  			})
    91  			iter.Init(cmp, transform, new(keyspan.MergingBuffers), keyspan.NewIter(cmp, spans))
    92  			return "OK"
    93  		case "iter":
    94  			buf.Reset()
    95  			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
    96  			for _, line := range lines {
    97  				line = strings.TrimSpace(line)
    98  				i := strings.IndexByte(line, ' ')
    99  				iterCmd := line
   100  				if i > 0 {
   101  					iterCmd = string(line[:i])
   102  				}
   103  				var s *keyspan.Span
   104  				switch iterCmd {
   105  				case "first":
   106  					s = iter.First()
   107  				case "last":
   108  					s = iter.Last()
   109  				case "next":
   110  					s = iter.Next()
   111  				case "prev":
   112  					s = iter.Prev()
   113  				case "seek-ge":
   114  					s = iter.SeekGE([]byte(strings.TrimSpace(line[i:])))
   115  				case "seek-lt":
   116  					s = iter.SeekLT([]byte(strings.TrimSpace(line[i:])))
   117  				default:
   118  					return fmt.Sprintf("unrecognized iter command %q", iterCmd)
   119  				}
   120  				require.NoError(t, iter.Error())
   121  				fmt.Fprint(&buf, s)
   122  				if buf.Len() > 0 {
   123  					fmt.Fprintln(&buf)
   124  				}
   125  			}
   126  			return buf.String()
   127  		default:
   128  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
   129  		}
   130  	})
   131  }
   132  
   133  func TestDefragmenting(t *testing.T) {
   134  	cmp := testkeys.Comparer.Compare
   135  
   136  	var buf bytes.Buffer
   137  	var spans []keyspan.Span
   138  	var hasPrefix bool
   139  	var prefix []byte
   140  	datadriven.RunTest(t, "testdata/defragmenting_iter", func(t *testing.T, td *datadriven.TestData) string {
   141  		buf.Reset()
   142  		switch td.Cmd {
   143  		case "define":
   144  			spans = spans[:0]
   145  			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
   146  			for _, line := range lines {
   147  				spans = append(spans, keyspan.ParseSpan(line))
   148  			}
   149  			return ""
   150  		case "iter":
   151  			var userIterCfg UserIteratorConfig
   152  			iter := userIterCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax,
   153  				nil /* lower */, nil, /* upper */
   154  				&hasPrefix, &prefix, false /* internalKeys */, new(Buffers),
   155  				keyspan.NewIter(cmp, spans))
   156  			for _, line := range strings.Split(td.Input, "\n") {
   157  				runIterOp(&buf, iter, line)
   158  			}
   159  			return strings.TrimSpace(buf.String())
   160  		default:
   161  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
   162  		}
   163  	})
   164  }
   165  
   166  func TestDefragmentingIter_Randomized(t *testing.T) {
   167  	seed := time.Now().UnixNano()
   168  	for i := int64(0); i < 100; i++ {
   169  		testDefragmentingIteRandomizedOnce(t, seed+i)
   170  	}
   171  }
   172  
   173  func TestDefragmentingIter_RandomizedFixedSeed(t *testing.T) {
   174  	const seed = 1648173101214881000
   175  	testDefragmentingIteRandomizedOnce(t, seed)
   176  }
   177  
   178  func testDefragmentingIteRandomizedOnce(t *testing.T, seed int64) {
   179  	cmp := testkeys.Comparer.Compare
   180  	formatKey := testkeys.Comparer.FormatKey
   181  
   182  	rng := rand.New(rand.NewSource(seed))
   183  	t.Logf("seed = %d", seed)
   184  
   185  	// Use a key space of alphanumeric strings, with a random max length between
   186  	// 1-2. Repeat keys are more common at the lower max lengths.
   187  	ks := testkeys.Alpha(rng.Intn(2) + 1)
   188  
   189  	// Generate between 1-15 range keys.
   190  	const maxRangeKeys = 15
   191  	var original, fragmented []keyspan.Span
   192  	numRangeKeys := 1 + rng.Intn(maxRangeKeys)
   193  	for i := 0; i < numRangeKeys; i++ {
   194  		startIdx := rng.Int63n(ks.Count())
   195  		endIdx := rng.Int63n(ks.Count())
   196  		for startIdx == endIdx {
   197  			endIdx = rng.Int63n(ks.Count())
   198  		}
   199  		if startIdx > endIdx {
   200  			startIdx, endIdx = endIdx, startIdx
   201  		}
   202  
   203  		key := keyspan.Key{
   204  			Trailer: base.MakeTrailer(uint64(i), base.InternalKeyKindRangeKeySet),
   205  			Value:   []byte(fmt.Sprintf("v%d", rng.Intn(3))),
   206  		}
   207  		// Generate suffixes 0, 1, 2, or 3 with 0 indicating none.
   208  		if suffix := rng.Int63n(4); suffix > 0 {
   209  			key.Suffix = testkeys.Suffix(suffix)
   210  		}
   211  		original = append(original, keyspan.Span{
   212  			Start: testkeys.Key(ks, startIdx),
   213  			End:   testkeys.Key(ks, endIdx),
   214  			Keys:  []keyspan.Key{key},
   215  		})
   216  
   217  		for startIdx < endIdx {
   218  			width := rng.Int63n(endIdx-startIdx) + 1
   219  			fragmented = append(fragmented, keyspan.Span{
   220  				Start: testkeys.Key(ks, startIdx),
   221  				End:   testkeys.Key(ks, startIdx+width),
   222  				Keys:  []keyspan.Key{key},
   223  			})
   224  			startIdx += width
   225  		}
   226  	}
   227  
   228  	// Both the original and the deliberately fragmented spans may contain
   229  	// overlaps, so we need to sort and fragment them.
   230  	original = fragment(cmp, formatKey, original)
   231  	fragmented = fragment(cmp, formatKey, fragmented)
   232  
   233  	var referenceCfg, fragmentedCfg UserIteratorConfig
   234  	referenceIter := referenceCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax,
   235  		nil /* lower */, nil, /* upper */
   236  		new(bool), new([]byte), false /* internalKeys */, new(Buffers),
   237  		keyspan.NewIter(cmp, original))
   238  	fragmentedIter := fragmentedCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax,
   239  		nil /* lower */, nil, /* upper */
   240  		new(bool), new([]byte), false /* internalKeys */, new(Buffers),
   241  		keyspan.NewIter(cmp, fragmented))
   242  
   243  	// Generate 100 random operations and run them against both iterators.
   244  	const numIterOps = 100
   245  	type opKind struct {
   246  		weight int
   247  		fn     func() string
   248  	}
   249  	ops := []opKind{
   250  		{weight: 2, fn: func() string { return "first" }},
   251  		{weight: 2, fn: func() string { return "last" }},
   252  		{weight: 50, fn: func() string { return "next" }},
   253  		{weight: 50, fn: func() string { return "prev" }},
   254  		{weight: 5, fn: func() string {
   255  			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
   256  			return fmt.Sprintf("seekge(%s)", k)
   257  		}},
   258  		{weight: 5, fn: func() string {
   259  			k := testkeys.Key(ks, rng.Int63n(ks.Count()))
   260  			return fmt.Sprintf("seeklt(%s)", k)
   261  		}},
   262  	}
   263  	var totalWeight int
   264  	for _, op := range ops {
   265  		totalWeight += op.weight
   266  	}
   267  	var referenceHistory, fragmentedHistory bytes.Buffer
   268  	for i := 0; i < numIterOps; i++ {
   269  		p := rng.Intn(totalWeight)
   270  		opIndex := 0
   271  		if i == 0 {
   272  			// First op is always a First().
   273  		} else {
   274  			for i, op := range ops {
   275  				if p < op.weight {
   276  					opIndex = i
   277  					break
   278  				}
   279  				p -= op.weight
   280  			}
   281  		}
   282  		op := ops[opIndex].fn()
   283  		runIterOp(&referenceHistory, referenceIter, op)
   284  		runIterOp(&fragmentedHistory, fragmentedIter, op)
   285  		if !bytes.Equal(referenceHistory.Bytes(), fragmentedHistory.Bytes()) {
   286  			t.Fatal(debugContext(cmp, formatKey, original, fragmented,
   287  				referenceHistory.String(), fragmentedHistory.String()))
   288  		}
   289  	}
   290  }
   291  
   292  func fragment(cmp base.Compare, formatKey base.FormatKey, spans []keyspan.Span) []keyspan.Span {
   293  	keyspan.Sort(cmp, spans)
   294  	var fragments []keyspan.Span
   295  	f := keyspan.Fragmenter{
   296  		Cmp:    cmp,
   297  		Format: formatKey,
   298  		Emit: func(f keyspan.Span) {
   299  			fragments = append(fragments, f)
   300  		},
   301  	}
   302  	for _, s := range spans {
   303  		f.Add(s)
   304  	}
   305  	f.Finish()
   306  	return fragments
   307  }
   308  
   309  func debugContext(
   310  	cmp base.Compare,
   311  	formatKey base.FormatKey,
   312  	original, fragmented []keyspan.Span,
   313  	refHistory, fragHistory string,
   314  ) string {
   315  	var buf bytes.Buffer
   316  	fmt.Fprintln(&buf, "Reference:")
   317  	for _, s := range original {
   318  		fmt.Fprintln(&buf, s)
   319  	}
   320  	fmt.Fprintln(&buf)
   321  	fmt.Fprintln(&buf, "Fragmented:")
   322  	for _, s := range fragmented {
   323  		fmt.Fprintln(&buf, s)
   324  	}
   325  	fmt.Fprintln(&buf)
   326  	fmt.Fprintln(&buf, "\nOperations diff:")
   327  	diff, err := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{
   328  		A:       difflib.SplitLines(refHistory),
   329  		B:       difflib.SplitLines(fragHistory),
   330  		Context: 5,
   331  	})
   332  	if err != nil {
   333  		panic(err)
   334  	}
   335  	fmt.Fprintln(&buf, diff)
   336  	return buf.String()
   337  }
   338  
   339  var iterDelim = map[rune]bool{',': true, ' ': true, '(': true, ')': true, '"': true}
   340  
   341  func runIterOp(w io.Writer, it keyspan.FragmentIterator, op string) {
   342  	fields := strings.FieldsFunc(op, func(r rune) bool { return iterDelim[r] })
   343  	var s *keyspan.Span
   344  	switch strings.ToLower(fields[0]) {
   345  	case "first":
   346  		s = it.First()
   347  	case "last":
   348  		s = it.Last()
   349  	case "seekge":
   350  		s = it.SeekGE([]byte(fields[1]))
   351  	case "seeklt":
   352  		s = it.SeekLT([]byte(fields[1]))
   353  	case "next":
   354  		s = it.Next()
   355  	case "prev":
   356  		s = it.Prev()
   357  	default:
   358  		panic(fmt.Sprintf("unrecognized iter op %q", fields[0]))
   359  	}
   360  	fmt.Fprintf(w, "%-10s", op)
   361  	if s == nil {
   362  		fmt.Fprintln(w, ".")
   363  		return
   364  	}
   365  	fmt.Fprintln(w, s)
   366  }
   367  
   368  func BenchmarkTransform(b *testing.B) {
   369  	var bufs Buffers
   370  	var ui UserIteratorConfig
   371  	reinit := func() {
   372  		bufs.PrepareForReuse()
   373  		_ = ui.Init(testkeys.Comparer, math.MaxUint64, nil, nil, new(bool), nil, true /* internalKeys */, &bufs)
   374  	}
   375  
   376  	for _, shadowing := range []bool{false, true} {
   377  		b.Run(fmt.Sprintf("shadowing=%t", shadowing), func(b *testing.B) {
   378  			for n := 1; n <= 128; n *= 2 {
   379  				b.Run(fmt.Sprintf("keys=%d", n), func(b *testing.B) {
   380  					rng := rand.New(rand.NewSource(233473048763))
   381  					reinit()
   382  
   383  					suffixes := make([][]byte, n)
   384  					for s := range suffixes {
   385  						if shadowing {
   386  							suffixes[s] = testkeys.Suffix(int64(rng.Intn(n)))
   387  						} else {
   388  							suffixes[s] = testkeys.Suffix(int64(s))
   389  						}
   390  					}
   391  					rng.Shuffle(len(suffixes), func(i, j int) {
   392  						suffixes[i], suffixes[j] = suffixes[j], suffixes[i]
   393  					})
   394  
   395  					var keys []keyspan.Key
   396  					for k := 0; k < n; k++ {
   397  						keys = append(keys, keyspan.Key{
   398  							Trailer: base.MakeTrailer(uint64(n-k), base.InternalKeyKindRangeKeySet),
   399  							Suffix:  suffixes[k],
   400  						})
   401  					}
   402  					dst := keyspan.Span{Keys: make([]keyspan.Key, 0, len(keys))}
   403  					b.ResetTimer()
   404  
   405  					for i := 0; i < b.N; i++ {
   406  						err := ui.Transform(testkeys.Comparer.Compare, keyspan.Span{Keys: keys}, &dst)
   407  						if err != nil {
   408  							b.Fatal(err)
   409  						}
   410  						dst.Keys = dst.Keys[:0]
   411  					}
   412  				})
   413  			}
   414  		})
   415  	}
   416  }