github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/rangekey/coalesce_test.go (about)

     1  // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package rangekey
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"math/rand"
    12  	"strconv"
    13  	"strings"
    14  	"testing"
    15  	"time"
    16  
    17  	"github.com/pmezard/go-difflib/difflib"
    18  	"github.com/stretchr/testify/require"
    19  	"github.com/zuoyebang/bitalostable/internal/base"
    20  	"github.com/zuoyebang/bitalostable/internal/datadriven"
    21  	"github.com/zuoyebang/bitalostable/internal/keyspan"
    22  	"github.com/zuoyebang/bitalostable/internal/testkeys"
    23  )
    24  
    25  func TestCoalesce(t *testing.T) {
    26  	var buf bytes.Buffer
    27  	cmp := testkeys.Comparer.Compare
    28  
    29  	datadriven.RunTest(t, "testdata/coalesce", func(td *datadriven.TestData) string {
    30  		switch td.Cmd {
    31  		case "coalesce":
    32  			buf.Reset()
    33  			span := keyspan.ParseSpan(td.Input)
    34  			coalesced := keyspan.Span{
    35  				Start: span.Start,
    36  				End:   span.End,
    37  			}
    38  			if err := Coalesce(cmp, span.Keys, &coalesced.Keys); err != nil {
    39  				return err.Error()
    40  			}
    41  			fmt.Fprintln(&buf, coalesced)
    42  			return buf.String()
    43  		default:
    44  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
    45  		}
    46  	})
    47  }
    48  
    49  func TestIter(t *testing.T) {
    50  	cmp := testkeys.Comparer.Compare
    51  	var iter keyspan.MergingIter
    52  	var buf bytes.Buffer
    53  
    54  	datadriven.RunTest(t, "testdata/iter", func(td *datadriven.TestData) string {
    55  		buf.Reset()
    56  		switch td.Cmd {
    57  		case "define":
    58  			visibleSeqNum := base.InternalKeySeqNumMax
    59  			for _, arg := range td.CmdArgs {
    60  				if arg.Key == "visible-seq-num" {
    61  					var err error
    62  					visibleSeqNum, err = strconv.ParseUint(arg.Vals[0], 10, 64)
    63  					require.NoError(t, err)
    64  				}
    65  			}
    66  
    67  			var spans []keyspan.Span
    68  			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
    69  			for _, line := range lines {
    70  				spans = append(spans, keyspan.ParseSpan(line))
    71  			}
    72  			transform := keyspan.TransformerFunc(func(cmp base.Compare, s keyspan.Span, dst *keyspan.Span) error {
    73  				s = s.Visible(visibleSeqNum)
    74  				dst.Start = s.Start
    75  				dst.End = s.End
    76  				return Coalesce(cmp, s.Keys, &dst.Keys)
    77  			})
    78  			iter.Init(cmp, transform, keyspan.NewIter(cmp, spans))
    79  			return "OK"
    80  		case "iter":
    81  			buf.Reset()
    82  			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
    83  			for _, line := range lines {
    84  				line = strings.TrimSpace(line)
    85  				i := strings.IndexByte(line, ' ')
    86  				iterCmd := line
    87  				if i > 0 {
    88  					iterCmd = string(line[:i])
    89  				}
    90  				var s *keyspan.Span
    91  				switch iterCmd {
    92  				case "first":
    93  					s = iter.First()
    94  				case "last":
    95  					s = iter.Last()
    96  				case "next":
    97  					s = iter.Next()
    98  				case "prev":
    99  					s = iter.Prev()
   100  				case "seek-ge":
   101  					s = iter.SeekGE([]byte(strings.TrimSpace(line[i:])))
   102  				case "seek-lt":
   103  					s = iter.SeekLT([]byte(strings.TrimSpace(line[i:])))
   104  				default:
   105  					return fmt.Sprintf("unrecognized iter command %q", iterCmd)
   106  				}
   107  				require.NoError(t, iter.Error())
   108  				fmt.Fprint(&buf, s)
   109  				if buf.Len() > 0 {
   110  					fmt.Fprintln(&buf)
   111  				}
   112  			}
   113  			return buf.String()
   114  		default:
   115  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
   116  		}
   117  	})
   118  }
   119  
   120  func TestDefragmenting(t *testing.T) {
   121  	cmp := testkeys.Comparer.Compare
   122  
   123  	var buf bytes.Buffer
   124  	var spans []keyspan.Span
   125  	var hasPrefix bool
   126  	var prefix []byte
   127  	datadriven.RunTest(t, "testdata/defragmenting_iter", func(td *datadriven.TestData) string {
   128  		buf.Reset()
   129  		switch td.Cmd {
   130  		case "define":
   131  			spans = spans[:0]
   132  			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
   133  			for _, line := range lines {
   134  				spans = append(spans, keyspan.ParseSpan(line))
   135  			}
   136  			return ""
   137  		case "iter":
   138  			var userIterCfg UserIteratorConfig
   139  			iter := userIterCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax,
   140  				nil /* lower */, nil, /* upper */
   141  				&hasPrefix, &prefix,
   142  				keyspan.NewIter(cmp, spans))
   143  			for _, line := range strings.Split(td.Input, "\n") {
   144  				runIterOp(&buf, iter, line)
   145  			}
   146  			return strings.TrimSpace(buf.String())
   147  		default:
   148  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
   149  		}
   150  	})
   151  }
   152  
   153  func TestDefragmentingIter_Randomized(t *testing.T) {
   154  	seed := time.Now().UnixNano()
   155  	for i := int64(0); i < 100; i++ {
   156  		testDefragmentingIteRandomizedOnce(t, seed+i)
   157  	}
   158  }
   159  
   160  func TestDefragmentingIter_RandomizedFixedSeed(t *testing.T) {
   161  	const seed = 1648173101214881000
   162  	testDefragmentingIteRandomizedOnce(t, seed)
   163  }
   164  
   165  func testDefragmentingIteRandomizedOnce(t *testing.T, seed int64) {
   166  	cmp := testkeys.Comparer.Compare
   167  	formatKey := testkeys.Comparer.FormatKey
   168  
   169  	rng := rand.New(rand.NewSource(seed))
   170  	t.Logf("seed = %d", seed)
   171  
   172  	// Use a key space of alphanumeric strings, with a random max length between
   173  	// 1-2. Repeat keys are more common at the lower max lengths.
   174  	ks := testkeys.Alpha(rng.Intn(2) + 1)
   175  
   176  	// Generate between 1-15 range keys.
   177  	const maxRangeKeys = 15
   178  	var original, fragmented []keyspan.Span
   179  	numRangeKeys := 1 + rng.Intn(maxRangeKeys)
   180  	for i := 0; i < numRangeKeys; i++ {
   181  		startIdx := rng.Intn(ks.Count())
   182  		endIdx := rng.Intn(ks.Count())
   183  		for startIdx == endIdx {
   184  			endIdx = rng.Intn(ks.Count())
   185  		}
   186  		if startIdx > endIdx {
   187  			startIdx, endIdx = endIdx, startIdx
   188  		}
   189  
   190  		key := keyspan.Key{
   191  			Trailer: base.MakeTrailer(uint64(i), base.InternalKeyKindRangeKeySet),
   192  			Value:   []byte(fmt.Sprintf("v%d", rng.Intn(3))),
   193  		}
   194  		// Generate suffixes 0, 1, 2, or 3 with 0 indicating none.
   195  		if suffix := rng.Intn(4); suffix > 0 {
   196  			key.Suffix = testkeys.Suffix(suffix)
   197  		}
   198  		original = append(original, keyspan.Span{
   199  			Start: testkeys.Key(ks, startIdx),
   200  			End:   testkeys.Key(ks, endIdx),
   201  			Keys:  []keyspan.Key{key},
   202  		})
   203  
   204  		for startIdx < endIdx {
   205  			width := rng.Intn(endIdx-startIdx) + 1
   206  			fragmented = append(fragmented, keyspan.Span{
   207  				Start: testkeys.Key(ks, startIdx),
   208  				End:   testkeys.Key(ks, startIdx+width),
   209  				Keys:  []keyspan.Key{key},
   210  			})
   211  			startIdx += width
   212  		}
   213  	}
   214  
   215  	// Both the original and the deliberately fragmented spans may contain
   216  	// overlaps, so we need to sort and fragment them.
   217  	original = fragment(cmp, formatKey, original)
   218  	fragmented = fragment(cmp, formatKey, fragmented)
   219  
   220  	var referenceCfg, fragmentedCfg UserIteratorConfig
   221  	referenceIter := referenceCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax,
   222  		nil /* lower */, nil, /* upper */
   223  		new(bool), new([]byte),
   224  		keyspan.NewIter(cmp, original))
   225  	fragmentedIter := fragmentedCfg.Init(testkeys.Comparer, base.InternalKeySeqNumMax,
   226  		nil /* lower */, nil, /* upper */
   227  		new(bool), new([]byte),
   228  		keyspan.NewIter(cmp, fragmented))
   229  
   230  	// Generate 100 random operations and run them against both iterators.
   231  	const numIterOps = 100
   232  	type opKind struct {
   233  		weight int
   234  		fn     func() string
   235  	}
   236  	ops := []opKind{
   237  		{weight: 2, fn: func() string { return "first" }},
   238  		{weight: 2, fn: func() string { return "last" }},
   239  		{weight: 50, fn: func() string { return "next" }},
   240  		{weight: 50, fn: func() string { return "prev" }},
   241  		{weight: 5, fn: func() string {
   242  			k := testkeys.Key(ks, rng.Intn(ks.Count()))
   243  			return fmt.Sprintf("seekge(%s)", k)
   244  		}},
   245  		{weight: 5, fn: func() string {
   246  			k := testkeys.Key(ks, rng.Intn(ks.Count()))
   247  			return fmt.Sprintf("seeklt(%s)", k)
   248  		}},
   249  	}
   250  	var totalWeight int
   251  	for _, op := range ops {
   252  		totalWeight += op.weight
   253  	}
   254  	var referenceHistory, fragmentedHistory bytes.Buffer
   255  	for i := 0; i < numIterOps; i++ {
   256  		p := rng.Intn(totalWeight)
   257  		opIndex := 0
   258  		if i == 0 {
   259  			// First op is always a First().
   260  		} else {
   261  			for i, op := range ops {
   262  				if p < op.weight {
   263  					opIndex = i
   264  					break
   265  				}
   266  				p -= op.weight
   267  			}
   268  		}
   269  		op := ops[opIndex].fn()
   270  		runIterOp(&referenceHistory, referenceIter, op)
   271  		runIterOp(&fragmentedHistory, fragmentedIter, op)
   272  		if !bytes.Equal(referenceHistory.Bytes(), fragmentedHistory.Bytes()) {
   273  			t.Fatal(debugContext(cmp, formatKey, original, fragmented,
   274  				referenceHistory.String(), fragmentedHistory.String()))
   275  		}
   276  	}
   277  }
   278  
   279  func fragment(cmp base.Compare, formatKey base.FormatKey, spans []keyspan.Span) []keyspan.Span {
   280  	keyspan.Sort(cmp, spans)
   281  	var fragments []keyspan.Span
   282  	f := keyspan.Fragmenter{
   283  		Cmp:    cmp,
   284  		Format: formatKey,
   285  		Emit: func(f keyspan.Span) {
   286  			fragments = append(fragments, f)
   287  		},
   288  	}
   289  	for _, s := range spans {
   290  		f.Add(s)
   291  	}
   292  	f.Finish()
   293  	return fragments
   294  }
   295  
   296  func debugContext(
   297  	cmp base.Compare,
   298  	formatKey base.FormatKey,
   299  	original, fragmented []keyspan.Span,
   300  	refHistory, fragHistory string,
   301  ) string {
   302  	var buf bytes.Buffer
   303  	fmt.Fprintln(&buf, "Reference:")
   304  	for _, s := range original {
   305  		fmt.Fprintln(&buf, s)
   306  	}
   307  	fmt.Fprintln(&buf)
   308  	fmt.Fprintln(&buf, "Fragmented:")
   309  	for _, s := range fragmented {
   310  		fmt.Fprintln(&buf, s)
   311  	}
   312  	fmt.Fprintln(&buf)
   313  	fmt.Fprintln(&buf, "\nOperations diff:")
   314  	diff, err := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{
   315  		A:       difflib.SplitLines(refHistory),
   316  		B:       difflib.SplitLines(fragHistory),
   317  		Context: 5,
   318  	})
   319  	if err != nil {
   320  		panic(err)
   321  	}
   322  	fmt.Fprintln(&buf, diff)
   323  	return buf.String()
   324  }
   325  
   326  var iterDelim = map[rune]bool{',': true, ' ': true, '(': true, ')': true, '"': true}
   327  
   328  func runIterOp(w io.Writer, it keyspan.FragmentIterator, op string) {
   329  	fields := strings.FieldsFunc(op, func(r rune) bool { return iterDelim[r] })
   330  	var s *keyspan.Span
   331  	switch strings.ToLower(fields[0]) {
   332  	case "first":
   333  		s = it.First()
   334  	case "last":
   335  		s = it.Last()
   336  	case "seekge":
   337  		s = it.SeekGE([]byte(fields[1]))
   338  	case "seeklt":
   339  		s = it.SeekLT([]byte(fields[1]))
   340  	case "next":
   341  		s = it.Next()
   342  	case "prev":
   343  		s = it.Prev()
   344  	default:
   345  		panic(fmt.Sprintf("unrecognized iter op %q", fields[0]))
   346  	}
   347  	fmt.Fprintf(w, "%-10s", op)
   348  	if s == nil {
   349  		fmt.Fprintln(w, ".")
   350  		return
   351  	}
   352  	fmt.Fprintln(w, s)
   353  }