github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/keyspan/merging_iter_test.go (about)

     1  // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package keyspan
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"math/rand"
    11  	"strconv"
    12  	"strings"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/stretchr/testify/require"
    17  	"github.com/zuoyebang/bitalostable/internal/base"
    18  	"github.com/zuoyebang/bitalostable/internal/datadriven"
    19  	"github.com/zuoyebang/bitalostable/internal/testkeys"
    20  )
    21  
    22  func TestMergingIter(t *testing.T) {
    23  	cmp := base.DefaultComparer.Compare
    24  	var buf bytes.Buffer
    25  	var iter MergingIter
    26  
    27  	formatSpan := func(s *Span) { fmt.Fprintln(&buf, s) }
    28  
    29  	datadriven.RunTest(t, "testdata/merging_iter", func(td *datadriven.TestData) string {
    30  		switch td.Cmd {
    31  		case "define":
    32  			snapshot := base.InternalKeySeqNumMax
    33  			for _, cmdArg := range td.CmdArgs {
    34  				switch cmdArg.Key {
    35  				case "snapshot":
    36  					var err error
    37  					snapshot, err = strconv.ParseUint(cmdArg.Vals[0], 10, 64)
    38  					require.NoError(t, err)
    39  				default:
    40  					return fmt.Sprintf("unrecognized arg %q", cmdArg.Key)
    41  				}
    42  			}
    43  
    44  			var iters []FragmentIterator
    45  			var spans []Span
    46  			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
    47  			for _, line := range lines {
    48  				if line == "--" {
    49  					iters = append(iters, NewIter(cmp, spans))
    50  					spans = nil
    51  					continue
    52  				}
    53  				spans = append(spans, ParseSpan(line))
    54  			}
    55  			if len(spans) > 0 {
    56  				iters = append(iters, &invalidatingIter{iter: NewIter(cmp, spans)})
    57  			}
    58  			iter.Init(cmp, visibleTransform(snapshot), iters...)
    59  			return fmt.Sprintf("%d levels", len(iters))
    60  		case "iter":
    61  			buf.Reset()
    62  			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
    63  			for _, line := range lines {
    64  				line = strings.TrimSpace(line)
    65  				i := strings.IndexByte(line, ' ')
    66  				iterCmd := line
    67  				if i > 0 {
    68  					iterCmd = string(line[:i])
    69  				}
    70  				switch iterCmd {
    71  				case "first":
    72  					formatSpan(iter.First())
    73  				case "last":
    74  					formatSpan(iter.Last())
    75  				case "next":
    76  					formatSpan(iter.Next())
    77  				case "prev":
    78  					formatSpan(iter.Prev())
    79  				case "seek-ge":
    80  					formatSpan(iter.SeekGE([]byte(strings.TrimSpace(line[i:]))))
    81  				case "seek-lt":
    82  					formatSpan(iter.SeekLT([]byte(strings.TrimSpace(line[i:]))))
    83  				default:
    84  					return fmt.Sprintf("unrecognized iter command %q", iterCmd)
    85  				}
    86  				require.NoError(t, iter.Error())
    87  			}
    88  			return strings.TrimSpace(buf.String())
    89  
    90  		default:
    91  			return fmt.Sprintf("unrecognized command %q", td.Cmd)
    92  		}
    93  	})
    94  }
    95  
    96  // TestMergingIter_FragmenterEquivalence tests for equivalence between the
    97  // fragmentation performed on-the-fly by the MergingIter and the fragmentation
    98  // performed by the Fragmenter.
    99  //
   100  // It does this by producing 1-10 levels of well-formed fragments. Generated
   101  // fragments may overlap other levels arbitrarily, but within their level
   102  // generated fragments may only overlap other fragments that share the same user
   103  // key bounds.
   104  //
   105  // The test then feeds all the fragments, across all levels, into a Fragmenter
   106  // and produces a Iter over those fragments. The test also constructs a
   107  // MergingIter with a separate Iter for each level. It runs a random
   108  // series of operations, applying each operation to both. It asserts that each
   109  // operation has identical results on both iterators.
   110  func TestMergingIter_FragmenterEquivalence(t *testing.T) {
   111  	seed := time.Now().UnixNano()
   112  	for i := int64(0); i < 10; i++ {
   113  		testFragmenterEquivalenceOnce(t, seed+i)
   114  	}
   115  }
   116  
   117  func TestMergingIter_FragmenterEquivalence_Seed(t *testing.T) {
   118  	// This test uses a fixed seed. It's useful to manually edit its seed when
   119  	// debugging a test failure of the variable-seed test.
   120  	const seed = 1644517830186873000
   121  	testFragmenterEquivalenceOnce(t, seed)
   122  }
   123  
   124  func testFragmenterEquivalenceOnce(t *testing.T, seed int64) {
   125  	cmp := testkeys.Comparer.Compare
   126  	rng := rand.New(rand.NewSource(seed))
   127  	t.Logf("seed = %d", seed)
   128  
   129  	// Use a key space of alphanumeric strings, with a random max length between
   130  	// 1-3. Repeat keys are more common at the lower max lengths.
   131  	ks := testkeys.Alpha(rng.Intn(3) + 1)
   132  
   133  	// Generate between 1 and 10 levels of fragment iterators.
   134  	levels := make([][]Span, rng.Intn(10)+1)
   135  	iters := make([]FragmentIterator, len(levels))
   136  	var allSpans []Span
   137  	var buf bytes.Buffer
   138  	for l := 0; l < len(levels); l++ {
   139  		fmt.Fprintf(&buf, "level %d: ", l)
   140  		for keyspaceStartIdx := 0; keyspaceStartIdx < ks.Count(); {
   141  			// Generate spans of lengths of up to a third of the keyspace.
   142  			spanStartIdx := keyspaceStartIdx + rng.Intn(ks.Count()/3)
   143  			spanEndIdx := spanStartIdx + rng.Intn(ks.Count()/3) + 1
   144  
   145  			if spanEndIdx < ks.Count() {
   146  				keyCount := uint64(rng.Intn(3) + 1)
   147  				s := Span{
   148  					Start: testkeys.Key(ks, spanStartIdx),
   149  					End:   testkeys.Key(ks, spanEndIdx),
   150  					Keys:  make([]Key, 0, keyCount),
   151  				}
   152  				for k := keyCount; k > 0; k-- {
   153  					seqNum := uint64((len(levels)-l)*3) + k
   154  					s.Keys = append(s.Keys, Key{
   155  						Trailer: base.MakeTrailer(seqNum, base.InternalKeyKindRangeKeySet),
   156  					})
   157  				}
   158  				if len(levels[l]) > 0 {
   159  					fmt.Fprint(&buf, ", ")
   160  				}
   161  				fmt.Fprintf(&buf, "%s", s)
   162  
   163  				levels[l] = append(levels[l], s)
   164  				allSpans = append(allSpans, s)
   165  			}
   166  			keyspaceStartIdx = spanEndIdx
   167  		}
   168  		iters[l] = &invalidatingIter{iter: NewIter(cmp, levels[l])}
   169  		fmt.Fprintln(&buf)
   170  	}
   171  
   172  	// Fragment the spans across the levels.
   173  	var allFragmented []Span
   174  	f := Fragmenter{
   175  		Cmp:    cmp,
   176  		Format: testkeys.Comparer.FormatKey,
   177  		Emit: func(span Span) {
   178  			allFragmented = append(allFragmented, span)
   179  		},
   180  	}
   181  	Sort(f.Cmp, allSpans)
   182  	for _, s := range allSpans {
   183  		f.Add(s)
   184  	}
   185  	f.Finish()
   186  
   187  	// Log all the levels and their fragments, as well as the fully-fragmented
   188  	// spans produced by the Fragmenter.
   189  	fmt.Fprintln(&buf, "Fragmenter fragments:")
   190  	for i, s := range allFragmented {
   191  		if i > 0 {
   192  			fmt.Fprint(&buf, ", ")
   193  		}
   194  		fmt.Fprint(&buf, s)
   195  	}
   196  	t.Logf("%d levels:\n%s\n", len(levels), buf.String())
   197  
   198  	fragmenterIter := NewIter(f.Cmp, allFragmented)
   199  	mergingIter := &MergingIter{}
   200  	mergingIter.Init(f.Cmp, visibleTransform(base.InternalKeySeqNumMax), iters...)
   201  
   202  	// Position both so that it's okay to perform relative positioning
   203  	// operations immediately.
   204  	mergingIter.First()
   205  	fragmenterIter.First()
   206  
   207  	type opKind struct {
   208  		weight int
   209  		fn     func() (str string, f *Span, m *Span)
   210  	}
   211  	ops := []opKind{
   212  		{weight: 2, fn: func() (string, *Span, *Span) {
   213  			return "First()", fragmenterIter.First(), mergingIter.First()
   214  		}},
   215  		{weight: 2, fn: func() (string, *Span, *Span) {
   216  			return "Last()", fragmenterIter.Last(), mergingIter.Last()
   217  		}},
   218  		{weight: 5, fn: func() (string, *Span, *Span) {
   219  			k := testkeys.Key(ks, rng.Intn(ks.Count()))
   220  			return fmt.Sprintf("SeekGE(%q)", k),
   221  				fragmenterIter.SeekGE(k),
   222  				mergingIter.SeekGE(k)
   223  		}},
   224  		{weight: 5, fn: func() (string, *Span, *Span) {
   225  			k := testkeys.Key(ks, rng.Intn(ks.Count()))
   226  			return fmt.Sprintf("SeekLT(%q)", k),
   227  				fragmenterIter.SeekLT(k),
   228  				mergingIter.SeekLT(k)
   229  		}},
   230  		{weight: 50, fn: func() (string, *Span, *Span) {
   231  			return "Next()", fragmenterIter.Next(), mergingIter.Next()
   232  		}},
   233  		{weight: 50, fn: func() (string, *Span, *Span) {
   234  			return "Prev()", fragmenterIter.Prev(), mergingIter.Prev()
   235  		}},
   236  	}
   237  	var totalWeight int
   238  	for _, op := range ops {
   239  		totalWeight += op.weight
   240  	}
   241  
   242  	var fragmenterBuf bytes.Buffer
   243  	var mergingBuf bytes.Buffer
   244  	opCount := rng.Intn(200) + 50
   245  	for i := 0; i < opCount; i++ {
   246  		p := rng.Intn(totalWeight)
   247  		opIndex := 0
   248  		for i, op := range ops {
   249  			if p < op.weight {
   250  				opIndex = i
   251  				break
   252  			}
   253  			p -= op.weight
   254  		}
   255  
   256  		opString, fs, ms := ops[opIndex].fn()
   257  
   258  		fragmenterBuf.Reset()
   259  		mergingBuf.Reset()
   260  		fmt.Fprint(&fragmenterBuf, fs)
   261  		fmt.Fprint(&mergingBuf, ms)
   262  		if fragmenterBuf.String() != mergingBuf.String() {
   263  			t.Fatalf("seed %d, op %d: %s = %s, fragmenter iterator returned %s",
   264  				seed, i, opString, mergingBuf.String(), fragmenterBuf.String())
   265  		}
   266  		t.Logf("op %d: %s = %s", i, opString, fragmenterBuf.String())
   267  	}
   268  }