github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/sstable/random_test.go

     1  // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"fmt"
     9  	"math/rand"
    10  	"runtime/debug"
    11  	"slices"
    12  	"strings"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/cockroachdb/metamorphic"
    17  	"github.com/cockroachdb/pebble/bloom"
    18  	"github.com/cockroachdb/pebble/internal/base"
    19  	"github.com/cockroachdb/pebble/internal/bytealloc"
    20  	"github.com/cockroachdb/pebble/internal/testkeys"
    21  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    22  	"github.com/cockroachdb/pebble/vfs"
    23  	"github.com/cockroachdb/pebble/vfs/errorfs"
    24  	"github.com/stretchr/testify/require"
    25  )
    26  
    27  // TestIterator_RandomErrors builds random sstables and runs random iterator
    28  // operations against them while randomly injecting errors. It ensures that if
    29  // an error is injected during an operation, the operation surfaces the error
    30  // to the caller.
    31  func TestIterator_RandomErrors(t *testing.T) {
    32  	root := time.Now().UnixNano()
    33  	// Run the test a few times with various seeds for more consistent code
    34  	// coverage.
    35  	for i := int64(0); i < 50; i++ {
    36  		seed := root + i
    37  		t.Run(fmt.Sprintf("seed=%d", seed), func(t *testing.T) {
    38  			runErrorInjectionTest(t, seed)
    39  		})
    40  	}
    41  }
    42  
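        // runErrorInjectionTest builds a random sstable in an in-memory
        // filesystem, reopens it through an error-injecting vfs.File wrapper, and
        // runs a sequence of random iterator operations against it, failing the
        // test if an injected error is not surfaced through the iterator.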
    43  func runErrorInjectionTest(t *testing.T, seed int64) {
    44  	t.Logf("seed %d", seed)
    45  	fs := vfs.NewMem()
    46  	f, err := fs.Create("random.sst")
    47  	require.NoError(t, err)
    48  	rng := rand.New(rand.NewSource(seed))
    49  	cfg := randomTableConfig{
    50  		wopts:     nil, /* leave to randomize */
    51  		keys:      testkeys.Alpha(3 + rng.Intn(2)),
    52  		keyCount:  10_000,
    53  		maxValLen: rng.Intn(64) + 1,
    54  		maxSuffix: rng.Int63n(95) + 5,
    55  		maxSeqNum: rng.Int63n(1000) + 10,
    56  		rng:       rng,
    57  	}
    58  	cfg.randomize()
    59  	_, err = buildRandomSSTable(f, cfg)
    60  	require.NoError(t, err)
    61  
    62  	f, err = fs.Open("random.sst")
    63  	require.NoError(t, err)
    64  	// Randomly inject errors into 25% of file operations. We use an
    65  	// errorfs.Toggle to avoid injecting errors until the file has been opened.
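        	// The errorfs.Counter records how many errors have been injected so
        	// far; the test loop below compares its value before and after each
        	// iterator operation to determine whether an error was injected during
        	// that operation.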
    66  	toggle := &errorfs.Toggle{Injector: errorfs.ErrInjected.If(errorfs.Randomly(0.25, seed))}
    67  	counter := &errorfs.Counter{Injector: toggle}
    68  	var stack []byte
    69  	f = errorfs.WrapFile(f, errorfs.InjectorFunc(func(op errorfs.Op) error {
    70  		err := counter.MaybeError(op)
    71  		if err != nil {
    72  			// Save the stack trace of the most recently injected error.
    73  			stack = debug.Stack()
    74  		}
    75  		return err
    76  	}))
    77  	readable, err := NewSimpleReadable(f)
    78  	require.NoError(t, err)
    79  	r, err := NewReader(readable, cfg.readerOpts())
    80  	require.NoError(t, err)
    81  	defer r.Close()
    82  
    83  	var filterer *BlockPropertiesFilterer
    84  	if rng.Float64() < 0.75 {
    85  		low, high := uint64(cfg.randSuffix()), uint64(cfg.randSuffix())
    86  		if low > high {
    87  			low, high = high, low
    88  		}
    89  		filterer = newBlockPropertiesFilterer([]BlockPropertyFilter{
    90  			NewTestKeysBlockPropertyFilter(low, high),
    91  		}, nil)
    92  	}
    93  
    94  	// TODO(jackson): NewIterWithBlockPropertyFilters returns an iterator over
    95  	// point keys only. Should we add variants of this test that run random
    96  	// operations on the range deletion and range key iterators?
    97  	var stats base.InternalIteratorStats
    98  	it, err := r.NewIterWithBlockPropertyFilters(
    99  		nil /* lower TODO */, nil, /* upper TODO */
   100  		filterer,
   101  		rng.Intn(2) == 1, /* use filter block */
   102  		&stats,
   103  		CategoryAndQoS{},
   104  		nil, /* CategoryStatsCollector */
   105  		TrivialReaderProvider{r},
   106  	)
   107  	require.NoError(t, err)
   108  	defer it.Close()
   109  
   110  	// Begin injecting errors.
   111  	toggle.On()
   112  
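        	// Construct an operation runner and a weighted deck of iterator
        	// operations. The heavier weights make Next, NextPrefix and Prev more
        	// frequent than seeks, and seeks more frequent than First and Last.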
   113  	ops := opRunner{randomTableConfig: cfg, it: it}
   114  	nextOp := metamorphic.Weighted[func() bool]{
   115  		{Item: ops.runSeekGE, Weight: 2},
   116  		{Item: ops.runSeekPrefixGE, Weight: 2},
   117  		{Item: ops.runSeekLT, Weight: 2},
   118  		{Item: ops.runFirst, Weight: 1},
   119  		{Item: ops.runLast, Weight: 1},
   120  		{Item: ops.runNext, Weight: 5},
   121  		{Item: ops.runNextPrefix, Weight: 5},
   122  		{Item: ops.runPrev, Weight: 5},
   123  	}.RandomDeck(rng)
   124  
   125  	for i := 0; i < 1000; i++ {
   126  		beforeCount := counter.Load()
   127  
   128  		// nextOp returns a function that *may* run the operation. If the
   129  		// current test state makes the operation invalid, the function returns
   130  		// `false`, indicating it was not run. If the operation is valid and was
   131  		// performed, `opFunc` returns true.
   132  		//
   133  		// This loop will run exactly 1 operation, skipping randomly chosen
   134  		// operations that cannot be run on an iterator in its current state.
   135  		for opFunc := nextOp(); !opFunc(); {
   136  			opFunc = nextOp()
   137  		}
   138  
   139  		t.Logf("%s = %s [err = %v]", ops.latestOpDesc, ops.k, it.Error())
   140  		afterCount := counter.Load()
   141  		// TODO(jackson): Consider running all commands against a parallel
   142  		// iterator constructed over a sstable containing the same data in a
   143  		// standard construction (eg, typical block sizes) and no error
   144  		// injection. Then we can assert the results are identical.
   145  
   146  		if afterCount > beforeCount {
   147  			if ops.k != nil || it.Error() == nil {
   148  				t.Errorf("error swallowed during %s with stack %s",
   149  					ops.latestOpDesc, string(stack))
   150  			}
   151  		}
   152  	}
   153  }
   154  
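        // opRunner holds the state needed to run randomized operations against an
        // iterator: a description of the most recent operation and its seek key
        // (used to decide when TrySeekUsingNext is applicable), the current
        // iteration direction, and the key/value at the current position.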
   155  type opRunner struct {
   156  	randomTableConfig
   157  	it Iterator
   158  
   159  	latestOpDesc  string
   160  	latestSeekKey []byte
   161  	dir           int8
   162  	k             *base.InternalKey
   163  	v             base.LazyValue
   164  }
   165  
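        // runSeekGE seeks the iterator to a random key. If the previous operation
        // was also a SeekGE and the new key is larger than the previous seek key,
        // the TrySeekUsingNext optimization is randomly enabled.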
   166  func (r *opRunner) runSeekGE() bool {
   167  	k := r.randKey()
   168  	flags := base.SeekGEFlagsNone
   169  	if strings.HasPrefix(r.latestOpDesc, "SeekGE") &&
   170  		r.wopts.Comparer.Compare(k, r.latestSeekKey) > 0 && r.rng.Intn(2) == 1 {
   171  		flags = flags.EnableTrySeekUsingNext()
   172  	}
   173  	r.latestOpDesc = fmt.Sprintf("SeekGE(%q, TrySeekUsingNext()=%t)",
   174  		k, flags.TrySeekUsingNext())
   175  	r.latestSeekKey = k
   176  	r.k, r.v = r.it.SeekGE(k, flags)
   177  	r.dir = +1
   178  	return true
   179  }
   180  
   181  func (r *opRunner) runSeekPrefixGE() bool {
   182  	k := r.randKey()
   183  	i := r.wopts.Comparer.Split(k)
   184  	flags := base.SeekGEFlagsNone
   185  	if strings.HasPrefix(r.latestOpDesc, "SeekPrefixGE") &&
   186  		r.wopts.Comparer.Compare(k, r.latestSeekKey) > 0 && r.rng.Intn(2) == 1 {
   187  		flags = flags.EnableTrySeekUsingNext()
   188  	}
   189  	r.latestOpDesc = fmt.Sprintf("SeekPrefixGE(%q, %q, TrySeekUsingNext()=%t)",
   190  		k[:i], k, flags.TrySeekUsingNext())
   191  	r.latestSeekKey = k
   192  	r.k, r.v = r.it.SeekPrefixGE(k[:i], k, flags)
   193  	r.dir = +1
   194  	return true
   195  }
   196  
   197  func (r *opRunner) runSeekLT() bool {
   198  	k := r.randKey()
   199  	r.latestOpDesc = fmt.Sprintf("SeekLT(%q)", k)
   200  	r.k, r.v = r.it.SeekLT(k, base.SeekLTFlagsNone)
   201  	r.dir = -1
   202  	return true
   203  }
   204  
   205  func (r *opRunner) runFirst() bool {
   206  	r.latestOpDesc = "First()"
   207  	r.k, r.v = r.it.First()
   208  	r.dir = +1
   209  	return true
   210  }
   211  
   212  func (r *opRunner) runLast() bool {
   213  	r.latestOpDesc = "Last()"
   214  	r.k, r.v = r.it.Last()
   215  	r.dir = -1
   216  	return true
   217  }
   218  
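        // runNext steps the iterator forward one entry. It declines to run (and
        // returns false) if the iterator is already exhausted in the forward
        // direction.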
   219  func (r *opRunner) runNext() bool {
   220  	if r.dir == +1 && r.k == nil {
   221  		return false
   222  	}
   223  	r.latestOpDesc = "Next()"
   224  	r.k, r.v = r.it.Next()
   225  	r.dir = +1
   226  	return true
   227  }
   228  
   229  func (r *opRunner) runNextPrefix() bool {
   230  	// NextPrefix cannot be called to change directions or when an iterator is
   231  	// exhausted.
   232  	if r.dir == -1 || r.k == nil {
   233  		return false
   234  	}
   235  	p := r.k.UserKey[:r.wopts.Comparer.Split(r.k.UserKey)]
   236  	succKey := r.wopts.Comparer.ImmediateSuccessor(nil, p)
   237  	r.latestOpDesc = fmt.Sprintf("NextPrefix(%q)", succKey)
   238  	r.k, r.v = r.it.NextPrefix(succKey)
   239  	r.dir = +1
   240  	return true
   241  }
   242  
   243  func (r *opRunner) runPrev() bool {
   244  	if r.dir == -1 && r.k == nil {
   245  		return false
   246  	}
   247  	r.latestOpDesc = "Prev()"
   248  	r.k, r.v = r.it.Prev()
   249  	r.dir = -1
   250  	return true
   251  }
   252  
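        // randomTableConfig parameterizes the construction of a random sstable:
        // the keyspace and number of keys, bounds on value lengths, suffixes and
        // sequence numbers, and the (possibly randomized) writer options.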
   253  type randomTableConfig struct {
   254  	wopts     *WriterOptions
   255  	keys      testkeys.Keyspace
   256  	keyCount  int
   257  	maxValLen int
   258  	maxSuffix int64
   259  	maxSeqNum int64
   260  	rng       *rand.Rand
   261  }
   262  
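        // readerOpts returns ReaderOptions suitable for reading a table written
        // with cfg.wopts, registering the writer's filter policy if one was
        // configured.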
   263  func (cfg *randomTableConfig) readerOpts() ReaderOptions {
   264  	rOpts := ReaderOptions{
   265  		Comparer: testkeys.Comparer,
   266  		Filters:  map[string]FilterPolicy{},
   267  	}
   268  	if cfg.wopts.FilterPolicy != nil {
   269  		rOpts.Filters[cfg.wopts.FilterPolicy.Name()] = cfg.wopts.FilterPolicy
   270  	}
   271  	return rOpts
   272  }
   273  
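        // randomize fills in randomized WriterOptions if none were provided. The
        // comparer is always forced to testkeys.Comparer, which the generated
        // keys require.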
   274  func (cfg *randomTableConfig) randomize() {
   275  	if cfg.wopts == nil {
   276  		cfg.wopts = &WriterOptions{
   277  			// Test all table formats in [TableFormatLevelDB, TableFormatMax].
   278  			TableFormat:             TableFormat(cfg.rng.Intn(int(TableFormatMax)) + 1),
   279  			BlockRestartInterval:    (1 << cfg.rng.Intn(6)),             // {1, 2, 4, ..., 32}
   280  			BlockSizeThreshold:      max(int(100*cfg.rng.Float64()), 1), // 1-100%
   281  			BlockSize:               (1 << cfg.rng.Intn(18)),            // {1, 2, 4, ..., 128 KiB}
   282  			IndexBlockSize:          (1 << cfg.rng.Intn(20)),            // {1, 2, 4, ..., 512 KiB}
   283  			BlockPropertyCollectors: nil,
   284  			WritingToLowestLevel:    cfg.rng.Intn(2) == 1,
   285  			Parallelism:             cfg.rng.Intn(2) == 1,
   286  		}
   287  		if v := cfg.rng.Intn(11); v > 0 {
   288  			cfg.wopts.FilterPolicy = bloom.FilterPolicy(v)
   289  		}
   290  		if cfg.wopts.TableFormat >= TableFormatPebblev1 && cfg.rng.Float64() < 0.75 {
   291  			cfg.wopts.BlockPropertyCollectors = append(cfg.wopts.BlockPropertyCollectors, NewTestKeysBlockPropertyCollector)
   292  		}
   293  	}
   294  	cfg.wopts.ensureDefaults()
   295  	cfg.wopts.Comparer = testkeys.Comparer
   296  }
   297  
   298  func (cfg *randomTableConfig) randKey() []byte {
   299  	return testkeys.KeyAt(cfg.keys, cfg.randKeyIdx(), cfg.randSuffix())
   300  }
   301  func (cfg *randomTableConfig) randSuffix() int64 { return cfg.rng.Int63n(cfg.maxSuffix + 1) }
   302  func (cfg *randomTableConfig) randKeyIdx() int64 { return cfg.rng.Int63n(cfg.keys.Count()) }
   303  
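        // buildRandomSSTable writes an sstable containing cfg.keyCount unique
        // random point keys to f, using a weighted mix of key kinds, and returns
        // the resulting WriterMetadata.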
   304  func buildRandomSSTable(f vfs.File, cfg randomTableConfig) (*WriterMetadata, error) {
   305  	// Construct a weighted distribution of key kinds.
   306  	kinds := metamorphic.Weighted[base.InternalKeyKind]{
   307  		{Item: base.InternalKeyKindSet, Weight: 25},
   308  		{Item: base.InternalKeyKindSetWithDelete, Weight: 25},
   309  		{Item: base.InternalKeyKindDelete, Weight: 5},
   310  		{Item: base.InternalKeyKindSingleDelete, Weight: 2},
   311  		{Item: base.InternalKeyKindMerge, Weight: 1},
   312  	}
   313  	// TODO(jackson): Support writing range deletions and range keys.
   314  	// TestIterator_RandomErrors only reads through the point iterator, so those
   315  	// keys won't be visible regardless, but their existence should be benign.
   316  
   317  	// DELSIZED keys require TableFormatPebblev4 or later.
   318  	if cfg.wopts.TableFormat >= TableFormatPebblev4 {
   319  		kinds = append(kinds, metamorphic.ItemWeight[base.InternalKeyKind]{
   320  			Item: base.InternalKeyKindDeleteSized, Weight: 5,
   321  		})
   322  	}
   323  	nextRandomKind := kinds.RandomDeck(cfg.rng)
   324  
   325  	type keyID struct {
   326  		idx    int64
   327  		suffix int64
   328  		seqNum int64
   329  	}
   330  	keyMap := make(map[keyID]bool)
   331  	// Constrain the generated keys to the middle 90% of the keyspace. This
   332  	// helps exercise code paths that are only run when a seek key is beyond
   333  	// or before all index block entries.
   334  	sstKeys := cfg.keys.Slice(cfg.keys.Count()/20, cfg.keys.Count()-cfg.keys.Count()/20)
   335  	randomKey := func() keyID {
   336  		k := keyID{
   337  			idx:    cfg.rng.Int63n(sstKeys.Count()),
   338  			suffix: cfg.rng.Int63n(cfg.maxSuffix + 1),
   339  			seqNum: cfg.rng.Int63n(cfg.maxSeqNum + 1),
   340  		}
   341  		// If we've already generated this exact key, try again.
   342  		for keyMap[k] {
   343  			k = keyID{
   344  				idx:    cfg.rng.Int63n(sstKeys.Count()),
   345  				suffix: cfg.rng.Int63n(cfg.maxSuffix + 1),
   346  				seqNum: cfg.rng.Int63n(cfg.maxSeqNum + 1),
   347  			}
   348  		}
   349  		keyMap[k] = true
   350  		return k
   351  	}
   352  
   353  	var alloc bytealloc.A
   354  	keys := make([]base.InternalKey, cfg.keyCount)
   355  	for i := range keys {
   356  		keyID := randomKey()
   357  		kind := nextRandomKind()
   358  
   359  		var keyBuf []byte
   360  		alloc, keyBuf = alloc.Alloc(testkeys.SuffixLen(keyID.suffix) + cfg.keys.MaxLen())
   361  		n := testkeys.WriteKeyAt(keyBuf, sstKeys, keyID.idx, keyID.suffix)
   362  		keys[i] = base.MakeInternalKey(keyBuf[:n], uint64(keyID.seqNum), kind)
   363  	}
   364  	// The Writer requires the keys to be written in sorted order. Sort them.
   365  	slices.SortFunc(keys, func(a, b base.InternalKey) int {
   366  		return base.InternalCompare(testkeys.Comparer.Compare, a, b)
   367  	})
   368  
   369  	// Release keyMap and alloc; we don't need them and this function can be
   370  	// memory intensive.
   371  	keyMap = nil
   372  	alloc = nil
   373  
   374  	valueBuf := make([]byte, cfg.maxValLen)
   375  	w := NewWriter(objstorageprovider.NewFileWritable(f), *cfg.wopts)
   376  	for i := 0; i < len(keys); i++ {
   377  		var value []byte
   378  		switch keys[i].Kind() {
   379  		case base.InternalKeyKindSet, base.InternalKeyKindMerge:
   380  			value = valueBuf[:cfg.rng.Intn(cfg.maxValLen+1)]
   381  			cfg.rng.Read(value)
   382  		}
   383  		if err := w.Add(keys[i], value); err != nil {
   384  			return nil, err
   385  		}
   386  	}
   387  	if err := w.Close(); err != nil {
   388  		return nil, err
   389  	}
   390  	metadata, err := w.Metadata()
   391  	if err != nil {
   392  		return nil, err
   393  	}
   394  	return metadata, nil
   395  }