github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/db_test.go

     1  // Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"fmt"
    11  	"io"
    12  	"path/filepath"
    13  	"sort"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  	"testing"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/errors"
    21  	"github.com/cockroachdb/pebble/internal/base"
    22  	"github.com/cockroachdb/pebble/internal/cache"
    23  	"github.com/cockroachdb/pebble/internal/invariants"
    24  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    25  	"github.com/cockroachdb/pebble/sstable"
    26  	"github.com/cockroachdb/pebble/vfs"
    27  	"github.com/stretchr/testify/require"
    28  	"golang.org/x/exp/rand"
    29  )
    30  
    31  // try repeatedly calls f, sleeping before each attempt with exponential
    32  // back-off, until f returns a nil error or the total sleep time is greater
    33  // than or equal to maxTotalSleep. It always calls f at least once.
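        //
        // A minimal usage sketch (the durations and predicate are illustrative;
        // conditionReached is a hypothetical helper, not part of this file):
        //
        //	err := try(100*time.Microsecond, 20*time.Second, func() error {
        //		if conditionReached() {
        //			return nil
        //		}
        //		return errors.New("not ready yet")
        //	})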
    34  func try(initialSleep, maxTotalSleep time.Duration, f func() error) error {
    35  	totalSleep := time.Duration(0)
    36  	for d := initialSleep; ; d *= 2 {
    37  		time.Sleep(d)
    38  		totalSleep += d
    39  		if err := f(); err == nil || totalSleep >= maxTotalSleep {
    40  			return err
    41  		}
    42  	}
    43  }
    44  
    45  func TestTry(t *testing.T) {
    46  	c := make(chan struct{})
    47  	go func() {
    48  		time.Sleep(1 * time.Millisecond)
    49  		close(c)
    50  	}()
    51  
    52  	attemptsMu := sync.Mutex{}
    53  	attempts := 0
    54  
    55  	err := try(100*time.Microsecond, 20*time.Second, func() error {
    56  		attemptsMu.Lock()
    57  		attempts++
    58  		attemptsMu.Unlock()
    59  
    60  		select {
    61  		default:
    62  			return errors.New("timed out")
    63  		case <-c:
    64  			return nil
    65  		}
    66  	})
    67  	require.NoError(t, err)
    68  
    69  	attemptsMu.Lock()
    70  	a := attempts
    71  	attemptsMu.Unlock()
    72  
    73  	if a == 0 {
    74  		t.Fatalf("attempts: got 0, want > 0")
    75  	}
    76  }
    77  
    78  func TestBasicReads(t *testing.T) {
    79  	testCases := []struct {
    80  		dirname string
    81  		wantMap map[string]string
    82  	}{
    83  		{
    84  			"db-stage-1",
    85  			map[string]string{
    86  				"aaa":  "",
    87  				"bar":  "",
    88  				"baz":  "",
    89  				"foo":  "",
    90  				"quux": "",
    91  				"zzz":  "",
    92  			},
    93  		},
    94  		{
    95  			"db-stage-2",
    96  			map[string]string{
    97  				"aaa":  "",
    98  				"bar":  "",
    99  				"baz":  "three",
   100  				"foo":  "four",
   101  				"quux": "",
   102  				"zzz":  "",
   103  			},
   104  		},
   105  		{
   106  			"db-stage-3",
   107  			map[string]string{
   108  				"aaa":  "",
   109  				"bar":  "",
   110  				"baz":  "three",
   111  				"foo":  "four",
   112  				"quux": "",
   113  				"zzz":  "",
   114  			},
   115  		},
   116  		{
   117  			"db-stage-4",
   118  			map[string]string{
   119  				"aaa":  "",
   120  				"bar":  "",
   121  				"baz":  "",
   122  				"foo":  "five",
   123  				"quux": "six",
   124  				"zzz":  "",
   125  			},
   126  		},
   127  	}
   128  	for _, tc := range testCases {
   129  		t.Run(tc.dirname, func(t *testing.T) {
   130  			fs := vfs.NewMem()
   131  			_, err := vfs.Clone(vfs.Default, fs, filepath.Join("testdata", tc.dirname), tc.dirname)
   132  			if err != nil {
   133  				t.Fatalf("%s: cloneFileSystem failed: %v", tc.dirname, err)
   134  			}
   135  			d, err := Open(tc.dirname, testingRandomized(t, &Options{
   136  				FS: fs,
   137  			}))
   138  			if err != nil {
   139  				t.Fatalf("%s: Open failed: %v", tc.dirname, err)
   140  			}
   141  			for key, want := range tc.wantMap {
   142  				got, closer, err := d.Get([]byte(key))
   143  				if err != nil && err != ErrNotFound {
   144  					t.Fatalf("%s: Get(%q) failed: %v", tc.dirname, key, err)
   145  				}
   146  				if string(got) != string(want) {
   147  					t.Fatalf("%s: Get(%q): got %q, want %q", tc.dirname, key, got, want)
   148  				}
   149  				if closer != nil {
   150  					closer.Close()
   151  				}
   152  			}
   153  			err = d.Close()
   154  			if err != nil {
   155  				t.Fatalf("%s: Close failed: %v", tc.dirname, err)
   156  			}
   157  		})
   158  	}
   159  }
   160  
   161  func TestBasicWrites(t *testing.T) {
   162  	d, err := Open("", testingRandomized(t, &Options{
   163  		FS: vfs.NewMem(),
   164  	}))
   165  	require.NoError(t, err)
   166  
   167  	names := []string{
   168  		"Alatar",
   169  		"Gandalf",
   170  		"Pallando",
   171  		"Radagast",
   172  		"Saruman",
   173  		"Joe",
   174  	}
   175  	wantMap := map[string]string{}
   176  
   177  	inBatch, batch, pending := false, &Batch{}, [][]string(nil)
   178  	set0 := func(k, v string) error {
   179  		return d.Set([]byte(k), []byte(v), nil)
   180  	}
   181  	del0 := func(k string) error {
   182  		return d.Delete([]byte(k), nil)
   183  	}
   184  	set1 := func(k, v string) error {
   185  		batch.Set([]byte(k), []byte(v), nil)
   186  		return nil
   187  	}
   188  	del1 := func(k string) error {
   189  		batch.Delete([]byte(k), nil)
   190  		return nil
   191  	}
   192  	set, del := set0, del0
   193  
   194  	testCases := []string{
   195  		"set Gandalf Grey",
   196  		"set Saruman White",
   197  		"set Radagast Brown",
   198  		"delete Saruman",
   199  		"set Gandalf White",
   200  		"batch",
   201  		"  set Alatar AliceBlue",
   202  		"apply",
   203  		"delete Pallando",
   204  		"set Alatar AntiqueWhite",
   205  		"set Pallando PapayaWhip",
   206  		"batch",
   207  		"apply",
   208  		"set Pallando PaleVioletRed",
   209  		"batch",
   210  		"  delete Alatar",
   211  		"  set Gandalf GhostWhite",
   212  		"  set Saruman Seashell",
   213  		"  delete Saruman",
   214  		"  set Saruman SeaGreen",
   215  		"  set Radagast RosyBrown",
   216  		"  delete Pallando",
   217  		"apply",
   218  		"delete Radagast",
   219  		"delete Radagast",
   220  		"delete Radagast",
   221  		"set Gandalf Goldenrod",
   222  		"set Pallando PeachPuff",
   223  		"batch",
   224  		"  delete Joe",
   225  		"  delete Saruman",
   226  		"  delete Radagast",
   227  		"  delete Pallando",
   228  		"  delete Gandalf",
   229  		"  delete Alatar",
   230  		"apply",
   231  		"set Joe Plumber",
   232  	}
   233  	for i, tc := range testCases {
   234  		s := strings.Split(strings.TrimSpace(tc), " ")
   235  		switch s[0] {
   236  		case "set":
   237  			if err := set(s[1], s[2]); err != nil {
   238  				t.Fatalf("#%d %s: %v", i, tc, err)
   239  			}
   240  			if inBatch {
   241  				pending = append(pending, s)
   242  			} else {
   243  				wantMap[s[1]] = s[2]
   244  			}
   245  		case "delete":
   246  			if err := del(s[1]); err != nil {
   247  				t.Fatalf("#%d %s: %v", i, tc, err)
   248  			}
   249  			if inBatch {
   250  				pending = append(pending, s)
   251  			} else {
   252  				delete(wantMap, s[1])
   253  			}
   254  		case "batch":
   255  			inBatch, batch, set, del = true, &Batch{}, set1, del1
   256  		case "apply":
   257  			if err := d.Apply(batch, nil); err != nil {
   258  				t.Fatalf("#%d %s: %v", i, tc, err)
   259  			}
   260  			for _, p := range pending {
   261  				switch p[0] {
   262  				case "set":
   263  					wantMap[p[1]] = p[2]
   264  				case "delete":
   265  					delete(wantMap, p[1])
   266  				}
   267  			}
   268  			inBatch, pending, set, del = false, nil, set0, del0
   269  		default:
   270  			t.Fatalf("#%d %s: bad test case: %q", i, tc, s)
   271  		}
   272  
   273  		fail := false
   274  		for _, name := range names {
   275  			g, closer, err := d.Get([]byte(name))
   276  			if err != nil && err != ErrNotFound {
   277  				t.Errorf("#%d %s: Get(%q): %v", i, tc, name, err)
   278  				fail = true
   279  			}
   280  			got, gOK := string(g), err == nil
   281  			want, wOK := wantMap[name]
   282  			if got != want || gOK != wOK {
   283  				t.Errorf("#%d %s: Get(%q): got %q, %t, want %q, %t",
   284  					i, tc, name, got, gOK, want, wOK)
   285  				fail = true
   286  			}
   287  			if closer != nil {
   288  				closer.Close()
   289  			}
   290  		}
   291  		if fail {
   292  			return
   293  		}
   294  	}
   295  
   296  	require.NoError(t, d.Close())
   297  }
   298  
   299  func TestRandomWrites(t *testing.T) {
   300  	d, err := Open("", testingRandomized(t, &Options{
   301  		FS:           vfs.NewMem(),
   302  		MemTableSize: 8 * 1024,
   303  	}))
   304  	require.NoError(t, err)
   305  
   306  	keys := [64][]byte{}
   307  	wants := [64]int{}
   308  	for k := range keys {
   309  		keys[k] = []byte(strconv.Itoa(k))
   310  		wants[k] = -1
   311  	}
   312  	xxx := bytes.Repeat([]byte("x"), 512)
   313  
   314  	rng := rand.New(rand.NewSource(123))
   315  	const N = 1000
   316  	for i := 0; i < N; i++ {
   317  		k := rng.Intn(len(keys))
   318  		if rng.Intn(20) != 0 {
   319  			wants[k] = rng.Intn(len(xxx) + 1)
   320  			if err := d.Set(keys[k], xxx[:wants[k]], nil); err != nil {
   321  				t.Fatalf("i=%d: Set: %v", i, err)
   322  			}
   323  		} else {
   324  			wants[k] = -1
   325  			if err := d.Delete(keys[k], nil); err != nil {
   326  				t.Fatalf("i=%d: Delete: %v", i, err)
   327  			}
   328  		}
   329  
   330  		if i != N-1 && rng.Intn(50) != 0 {
   331  			continue
   332  		}
   333  		for k := range keys {
   334  			got := -1
   335  			if v, closer, err := d.Get(keys[k]); err != nil {
   336  				if err != ErrNotFound {
   337  					t.Fatalf("Get: %v", err)
   338  				}
   339  			} else {
   340  				got = len(v)
   341  				closer.Close()
   342  			}
   343  			if got != wants[k] {
   344  				t.Errorf("i=%d, k=%d: got %d, want %d", i, k, got, wants[k])
   345  			}
   346  		}
   347  	}
   348  
   349  	require.NoError(t, d.Close())
   350  }
   351  
   352  func TestLargeBatch(t *testing.T) {
   353  	d, err := Open("", testingRandomized(t, &Options{
   354  		FS:                          vfs.NewMem(),
   355  		MemTableSize:                1400,
   356  		MemTableStopWritesThreshold: 100,
   357  	}))
   358  	require.NoError(t, err)
   359  
   360  	verifyLSM := func(expected string) func() error {
   361  		return func() error {
   362  			d.mu.Lock()
   363  			s := d.mu.versions.currentVersion().String()
   364  			d.mu.Unlock()
   365  			if expected != s {
   366  				if testing.Verbose() {
   367  					fmt.Println(strings.TrimSpace(s))
   368  				}
   369  				return errors.Errorf("expected %s, but found %s", expected, s)
   370  			}
   371  			return nil
   372  		}
   373  	}
   374  
   375  	logNum := func() base.DiskFileNum {
   376  		d.mu.Lock()
   377  		defer d.mu.Unlock()
   378  		return d.mu.log.queue[len(d.mu.log.queue)-1].fileNum
   379  	}
   380  	fileSize := func(fileNum base.DiskFileNum) int64 {
   381  		info, err := d.opts.FS.Stat(base.MakeFilepath(d.opts.FS, "", fileTypeLog, fileNum))
   382  		require.NoError(t, err)
   383  		return info.Size()
   384  	}
   385  	memTableCreationSeqNum := func() uint64 {
   386  		d.mu.Lock()
   387  		defer d.mu.Unlock()
   388  		return d.mu.mem.mutable.logSeqNum
   389  	}
   390  
   391  	startLogNum := logNum()
   392  	startLogStartSize := fileSize(startLogNum)
   393  	startSeqNum := d.mu.versions.logSeqNum.Load()
   394  
   395  	// Write a key with a value large enough to exceed the large batch threshold.
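        	// (An assumption about this version: batches larger than a threshold
        	// derived from MemTableSize, roughly half of it, are committed as
        	// separate flushable batches rather than being applied to the mutable
        	// memtable, which is what forces the WAL rotation asserted below.)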
   396  	require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("a"), 512), nil))
   397  
   398  	// Verify that the large batch was written to the WAL that existed before
   399  	// it was committed: WAL rotation occurred, the old WAL grew by the size of
   400  	// the batch, and the new WAL is empty.
   401  	endLogNum := logNum()
   402  	if startLogNum == endLogNum {
   403  		t.Fatal("expected WAL rotation")
   404  	}
   405  	startLogEndSize := fileSize(startLogNum)
   406  	if startLogEndSize == startLogStartSize {
   407  		t.Fatalf("expected large batch to be written to %s.log, but file size unchanged at %d",
   408  			startLogNum, startLogEndSize)
   409  	}
   410  	endLogSize := fileSize(endLogNum)
   411  	if endLogSize != 0 {
   412  		t.Fatalf("expected %s.log to be empty, but found %d", endLogNum, endLogSize)
   413  	}
   414  	if creationSeqNum := memTableCreationSeqNum(); creationSeqNum <= startSeqNum {
   415  		t.Fatalf("expected memTable.logSeqNum=%d > largeBatch.seqNum=%d", creationSeqNum, startSeqNum)
   416  	}
   417  
   418  	// Verify this results in one L0 table being created.
   419  	require.NoError(t, try(100*time.Microsecond, 20*time.Second,
   420  		verifyLSM("0.0:\n  000005:[a#10,SET-a#10,SET]\n")))
   421  
   422  	require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("b"), 512), nil))
   423  
   424  	// Verify this results in a second L0 table being created.
   425  	require.NoError(t, try(100*time.Microsecond, 20*time.Second,
   426  		verifyLSM("0.0:\n  000005:[a#10,SET-a#10,SET]\n  000007:[b#11,SET-b#11,SET]\n")))
   427  
   428  	// Allocate a bunch of batches to exhaust the batchPool. None of these
   429  	// batches should have a non-zero count.
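        	// (An assumption about the hazard being guarded against: if the large
        	// flushable batch above had been returned to the pool before its flush
        	// completed, one of these allocations could resurface with a stale,
        	// non-zero count.)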
   430  	for i := 0; i < 10; i++ {
   431  		b := d.NewBatch()
   432  		require.EqualValues(t, 0, b.Count())
   433  	}
   434  
   435  	require.NoError(t, d.Close())
   436  }
   437  
   438  func TestGetNoCache(t *testing.T) {
   439  	cache := NewCache(0)
   440  	defer cache.Unref()
   441  
   442  	d, err := Open("", testingRandomized(t, &Options{
   443  		Cache: cache,
   444  		FS:    vfs.NewMem(),
   445  	}))
   446  	require.NoError(t, err)
   447  
   448  	require.NoError(t, d.Set([]byte("a"), []byte("aa"), nil))
   449  	require.NoError(t, d.Flush())
   450  	verifyGet(t, d, []byte("a"), []byte("aa"))
   451  
   452  	require.NoError(t, d.Close())
   453  }
   454  
   455  func TestGetMerge(t *testing.T) {
   456  	d, err := Open("", testingRandomized(t, &Options{
   457  		FS: vfs.NewMem(),
   458  	}))
   459  	require.NoError(t, err)
   460  
   461  	key := []byte("a")
   462  	verify := func(expected string) {
   463  		val, closer, err := d.Get(key)
   464  		require.NoError(t, err)
   465  
   466  		if expected != string(val) {
   467  			t.Fatalf("expected %s, but got %s", expected, val)
   468  		}
   469  		closer.Close()
   470  	}
   471  
   472  	const val = "1"
   473  	for i := 1; i <= 3; i++ {
   474  		require.NoError(t, d.Merge(key, []byte(val), nil))
   475  
   476  		expected := strings.Repeat(val, i)
   477  		verify(expected)
   478  
   479  		require.NoError(t, d.Flush())
   480  		verify(expected)
   481  	}
   482  
   483  	require.NoError(t, d.Close())
   484  }
   485  
   486  func TestMergeOrderSameAfterFlush(t *testing.T) {
   487  	// Ensure that the compaction iterator (used by flush) and the user
   488  	// iterator process merge operands in the same order.
   489  	d, err := Open("", testingRandomized(t, &Options{
   490  		FS: vfs.NewMem(),
   491  	}))
   492  	require.NoError(t, err)
   493  
   494  	key := []byte("a")
   495  	verify := func(expected string) {
   496  		iter, _ := d.NewIter(nil)
   497  		if !iter.SeekGE([]byte("a")) {
   498  			t.Fatal("expected one value, but got empty iterator")
   499  		}
   500  		if expected != string(iter.Value()) {
   501  			t.Fatalf("expected %s, but got %s", expected, string(iter.Value()))
   502  		}
   503  		if !iter.SeekLT([]byte("b")) {
   504  			t.Fatal("expected one value, but got empty iterator")
   505  		}
   506  		if expected != string(iter.Value()) {
   507  			t.Fatalf("expected %s, but got %s", expected, string(iter.Value()))
   508  		}
   509  		require.NoError(t, iter.Close())
   510  	}
   511  
   512  	require.NoError(t, d.Merge(key, []byte("0"), nil))
   513  	require.NoError(t, d.Merge(key, []byte("1"), nil))
   514  
   515  	verify("01")
   516  	require.NoError(t, d.Flush())
   517  	verify("01")
   518  
   519  	require.NoError(t, d.Close())
   520  }
   521  
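        // closableMerger is a base.ValueMerger whose Finish returns the merger
        // itself as an io.Closer; tests inspect the closed flag to observe exactly
        // when Pebble releases the merge result.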
   522  type closableMerger struct {
   523  	lastBuf []byte
   524  	closed  bool
   525  }
   526  
   527  func (m *closableMerger) MergeNewer(value []byte) error {
   528  	m.lastBuf = append(m.lastBuf[:0], value...)
   529  	return nil
   530  }
   531  
   532  func (m *closableMerger) MergeOlder(value []byte) error {
   533  	m.lastBuf = append(m.lastBuf[:0], value...)
   534  	return nil
   535  }
   536  
   537  func (m *closableMerger) Finish(includesBase bool) ([]byte, io.Closer, error) {
   538  	return m.lastBuf, m, nil
   539  }
   540  
   541  func (m *closableMerger) Close() error {
   542  	m.closed = true
   543  	return nil
   544  }
   545  
   546  func TestMergerClosing(t *testing.T) {
   547  	m := &closableMerger{}
   548  
   549  	d, err := Open("", testingRandomized(t, &Options{
   550  		FS: vfs.NewMem(),
   551  		Merger: &Merger{
   552  			Merge: func(key, value []byte) (base.ValueMerger, error) {
   553  				return m, m.MergeNewer(value)
   554  			},
   555  		},
   556  	}))
   557  	require.NoError(t, err)
   558  
   559  	defer func() {
   560  		require.NoError(t, d.Close())
   561  	}()
   562  
   563  	err = d.Merge([]byte("a"), []byte("b"), nil)
   564  	require.NoError(t, err)
   565  	require.False(t, m.closed)
   566  
   567  	val, closer, err := d.Get([]byte("a"))
   568  	require.NoError(t, err)
   569  	require.Equal(t, []byte("b"), val)
   570  	require.NotNil(t, closer)
   571  	require.False(t, m.closed)
   572  	_ = closer.Close()
   573  	require.True(t, m.closed)
   574  }
   575  
   576  func TestLogData(t *testing.T) {
   577  	d, err := Open("", testingRandomized(t, &Options{
   578  		FS: vfs.NewMem(),
   579  	}))
   580  	require.NoError(t, err)
   581  
   582  	defer func() {
   583  		require.NoError(t, d.Close())
   584  	}()
   585  
   586  	require.NoError(t, d.LogData([]byte("foo"), Sync))
   587  	require.NoError(t, d.LogData([]byte("bar"), Sync))
   588  	// TODO(itsbilal): Confirm that we wrote some bytes to the WAL.
   589  	// For now, LogData proceeding ahead without a panic is good enough.
   590  }
   591  
   592  func TestSingleDeleteGet(t *testing.T) {
   593  	d, err := Open("", testingRandomized(t, &Options{
   594  		FS: vfs.NewMem(),
   595  	}))
   596  	require.NoError(t, err)
   597  	defer func() {
   598  		require.NoError(t, d.Close())
   599  	}()
   600  
   601  	key := []byte("key")
   602  	val := []byte("val")
   603  
   604  	require.NoError(t, d.Set(key, val, nil))
   605  	verifyGet(t, d, key, val)
   606  
   607  	key2 := []byte("key2")
   608  	val2 := []byte("val2")
   609  
   610  	require.NoError(t, d.Set(key2, val2, nil))
   611  	verifyGet(t, d, key2, val2)
   612  
   613  	require.NoError(t, d.SingleDelete(key2, nil))
   614  	verifyGetNotFound(t, d, key2)
   615  }
   616  
   617  func TestSingleDeleteFlush(t *testing.T) {
   618  	d, err := Open("", testingRandomized(t, &Options{
   619  		FS: vfs.NewMem(),
   620  	}))
   621  	require.NoError(t, err)
   622  	defer func() {
   623  		require.NoError(t, d.Close())
   624  	}()
   625  
   626  	key := []byte("key")
   627  	valFirst := []byte("first")
   628  	valSecond := []byte("second")
   629  	key2 := []byte("key2")
   630  	val2 := []byte("val2")
   631  
   632  	require.NoError(t, d.Set(key, valFirst, nil))
   633  	require.NoError(t, d.Set(key2, val2, nil))
   634  	require.NoError(t, d.Flush())
   635  
   636  	require.NoError(t, d.SingleDelete(key, nil))
   637  	require.NoError(t, d.Set(key, valSecond, nil))
   638  	require.NoError(t, d.Delete(key2, nil))
   639  	require.NoError(t, d.Set(key2, val2, nil))
   640  	require.NoError(t, d.Flush())
   641  
   642  	require.NoError(t, d.SingleDelete(key, nil))
   643  	require.NoError(t, d.Delete(key2, nil))
   644  	require.NoError(t, d.Flush())
   645  
   646  	verifyGetNotFound(t, d, key)
   647  	verifyGetNotFound(t, d, key2)
   648  }
   649  
   650  func TestUnremovableSingleDelete(t *testing.T) {
   651  	d, err := Open("", testingRandomized(t, &Options{
   652  		FS:                    vfs.NewMem(),
   653  		L0CompactionThreshold: 8,
   654  	}))
   655  	require.NoError(t, err)
   656  	defer func() {
   657  		require.NoError(t, d.Close())
   658  	}()
   659  
   660  	key := []byte("key")
   661  	valFirst := []byte("valFirst")
   662  	valSecond := []byte("valSecond")
   663  
   664  	require.NoError(t, d.Set(key, valFirst, nil))
   665  	ss := d.NewSnapshot()
   666  	defer ss.Close()
   667  	require.NoError(t, d.SingleDelete(key, nil))
   668  	require.NoError(t, d.Set(key, valSecond, nil))
   669  	require.NoError(t, d.Flush())
   670  
   671  	verifyGet(t, ss, key, valFirst)
   672  	verifyGet(t, d, key, valSecond)
   673  
   674  	require.NoError(t, d.SingleDelete(key, nil))
   675  
   676  	verifyGet(t, ss, key, valFirst)
   677  	verifyGetNotFound(t, d, key)
   678  
   679  	require.NoError(t, d.Flush())
   680  
   681  	verifyGet(t, ss, key, valFirst)
   682  	verifyGetNotFound(t, d, key)
   683  }
   684  
   685  func TestIterLeak(t *testing.T) {
   686  	for _, leak := range []bool{true, false} {
   687  		t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) {
   688  			for _, flush := range []bool{true, false} {
   689  				t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) {
   690  					d, err := Open("", testingRandomized(t, &Options{
   691  						FS: vfs.NewMem(),
   692  					}))
   693  					require.NoError(t, err)
   694  
   695  					require.NoError(t, d.Set([]byte("a"), []byte("a"), nil))
   696  					if flush {
   697  						require.NoError(t, d.Flush())
   698  					}
   699  					iter, _ := d.NewIter(nil)
   700  					iter.First()
   701  					if !leak {
   702  						require.NoError(t, iter.Close())
   703  						require.NoError(t, d.Close())
   704  					} else {
   705  						defer iter.Close()
   706  						if err := d.Close(); err == nil {
   707  							t.Fatalf("expected failure, but found success")
   708  						} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
   709  							t.Fatalf("expected leaked iterators, but found %+v", err)
   710  						} else {
   711  							t.Log(err.Error())
   712  						}
   713  					}
   714  				})
   715  			}
   716  		})
   717  	}
   718  }
   719  
   720  // Make sure that we detect an iter leak when only one DB closes
   721  // while the second DB still holds a reference to the TableCache.
   722  func TestIterLeakSharedCache(t *testing.T) {
   723  	for _, leak := range []bool{true, false} {
   724  		t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) {
   725  			for _, flush := range []bool{true, false} {
   726  				t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) {
   727  					d1, err := Open("", &Options{
   728  						FS: vfs.NewMem(),
   729  					})
   730  					require.NoError(t, err)
   731  
   732  					d2, err := Open("", &Options{
   733  						FS: vfs.NewMem(),
   734  					})
   735  					require.NoError(t, err)
   736  
   737  					require.NoError(t, d1.Set([]byte("a"), []byte("a"), nil))
   738  					if flush {
   739  						require.NoError(t, d1.Flush())
   740  					}
   741  
   742  					require.NoError(t, d2.Set([]byte("a"), []byte("a"), nil))
   743  					if flush {
   744  						require.NoError(t, d2.Flush())
   745  					}
   746  
   747  					// Check if leak detection works with only one db closing.
   748  					{
   749  						iter1, _ := d1.NewIter(nil)
   750  						iter1.First()
   751  						if !leak {
   752  							require.NoError(t, iter1.Close())
   753  							require.NoError(t, d1.Close())
   754  						} else {
   755  							defer iter1.Close()
   756  							if err := d1.Close(); err == nil {
   757  								t.Fatalf("expected failure, but found success")
   758  							} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
   759  								t.Fatalf("expected leaked iterators, but found %+v", err)
   760  							} else {
   761  								t.Log(err.Error())
   762  							}
   763  						}
   764  					}
   765  
   766  					{
   767  						iter2, _ := d2.NewIter(nil)
   768  						iter2.First()
   769  						if !leak {
   770  							require.NoError(t, iter2.Close())
   771  							require.NoError(t, d2.Close())
   772  						} else {
   773  							defer iter2.Close()
   774  							if err := d2.Close(); err == nil {
   775  								t.Fatalf("expected failure, but found success")
   776  							} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
   777  								t.Fatalf("expected leaked iterators, but found %+v", err)
   778  							} else {
   779  								t.Log(err.Error())
   780  							}
   781  						}
   782  					}
   783  
   784  				})
   785  			}
   786  		})
   787  	}
   788  }
   789  
   790  func TestMemTableReservation(t *testing.T) {
   791  	opts := &Options{
   792  		Cache:        NewCache(128 << 10 /* 128 KB */),
   793  		MemTableSize: initialMemTableSize,
   794  		FS:           vfs.NewMem(),
   795  	}
   796  	defer opts.Cache.Unref()
   797  	opts.testingRandomized(t)
   798  	opts.EnsureDefaults()
   799  	// We're going to be looking at and asserting the global memtable reservation
   800  	// amount below, so we don't want to race with any triggered stats collections.
   801  	opts.private.disableTableStats = true
   802  
   803  	// Add a block to the cache. Note that the memtable size is larger than the
   804  	// cache size, so opening the DB should cause this block to be evicted.
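        	// (Memtable memory is reserved from the block cache, which is what
        	// forces the eviction asserted below.)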
   805  	tmpID := opts.Cache.NewID()
   806  	helloWorld := []byte("hello world")
   807  	value := cache.Alloc(len(helloWorld))
   808  	copy(value.Buf(), helloWorld)
   809  	opts.Cache.Set(tmpID, base.FileNum(0).DiskFileNum(), 0, value).Release()
   810  
   811  	d, err := Open("", opts)
   812  	require.NoError(t, err)
   813  
   814  	checkReserved := func(expected int64) {
   815  		t.Helper()
   816  		if reserved := d.memTableReserved.Load(); expected != reserved {
   817  			t.Fatalf("expected %d reserved, but found %d", expected, reserved)
   818  		}
   819  	}
   820  
   821  	checkReserved(int64(opts.MemTableSize))
   822  	if refs := d.mu.mem.queue[len(d.mu.mem.queue)-1].readerRefs.Load(); refs != 2 {
   823  		t.Fatalf("expected 2 refs, but found %d", refs)
   824  	}
   825  	// Verify the memtable reservation has caused our test block to be evicted.
   826  	if h := opts.Cache.Get(tmpID, base.FileNum(0).DiskFileNum(), 0); h.Get() != nil {
   827  		t.Fatalf("expected failure, but found success: %s", h.Get())
   828  	}
   829  
   830  	// Flush the memtable. The memtable reservation should double because the
   831  	// old memtable will be recycled, saved for the next memtable allocation.
   832  	require.NoError(t, d.Flush())
   833  	checkReserved(int64(2 * opts.MemTableSize))
   834  	// Flushing again would leave the reservation unchanged, because at most
   835  	// one memtable may be preserved for recycling.
   836  
   837  	// Flush in the presence of an active iterator. The iterator will hold a
   838  	// reference to a readState which will in turn hold a reader reference to the
   839  	// memtable.
   840  	iter, _ := d.NewIter(nil)
   841  	require.NoError(t, d.Flush())
   842  	// The flush moved the recycled memtable into position as an active mutable
   843  	// memtable. There are now two allocated memtables: 1 mutable and 1 pinned
   844  	// by the iterator's read state.
   845  	checkReserved(2 * int64(opts.MemTableSize))
   846  
   847  	// Flushing again should increase the reservation total to 3x: 1 active
   848  	// mutable, 1 for recycling, 1 pinned by iterator's read state.
   849  	require.NoError(t, d.Flush())
   850  	checkReserved(3 * int64(opts.MemTableSize))
   851  
   852  	// Closing the iterator will release the iterator's read state, and the old
   853  	// memtable will be moved into position as the next memtable to recycle.
   854  	// There was already a memtable ready to be recycled, so that memtable will
   855  	// be freed and the overall reservation total is reduced to 2x.
   856  	require.NoError(t, iter.Close())
   857  	checkReserved(2 * int64(opts.MemTableSize))
   858  
   859  	require.NoError(t, d.Close())
   860  }
   861  
   862  func TestMemTableReservationLeak(t *testing.T) {
   863  	d, err := Open("", &Options{FS: vfs.NewMem()})
   864  	require.NoError(t, err)
   865  
   866  	d.mu.Lock()
   867  	last := d.mu.mem.queue[len(d.mu.mem.queue)-1]
   868  	last.readerRef()
   869  	defer func() {
   870  		last.readerUnref(true)
   871  	}()
   872  	d.mu.Unlock()
   873  	if err := d.Close(); err == nil {
   874  		t.Fatalf("expected failure, but found success")
   875  	} else if !strings.HasPrefix(err.Error(), "leaked memtable reservation:") {
   876  		t.Fatalf("expected leaked memtable reservation, but found %+v", err)
   877  	} else {
   878  		t.Log(err.Error())
   879  	}
   880  }
   881  
   882  func TestCacheEvict(t *testing.T) {
   883  	cache := NewCache(10 << 20)
   884  	defer cache.Unref()
   885  
   886  	d, err := Open("", &Options{
   887  		Cache: cache,
   888  		FS:    vfs.NewMem(),
   889  	})
   890  	require.NoError(t, err)
   891  
   892  	for i := 0; i < 1000; i++ {
   893  		key := []byte(fmt.Sprintf("%04d", i))
   894  		require.NoError(t, d.Set(key, key, nil))
   895  	}
   896  
   897  	require.NoError(t, d.Flush())
   898  	iter, _ := d.NewIter(nil)
   899  	for iter.First(); iter.Valid(); iter.Next() {
   900  	}
   901  	require.NoError(t, iter.Close())
   902  
   903  	if size := cache.Size(); size == 0 {
   904  		t.Fatalf("expected non-zero cache size")
   905  	}
   906  
   907  	for i := 0; i < 1000; i++ {
   908  		key := []byte(fmt.Sprintf("%04d", i))
   909  		require.NoError(t, d.Delete(key, nil))
   910  	}
   911  
   912  	require.NoError(t, d.Compact([]byte("0"), []byte("1"), false))
   913  
   914  	require.NoError(t, d.Close())
   915  
   916  	if size := cache.Size(); size != 0 {
   917  		t.Fatalf("expected empty cache, but found %d", size)
   918  	}
   919  }
   920  
   921  func TestFlushEmpty(t *testing.T) {
   922  	d, err := Open("", testingRandomized(t, &Options{
   923  		FS: vfs.NewMem(),
   924  	}))
   925  	require.NoError(t, err)
   926  
   927  	// Flushing an empty memtable should not fail.
   928  	require.NoError(t, d.Flush())
   929  	require.NoError(t, d.Close())
   930  }
   931  
   932  func TestRollManifest(t *testing.T) {
   933  	toPreserve := rand.Int31n(5) + 1
   934  	opts := &Options{
   935  		MaxManifestFileSize:   1,
   936  		L0CompactionThreshold: 10,
   937  		L0StopWritesThreshold: 1000,
   938  		FS:                    vfs.NewMem(),
   939  		NumPrevManifest:       int(toPreserve),
   940  	}
   941  	opts.DisableAutomaticCompactions = true
   942  	opts.testingRandomized(t)
   943  	d, err := Open("", opts)
   944  	require.NoError(t, err)
   945  
   946  	manifestFileNumber := func() FileNum {
   947  		d.mu.Lock()
   948  		defer d.mu.Unlock()
   949  		return d.mu.versions.manifestFileNum
   950  	}
   951  	sizeRolloverState := func() (int64, int64) {
   952  		d.mu.Lock()
   953  		defer d.mu.Unlock()
   954  		return d.mu.versions.rotationHelper.DebugInfo()
   955  	}
   956  
   957  	current := func() string {
   958  		desc, err := Peek(d.dirname, d.opts.FS)
   959  		require.NoError(t, err)
   960  		return desc.ManifestFilename
   961  	}
   962  
   963  	lastManifestNum := manifestFileNumber()
   964  	manifestNums := []base.FileNum{lastManifestNum}
   965  	for i := 0; i < 5; i++ {
   966  		// MaxManifestFileSize is 1, but the rollover logic also counts edits
   967  		// since the last snapshot to decide on rollover, so do as many flushes as
   968  		// it demands.
   969  		lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState()
   970  		var expectedLastSnapshotCount, expectedEditsSinceSnapshotCount int64
   971  		switch i {
   972  		case 0:
   973  			// DB is empty.
   974  			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 0
   975  		case 1:
   976  			// First edit that caused rollover is not in the snapshot.
   977  			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 1
   978  		case 2:
   979  			// One flush is in the snapshot. One flush in the edit.
   980  			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 1, 1
   981  		case 3:
   982  			// Two flushes in the snapshot. One flush in the edit. Will need to do
   983  			// two more flushes, the first of which will be in the next snapshot.
   984  			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 2, 1
   985  		case 4:
   986  			// Four flushes in the snapshot. One flush in the edit. Will need to do
   987  			// four more flushes, three of which will be in the snapshot.
   988  			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 4, 1
   989  		}
   990  		require.Equal(t, expectedLastSnapshotCount, lastSnapshotCount)
   991  		require.Equal(t, expectedEditsSinceSnapshotCount, editsSinceSnapshotCount)
   992  		// Number of flushes to do to trigger the rollover.
   993  		steps := int(lastSnapshotCount - editsSinceSnapshotCount + 1)
   994  		// Steps can be <= 0, but we need to do at least one edit to trigger the
   995  		// rollover logic.
   996  		if steps <= 0 {
   997  			steps = 1
   998  		}
   999  		for j := 0; j < steps; j++ {
  1000  			require.NoError(t, d.Set([]byte("a"), nil, nil))
  1001  			require.NoError(t, d.Flush())
  1002  		}
  1003  		d.TestOnlyWaitForCleaning()
  1004  		num := manifestFileNumber()
  1005  		if lastManifestNum == num {
  1006  			t.Fatalf("manifest failed to roll %d: %d == %d", i, lastManifestNum, num)
  1007  		}
  1008  
  1009  		manifestNums = append(manifestNums, num)
  1010  		lastManifestNum = num
  1011  
  1012  		expectedCurrent := fmt.Sprintf("MANIFEST-%s", lastManifestNum)
  1013  		if v := current(); expectedCurrent != v {
  1014  			t.Fatalf("expected %s, but found %s", expectedCurrent, v)
  1015  		}
  1016  	}
  1017  	lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState()
  1018  	require.EqualValues(t, 8, lastSnapshotCount)
  1019  	require.EqualValues(t, 1, editsSinceSnapshotCount)
  1020  
  1021  	files, err := d.opts.FS.List("")
  1022  	require.NoError(t, err)
  1023  
  1024  	var manifests []string
  1025  	for _, filename := range files {
  1026  		fileType, _, ok := base.ParseFilename(d.opts.FS, filename)
  1027  		if !ok {
  1028  			continue
  1029  		}
  1030  		if fileType == fileTypeManifest {
  1031  			manifests = append(manifests, filename)
  1032  		}
  1033  	}
  1034  
  1035  	sort.Slice(manifests, func(i, j int) bool {
  1036  		return manifests[i] < manifests[j]
  1037  	})
  1038  
  1039  	var expected []string
  1040  	for i := len(manifestNums) - int(toPreserve) - 1; i < len(manifestNums); i++ {
  1041  		expected = append(
  1042  			expected,
  1043  			fmt.Sprintf("MANIFEST-%s", manifestNums[i]),
  1044  		)
  1045  	}
  1046  	require.EqualValues(t, expected, manifests)
  1047  
  1048  	// Test the logic that uses the future snapshot size to rollover.
  1049  	// Reminder: we have a snapshot with 8 files and the manifest has 1 edit
  1050  	// (flush) with 1 file.
  1051  	// Add 8 more files with a different key.
  1052  	lastManifestNum = manifestFileNumber()
  1053  	for j := 0; j < 8; j++ {
  1054  		require.NoError(t, d.Set([]byte("c"), nil, nil))
  1055  		require.NoError(t, d.Flush())
  1056  	}
  1057  	lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState()
  1058  	// Need 16 more files in edits to trigger a rollover.
  1059  	require.EqualValues(t, 16, lastSnapshotCount)
  1060  	require.EqualValues(t, 1, editsSinceSnapshotCount)
  1061  	require.NotEqual(t, manifestFileNumber(), lastManifestNum)
  1062  	lastManifestNum = manifestFileNumber()
  1063  	// Do a compaction that moves 8 of the files from L0 to 1 file in L6. This
  1064  	// adds 9 files in edits. We still need 6 more files in edits based on the
  1065  	// last snapshot. But the current version has only 9 L0 files and 1 L6 file,
  1066  	// for a total of 10 files. So 1 flush should push us over that threshold.
  1067  	require.NoError(t, d.Compact([]byte("c"), []byte("d"), false))
  1068  	lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState()
  1069  	require.EqualValues(t, 16, lastSnapshotCount)
  1070  	require.EqualValues(t, 10, editsSinceSnapshotCount)
  1071  	require.Equal(t, manifestFileNumber(), lastManifestNum)
  1072  	require.NoError(t, d.Set([]byte("c"), nil, nil))
  1073  	require.NoError(t, d.Flush())
  1074  	lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState()
  1075  	require.EqualValues(t, 10, lastSnapshotCount)
  1076  	require.EqualValues(t, 1, editsSinceSnapshotCount)
  1077  	require.NotEqual(t, manifestFileNumber(), lastManifestNum)
  1078  
  1079  	require.NoError(t, d.Close())
  1080  }
  1081  
  1082  func TestDBClosed(t *testing.T) {
  1083  	d, err := Open("", &Options{
  1084  		FS: vfs.NewMem(),
  1085  	})
  1086  	require.NoError(t, err)
  1087  	require.NoError(t, d.Close())
  1088  
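        	// catch runs f and converts any panic it raises into a returned error;
        	// operations on a closed DB panic with ErrClosed rather than returning it.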
  1089  	catch := func(f func()) (err error) {
  1090  		defer func() {
  1091  			if r := recover(); r != nil {
  1092  				err = r.(error)
  1093  			}
  1094  		}()
  1095  		f()
  1096  		return nil
  1097  	}
  1098  
  1099  	require.True(t, errors.Is(catch(func() { _ = d.Close() }), ErrClosed))
  1100  
  1101  	require.True(t, errors.Is(catch(func() { _ = d.Compact(nil, nil, false) }), ErrClosed))
  1102  	require.True(t, errors.Is(catch(func() { _ = d.Flush() }), ErrClosed))
  1103  	require.True(t, errors.Is(catch(func() { _, _ = d.AsyncFlush() }), ErrClosed))
  1104  
  1105  	require.True(t, errors.Is(catch(func() { _, _, _ = d.Get(nil) }), ErrClosed))
  1106  	require.True(t, errors.Is(catch(func() { _ = d.Delete(nil, nil) }), ErrClosed))
  1107  	require.True(t, errors.Is(catch(func() { _ = d.DeleteRange(nil, nil, nil) }), ErrClosed))
  1108  	require.True(t, errors.Is(catch(func() { _ = d.Ingest(nil) }), ErrClosed))
  1109  	require.True(t, errors.Is(catch(func() { _ = d.LogData(nil, nil) }), ErrClosed))
  1110  	require.True(t, errors.Is(catch(func() { _ = d.Merge(nil, nil, nil) }), ErrClosed))
  1111  	require.True(t, errors.Is(catch(func() { _ = d.RatchetFormatMajorVersion(internalFormatNewest) }), ErrClosed))
  1112  	require.True(t, errors.Is(catch(func() { _ = d.Set(nil, nil, nil) }), ErrClosed))
  1113  
  1114  	require.True(t, errors.Is(catch(func() { _ = d.NewSnapshot() }), ErrClosed))
  1115  
  1116  	b := d.NewIndexedBatch()
  1117  	require.True(t, errors.Is(catch(func() { _ = b.Commit(nil) }), ErrClosed))
  1118  	require.True(t, errors.Is(catch(func() { _ = d.Apply(b, nil) }), ErrClosed))
  1119  	require.True(t, errors.Is(catch(func() { _, _ = b.NewIter(nil) }), ErrClosed))
  1120  }
  1121  
  1122  func TestDBConcurrentCommitCompactFlush(t *testing.T) {
  1123  	d, err := Open("", testingRandomized(t, &Options{
  1124  		FS: vfs.NewMem(),
  1125  	}))
  1126  	require.NoError(t, err)
  1127  
  1128  	// Concurrently commit, compact, and flush in order to stress the locking around
  1129  	// those operations.
  1130  	const n = 1000
  1131  	var wg sync.WaitGroup
  1132  	wg.Add(n)
  1133  	for i := 0; i < n; i++ {
  1134  		go func(i int) {
  1135  			defer wg.Done()
  1136  			_ = d.Set([]byte(fmt.Sprint(i)), nil, nil)
  1137  			var err error
  1138  			switch i % 3 {
  1139  			case 0:
  1140  				err = d.Compact(nil, []byte("\xff"), false)
  1141  			case 1:
  1142  				err = d.Flush()
  1143  			case 2:
  1144  				_, err = d.AsyncFlush()
  1145  			}
  1146  			require.NoError(t, err)
  1147  		}(i)
  1148  	}
  1149  	wg.Wait()
  1150  
  1151  	require.NoError(t, d.Close())
  1152  }
  1153  
  1154  func TestDBConcurrentCompactClose(t *testing.T) {
  1155  	// Test closing while a compaction is ongoing. This ensures compaction code
  1156  	// detects the close and finishes cleanly.
  1157  	mem := vfs.NewMem()
  1158  	for i := 0; i < 100; i++ {
  1159  		opts := &Options{
  1160  			FS: mem,
  1161  			MaxConcurrentCompactions: func() int {
  1162  				return 2
  1163  			},
  1164  		}
  1165  		d, err := Open("", testingRandomized(t, opts))
  1166  		require.NoError(t, err)
  1167  
  1168  		// Ingest a series of files containing a single key each. As the outer
  1169  		// loop progresses, these ingestions build up compaction debt, causing
  1170  		// compactions to run concurrently with the close below.
  1171  		for j := 0; j < 10; j++ {
  1172  			path := fmt.Sprintf("ext%d", j)
  1173  			f, err := mem.Create(path)
  1174  			require.NoError(t, err)
  1175  			w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1176  				TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1177  			})
  1178  			require.NoError(t, w.Set([]byte(fmt.Sprint(j)), nil))
  1179  			require.NoError(t, w.Close())
  1180  			require.NoError(t, d.Ingest([]string{path}))
  1181  		}
  1182  
  1183  		require.NoError(t, d.Close())
  1184  	}
  1185  }
  1186  
  1187  func TestDBApplyBatchNilDB(t *testing.T) {
  1188  	d, err := Open("", &Options{FS: vfs.NewMem()})
  1189  	require.NoError(t, err)
  1190  
  1191  	b1 := &Batch{}
  1192  	b1.Set([]byte("test"), nil, nil)
  1193  
  1194  	b2 := &Batch{}
  1195  	b2.Apply(b1, nil)
  1196  	if b2.memTableSize != 0 {
  1197  		t.Fatalf("expected memTableSize to not be set")
  1198  	}
  1199  	require.NoError(t, d.Apply(b2, nil))
  1200  	if b1.memTableSize != b2.memTableSize {
  1201  		t.Fatalf("expected memTableSize %d, but found %d", b1.memTableSize, b2.memTableSize)
  1202  	}
  1203  
  1204  	require.NoError(t, d.Close())
  1205  }
  1206  
  1207  func TestDBApplyBatchMismatch(t *testing.T) {
  1208  	srcDB, err := Open("", &Options{FS: vfs.NewMem()})
  1209  	require.NoError(t, err)
  1210  
  1211  	applyDB, err := Open("", &Options{FS: vfs.NewMem()})
  1212  	require.NoError(t, err)
  1213  
  1214  	err = func() (err error) {
  1215  		defer func() {
  1216  			if v := recover(); v != nil {
  1217  				err = errors.Errorf("%v", v)
  1218  			}
  1219  		}()
  1220  
  1221  		b := srcDB.NewBatch()
  1222  		b.Set([]byte("test"), nil, nil)
  1223  		return applyDB.Apply(b, nil)
  1224  	}()
  1225  	if err == nil || !strings.Contains(err.Error(), "pebble: batch db mismatch:") {
  1226  		t.Fatalf("expected error, but found %v", err)
  1227  	}
  1228  
  1229  	require.NoError(t, srcDB.Close())
  1230  	require.NoError(t, applyDB.Close())
  1231  }
  1232  
  1233  func TestCloseCleanerRace(t *testing.T) {
  1234  	mem := vfs.NewMem()
  1235  	for i := 0; i < 20; i++ {
  1236  		db, err := Open("", testingRandomized(t, &Options{FS: mem}))
  1237  		require.NoError(t, err)
  1238  		require.NoError(t, db.Set([]byte("a"), []byte("something"), Sync))
  1239  		require.NoError(t, db.Flush())
  1240  		// Ref the sstables so they cannot be deleted.
  1241  		it, _ := db.NewIter(nil)
  1242  		require.NotNil(t, it)
  1243  		require.NoError(t, db.DeleteRange([]byte("a"), []byte("b"), Sync))
  1244  		require.NoError(t, db.Compact([]byte("a"), []byte("b"), false))
  1245  		// Only the iterator is keeping the sstables alive.
  1246  		files, err := mem.List("/")
  1247  		require.NoError(t, err)
  1248  		var found bool
  1249  		for _, f := range files {
  1250  			if strings.HasSuffix(f, ".sst") {
  1251  				found = true
  1252  				break
  1253  			}
  1254  		}
  1255  		require.True(t, found)
  1256  		// Close the iterator and the db in succession so file cleaning races with DB.Close() --
  1257  		// latter should wait for file cleaning to finish.
  1258  		require.NoError(t, it.Close())
  1259  		require.NoError(t, db.Close())
  1260  		files, err = mem.List("/")
  1261  		require.NoError(t, err)
  1262  		for _, f := range files {
  1263  			if strings.HasSuffix(f, ".sst") {
  1264  				t.Fatalf("found sst: %s", f)
  1265  			}
  1266  		}
  1267  	}
  1268  }
  1269  
  1270  func TestSSTablesWithApproximateSpanBytes(t *testing.T) {
  1271  	d, err := Open("", &Options{
  1272  		FS: vfs.NewMem(),
  1273  	})
  1274  	require.NoError(t, err)
  1275  	defer func() {
  1276  		if d != nil {
  1277  			require.NoError(t, d.Close())
  1278  		}
  1279  	}()
  1280  
  1281  	// Create two sstables.
  1282  	// The first sstable is fully contained within the key range (fileNum = 5).
  1283  	require.NoError(t, d.Set([]byte("c"), nil, nil))
  1284  	require.NoError(t, d.Set([]byte("d"), nil, nil))
  1285  	require.NoError(t, d.Flush())
  1286  
  1287  	// The second sstable partially overlaps the key range (fileNum = 7).
  1288  	require.NoError(t, d.Set([]byte("d"), nil, nil))
  1289  	require.NoError(t, d.Set([]byte("g"), nil, nil))
  1290  	require.NoError(t, d.Flush())
  1291  
  1292  	// WithApproximateSpanBytes cannot be used without WithProperties.
  1293  	_, err = d.SSTables(WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes())
  1294  	require.Error(t, err)
  1295  
  1296  	// WithApproximateSpanBytes cannot be used without WithKeyRangeFilter.
  1297  	_, err = d.SSTables(WithProperties(), WithApproximateSpanBytes())
  1298  	require.Error(t, err)
  1299  
  1300  	tableInfos, err := d.SSTables(WithProperties(), WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes())
  1301  	require.NoError(t, err)
  1302  
  1303  	for _, levelTables := range tableInfos {
  1304  		for _, table := range levelTables {
  1305  			approximateSpanBytes, err := strconv.ParseInt(table.Properties.UserProperties["approximate-span-bytes"], 10, 64)
  1306  			require.NoError(t, err)
  1307  			if table.FileNum == 5 {
  1308  				require.Equal(t, uint64(approximateSpanBytes), table.Size)
  1309  			}
  1310  			if table.FileNum == 7 {
  1311  				require.Less(t, uint64(approximateSpanBytes), table.Size)
  1312  			}
  1313  		}
  1314  	}
  1315  }
  1316  
  1317  func TestFilterSSTablesWithOption(t *testing.T) {
  1318  	d, err := Open("", &Options{
  1319  		FS: vfs.NewMem(),
  1320  	})
  1321  	require.NoError(t, err)
  1322  	defer func() {
  1323  		if d != nil {
  1324  			require.NoError(t, d.Close())
  1325  		}
  1326  	}()
  1327  
  1328  	// Create two sstables.
  1329  	require.NoError(t, d.Set([]byte("/Table/5"), nil, nil))
  1330  	require.NoError(t, d.Flush())
  1331  	require.NoError(t, d.Set([]byte("/Table/10"), nil, nil))
  1332  	require.NoError(t, d.Flush())
  1333  
  1334  	tableInfos, err := d.SSTables(WithKeyRangeFilter([]byte("/Table/5"), []byte("/Table/6")))
  1335  	require.NoError(t, err)
  1336  
  1337  	totalTables := 0
  1338  	for _, levelTables := range tableInfos {
  1339  		totalTables += len(levelTables)
  1340  	}
  1341  
  1342  	// With the key range filter, the second sstable should not be returned.
  1343  	require.EqualValues(t, 1, totalTables)
  1344  
  1345  	tableInfos, err = d.SSTables()
  1346  	require.NoError(t, err)
  1347  
  1348  	totalTables = 0
  1349  	for _, levelTables := range tableInfos {
  1350  		totalTables += len(levelTables)
  1351  	}
  1352  
  1353  	// Without the filter, both sstables are returned.
  1354  	require.EqualValues(t, 2, totalTables)
  1355  }
  1356  
  1357  func TestSSTables(t *testing.T) {
  1358  	d, err := Open("", &Options{
  1359  		FS: vfs.NewMem(),
  1360  	})
  1361  	require.NoError(t, err)
  1362  	defer func() {
  1363  		if d != nil {
  1364  			require.NoError(t, d.Close())
  1365  		}
  1366  	}()
  1367  
  1368  	// Create two sstables.
  1369  	require.NoError(t, d.Set([]byte("hello"), nil, nil))
  1370  	require.NoError(t, d.Flush())
  1371  	require.NoError(t, d.Set([]byte("world"), nil, nil))
  1372  	require.NoError(t, d.Flush())
  1373  
  1374  	// By default, the returned table infos should not contain Properties.
  1375  	tableInfos, err := d.SSTables()
  1376  	require.NoError(t, err)
  1377  	for _, levelTables := range tableInfos {
  1378  		for _, info := range levelTables {
  1379  			require.Nil(t, info.Properties)
  1380  		}
  1381  	}
  1382  
  1383  	// With the WithProperties() option, Properties in each table info is non-nil.
  1384  	tableInfos, err = d.SSTables(WithProperties())
  1385  	require.NoError(t, err)
  1386  	for _, levelTables := range tableInfos {
  1387  		for _, info := range levelTables {
  1388  			require.NotNil(t, info.Properties)
  1389  		}
  1390  	}
  1391  }
  1392  
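        // testTracer is a LoggerAndTracer implementation that records Eventf output
        // in a buffer, optionally suppressing events carrying context.Background().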
  1393  type testTracer struct {
  1394  	enabledOnlyForNonBackgroundContext bool
  1395  	buf                                strings.Builder
  1396  }
  1397  
  1398  func (t *testTracer) Infof(format string, args ...interface{})  {}
  1399  func (t *testTracer) Fatalf(format string, args ...interface{}) {}
  1400  
  1401  func (t *testTracer) Eventf(ctx context.Context, format string, args ...interface{}) {
  1402  	if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() {
  1403  		return
  1404  	}
  1405  	fmt.Fprintf(&t.buf, format, args...)
  1406  	fmt.Fprint(&t.buf, "\n")
  1407  }
  1408  
  1409  func (t *testTracer) IsTracingEnabled(ctx context.Context) bool {
  1410  	if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() {
  1411  		return false
  1412  	}
  1413  	return true
  1414  }
  1415  
  1416  func TestTracing(t *testing.T) {
  1417  	if !invariants.Enabled {
  1418  		// The test relies on timing behavior injected when invariants.Enabled.
  1419  		return
  1420  	}
  1421  	var tracer testTracer
  1422  	c := NewCache(0)
  1423  	defer c.Unref()
  1424  	d, err := Open("", &Options{
  1425  		FS:              vfs.NewMem(),
  1426  		Cache:           c,
  1427  		LoggerAndTracer: &tracer,
  1428  	})
  1429  	require.NoError(t, err)
  1430  	defer func() {
  1431  		require.NoError(t, d.Close())
  1432  	}()
  1433  
  1434  	// Create an sstable.
  1435  	require.NoError(t, d.Set([]byte("hello"), nil, nil))
  1436  	require.NoError(t, d.Flush())
  1437  	_, closer, err := d.Get([]byte("hello"))
  1438  	require.NoError(t, err)
  1439  	closer.Close()
  1440  	readerInitTraceString := "reading 37 bytes took 5ms\nreading 628 bytes took 5ms\n"
  1441  	iterTraceString := "reading 27 bytes took 5ms\nreading 29 bytes took 5ms\n"
  1442  	require.Equal(t, readerInitTraceString+iterTraceString, tracer.buf.String())
  1443  
  1444  	// Get again, but since it currently uses context.Background(), no trace
  1445  	// output is produced.
  1446  	tracer.buf.Reset()
  1447  	tracer.enabledOnlyForNonBackgroundContext = true
  1448  	_, closer, err = d.Get([]byte("hello"))
  1449  	require.NoError(t, err)
  1450  	closer.Close()
  1451  	require.Equal(t, "", tracer.buf.String())
  1452  
  1453  	ctx, cancel := context.WithCancel(context.Background())
  1454  	defer cancel()
  1455  	iter, _ := d.NewIterWithContext(ctx, nil)
  1456  	iter.SeekGE([]byte("hello"))
  1457  	iter.Close()
  1458  	require.Equal(t, iterTraceString, tracer.buf.String())
  1459  
  1460  	tracer.buf.Reset()
  1461  	snap := d.NewSnapshot()
  1462  	iter, _ = snap.NewIterWithContext(ctx, nil)
  1463  	iter.SeekGE([]byte("hello"))
  1464  	iter.Close()
  1465  	require.Equal(t, iterTraceString, tracer.buf.String())
  1466  	snap.Close()
  1467  
  1468  	tracer.buf.Reset()
  1469  	b := d.NewIndexedBatch()
  1470  	iter, _ = b.NewIterWithContext(ctx, nil)
  1471  	iter.SeekGE([]byte("hello"))
  1472  	iter.Close()
  1473  	require.Equal(t, iterTraceString, tracer.buf.String())
  1474  	b.Close()
  1475  }
  1476  
  1477  func TestMemtableIngestInversion(t *testing.T) {
  1478  	memFS := vfs.NewMem()
  1479  	opts := &Options{
  1480  		FS:                          memFS,
  1481  		MemTableSize:                256 << 10, // 256 KB
  1482  		MemTableStopWritesThreshold: 1000,
  1483  		L0StopWritesThreshold:       1000,
  1484  		L0CompactionThreshold:       2,
  1485  		MaxConcurrentCompactions: func() int {
  1486  			return 1000
  1487  		},
  1488  	}
  1489  
  1490  	const channelTimeout = 5 * time.Second
  1491  
  1492  	// We induce delay in compactions by passing in an EventListener that stalls
  1493  	// on the first TableCreated event for a compaction job we want to block.
  1494  	// FlushBegin and CompactionBegin have info on compaction start/output levels,
  1495  	// which is what we need to identify which compactions to block. However,
  1496  	// FlushBegin and CompactionBegin are called while holding db.mu, so we cannot
  1497  	// block in those events forever. Instead, we grab the job ID from those
  1498  	// events and store it. Then, during TableCreated, we check whether we're
  1499  	// creating an output for a job we identified earlier as one to block, and
  1500  	// if so we wait on a semaphore there until the test code signals that the
  1501  	// compaction may resume.
  1502  	//
  1503  	// If nextBlockedCompaction is non-zero, we must block the next compaction
  1504  	// out of start level nextBlockedCompaction - 3. 1 means block the next
  1505  	// intra-L0 compaction and 2 means block the next flush (as flushes have
  1506  	// a -1 start level).
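        	//
        	// The handshake, in outline:
        	//  1. The test sets nextBlockedCompaction and triggers work.
        	//  2. FlushBegin/CompactionBegin match the encoded level and record the
        	//     job ID; they are called with db.mu held, so they return promptly.
        	//  3. TableCreated for that job ID sends a fresh semaphore channel on
        	//     nextSem and blocks on it until the test signals resumption.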
  1507  	var nextBlockedCompaction, blockedJobID int
  1508  	var blockedCompactionsMu sync.Mutex // protects the above two variables.
  1509  	nextSem := make(chan chan struct{}, 1)
  1510  	var el EventListener
  1511  	el.EnsureDefaults(testLogger{t: t})
  1512  	el.FlushBegin = func(info FlushInfo) {
  1513  		blockedCompactionsMu.Lock()
  1514  		defer blockedCompactionsMu.Unlock()
  1515  		if nextBlockedCompaction == 2 {
  1516  			nextBlockedCompaction = 0
  1517  			blockedJobID = info.JobID
  1518  		}
  1519  	}
  1520  	el.CompactionBegin = func(info CompactionInfo) {
  1521  		// 0 = block nothing, 1 = block intra-L0 compaction, 2 = block flush,
  1522  		// 3 = block L0 -> LBase compaction, 4 = block compaction out of L1, and so on.
  1523  		blockedCompactionsMu.Lock()
  1524  		defer blockedCompactionsMu.Unlock()
  1525  		blockValue := info.Input[0].Level + 3
  1526  		if info.Input[0].Level == 0 && info.Output.Level == 0 {
  1527  			// Intra-L0 compaction, denoted by a blockValue of 1.
  1528  			blockValue = 1
  1529  		}
  1530  		if nextBlockedCompaction == blockValue {
  1531  			nextBlockedCompaction = 0
  1532  			blockedJobID = info.JobID
  1533  		}
  1534  	}
  1535  	el.TableCreated = func(info TableCreateInfo) {
  1536  		blockedCompactionsMu.Lock()
  1537  		if info.JobID != blockedJobID {
  1538  			blockedCompactionsMu.Unlock()
  1539  			return
  1540  		}
  1541  		blockedJobID = 0
  1542  		blockedCompactionsMu.Unlock()
  1543  		sem := make(chan struct{})
  1544  		nextSem <- sem
  1545  		<-sem
  1546  	}
  1547  	tel := TeeEventListener(MakeLoggingEventListener(testLogger{t: t}), el)
  1548  	opts.EventListener = &tel
  1549  	opts.Experimental.L0CompactionConcurrency = 1
  1550  	d, err := Open("", opts)
  1551  	require.NoError(t, err)
  1552  	defer func() {
  1553  		if d != nil {
  1554  			require.NoError(t, d.Close())
  1555  		}
  1556  	}()
  1557  
  1558  	printLSM := func() {
  1559  		d.mu.Lock()
  1560  		s := d.mu.versions.currentVersion().String()
  1561  		d.mu.Unlock()
  1562  		t.Logf("%s", s)
  1563  	}
  1564  
  1565  	// Create some sstables. These should go into L6. These are irrelevant for
  1566  	// the rest of the test.
  1567  	require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil))
  1568  	require.NoError(t, d.Flush())
  1569  	require.NoError(t, d.Set([]byte("d"), []byte("bar"), nil))
  1570  	require.NoError(t, d.Flush())
  1571  	require.NoError(t, d.Compact([]byte("a"), []byte("z"), true))
  1572  
  1573  	var baseCompactionSem, flushSem, intraL0Sem chan struct{}
  1574  	// Block an L0 -> LBase compaction. This is necessary to induce intra-L0
  1575  	// compactions later on.
  1576  	blockedCompactionsMu.Lock()
  1577  	nextBlockedCompaction = 3
  1578  	blockedCompactionsMu.Unlock()
  1579  	timeoutSem := time.After(channelTimeout)
  1580  	t.Log("blocking an L0 -> LBase compaction")
  1581  	// Write sstables to L0 until we're blocked on an L0 -> LBase compaction.
  1582  	breakLoop := false
  1583  	for !breakLoop {
  1584  		select {
  1585  		case sem := <-nextSem:
  1586  			baseCompactionSem = sem
  1587  			breakLoop = true
  1588  		case <-timeoutSem:
  1589  			t.Fatal("did not get blocked on an LBase compaction")
  1590  		default:
  1591  			require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil))
  1592  			require.NoError(t, d.Set([]byte("g"), []byte("bar"), nil))
  1593  			require.NoError(t, d.Flush())
  1594  			time.Sleep(100 * time.Millisecond)
  1595  		}
  1596  	}
  1597  	printLSM()
  1598  
  1599  	// Do 4 ingests, one with the key cc, one with bb and cc, and two with just bb.
  1600  	// The purpose of the sstable containing cc is to inflate the L0 sublevel
  1601  	// count of the interval at cc, as that's where we want the intra-L0 compaction
  1602  	// to be seeded. However we also need a file left of that interval to have
  1603  	// the same (or higher) sublevel to trigger the bug in
  1604  	// cockroachdb/cockroach#101896. That's why we ingest a file after it to
  1605  	// "bridge" the bb/cc intervals, and then ingest a file at bb. These go
  1606  	// into sublevels like this:
  1607  	//
  1608  	//    bb
  1609  	//    bb
  1610  	//    bb-----cc
  1611  	//           cc
  1612  	//
  1613  	// Eventually, we'll ingest a file containing a rangedel starting at cc,
  1614  	// which will land around here:
  1615  	//
  1616  	//    bb
  1617  	//    bb     cc---...
  1618  	//    bb-----cc
  1619  	//           cc
  1620  	{
  1621  		path := "ingest1.sst"
  1622  		f, err := memFS.Create(path)
  1623  		require.NoError(t, err)
  1624  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1625  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1626  		})
  1627  		require.NoError(t, w.Set([]byte("cc"), []byte("foo")))
  1628  		require.NoError(t, w.Close())
  1629  		require.NoError(t, d.Ingest([]string{path}))
  1630  	}
  1631  	{
  1632  		path := "ingest2.sst"
  1633  		f, err := memFS.Create(path)
  1634  		require.NoError(t, err)
  1635  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1636  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1637  		})
  1638  		require.NoError(t, w.Set([]byte("bb"), []byte("foo2")))
  1639  		require.NoError(t, w.Set([]byte("cc"), []byte("foo2")))
  1640  		require.NoError(t, w.Close())
  1641  		require.NoError(t, d.Ingest([]string{path}))
  1642  	}
  1643  	{
  1644  		path := "ingest3.sst"
  1645  		f, err := memFS.Create(path)
  1646  		require.NoError(t, err)
  1647  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1648  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1649  		})
  1650  		require.NoError(t, w.Set([]byte("bb"), []byte("foo3")))
  1651  		require.NoError(t, w.Close())
  1652  		require.NoError(t, d.Ingest([]string{path}))
  1653  	}
  1654  	{
  1655  		path := "ingest4.sst"
  1656  		f, err := memFS.Create(path)
  1657  		require.NoError(t, err)
  1658  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1659  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1660  		})
  1661  		require.NoError(t, w.Set([]byte("bb"), []byte("foo4")))
  1662  		require.NoError(t, w.Close())
  1663  		require.NoError(t, d.Ingest([]string{path}))
  1664  	}
  1665  
  1666  	// We now have a base compaction blocked. Block a memtable flush to cause
  1667  	// memtables to queue up.
  1668  	//
  1669  	// Memtable (stuck):
  1670  	//
  1671  	//   b-----------------g
  1672  	//
  1673  	// Relevant L0 sstables
  1674  	//
  1675  	//    bb
  1676  	//    bb
  1677  	//    bb-----cc
  1678  	//           cc
  1679  	blockedCompactionsMu.Lock()
  1680  	nextBlockedCompaction = 2
  1681  	blockedCompactionsMu.Unlock()
  1682  	t.Log("blocking a flush")
  1683  	require.NoError(t, d.Set([]byte("b"), []byte("foo2"), nil))
  1684  	require.NoError(t, d.Set([]byte("g"), []byte("bar2"), nil))
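        	// Use AsyncFlush: a synchronous Flush would deadlock here, since this is
        	// the very flush we arranged to block.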
  1685  	_, _ = d.AsyncFlush()
  1686  	select {
  1687  	case sem := <-nextSem:
  1688  		flushSem = sem
  1689  	case <-time.After(channelTimeout):
  1690  		t.Fatal("did not get blocked on a flush")
  1691  	}
  1692  	// Add one memtable to flush queue, and finish it off.
  1693  	//
  1694  	// Memtables (stuck):
  1695  	//
  1696  	//   b-----------------g (waiting to flush)
  1697  	//   b-----------------g (flushing, blocked)
  1698  	//
  1699  	// Relevant L0 sstables
  1700  	//
  1701  	//    bb
  1702  	//    bb
  1703  	//    bb-----cc
  1704  	//           cc
  1705  	require.NoError(t, d.Set([]byte("b"), []byte("foo3"), nil))
  1706  	require.NoError(t, d.Set([]byte("g"), []byte("bar3"), nil))
  1707  	// Note: this flush will wait for the earlier, blocked flush, but it closes
  1708  	// off the memtable, which is what we want.
  1709  	_, _ = d.AsyncFlush()
  1710  
  1711  	// Open a new mutable memtable. This gets us an earliestUnflushedSeqNum that
  1712  	// is earlier than the seqnum of the ingest below.
  1713  	require.NoError(t, d.Set([]byte("c"), []byte("somethingbigishappening"), nil))
  1714  	// Block an intra-L0 compaction, as one might happen around this time.
  1715  	blockedCompactionsMu.Lock()
  1716  	nextBlockedCompaction = 1
  1717  	blockedCompactionsMu.Unlock()
  1718  	t.Log("blocking an intra-L0 compaction")
  1719  	// Ingest a file containing a cc-e rangedel.
  1720  	//
  1721  	// Memtables:
  1722  	//
  1723  	//         c             (mutable)
  1724  	//   b-----------------g (waiting to flush)
  1725  	//   b-----------------g (flushing, blocked)
  1726  	//
  1727  	// Relevant L0 sstables
  1728  	//
  1729  	//    bb
  1730  	//    bb     cc-----e (just ingested)
  1731  	//    bb-----cc
  1732  	//           cc
  1733  	{
  1734  		path := "ingest5.sst"
  1735  		f, err := memFS.Create(path)
  1736  		require.NoError(t, err)
  1737  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1738  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1739  		})
  1740  		require.NoError(t, w.DeleteRange([]byte("cc"), []byte("e")))
  1741  		require.NoError(t, w.Close())
  1742  		require.NoError(t, d.Ingest([]string{path}))
  1743  	}
  1744  	t.Log("main ingest complete")
  1745  	printLSM()
  1746  	t.Logf("%s", d.Metrics().String())
  1747  
  1748  	require.NoError(t, d.Set([]byte("d"), []byte("ThisShouldNotBeDeleted"), nil))
  1749  
  1750  	// Do another ingest with a seqnum newer than d. The purpose of this is to
  1751  	// increase the LargestSeqNum of the intra-L0 compaction output *beyond*
  1752  	// the flush that contains d=ThisShouldNotBeDeleted, thereby causing
  1753  	// that point key to be deleted (in the buggy code).
  1754  	//
  1755  	// Memtables:
  1756  	//
  1757  	//         c-----d       (mutable)
  1758  	//   b-----------------g (waiting to flush)
  1759  	//   b-----------------g (flushing, blocked)
  1760  	//
  1761  	// Relevant L0 sstables
  1762  	//
  1763  	//    bb     cc
  1764  	//    bb     cc-----e (just ingested)
  1765  	//    bb-----cc
  1766  	//           cc
  1767  	{
  1768  		path := "ingest6.sst"
  1769  		f, err := memFS.Create(path)
  1770  		require.NoError(t, err)
  1771  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1772  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1773  		})
  1774  		require.NoError(t, w.Set([]byte("cc"), []byte("doesntmatter")))
  1775  		require.NoError(t, w.Close())
  1776  		require.NoError(t, d.Ingest([]string{path}))
  1777  	}
  1778  
  1779  	// Unblock earlier flushes. We will first finish flushing the blocked
  1780  	// memtable, and end up in this state:
  1781  	//
  1782  	// Memtables:
  1783  	//
  1784  	//         c-----d       (mutable)
  1785  	//   b-----------------g (waiting to flush)
  1786  	//
  1787  	// Relevant L0 sstables
  1788  	//
  1789  	//  b-------------------g (irrelevant, just flushed)
  1790  	//    bb     cc (has LargestSeqNum > earliestUnflushedSeqNum)
  1791  	//    bb     cc-----e (has a rangedel)
  1792  	//    bb-----cc
  1793  	//           cc
  1794  	//
  1795  	// Note that while b----g is relatively old (and so has a low LargestSeqNum),
  1796  	// it bridges a bunch of intervals. Had we regenerated sublevels from scratch,
  1797  	// it'd have gone below the cc-e sstable. But due to #101896, we just slapped
  1798  	// it on top. Now, as long as our seed interval is the one at cc and our seed
  1799  	// file is the just-flushed L0 sstable, we will go down and include anything
  1800  	// in that interval even if it has a LargestSeqNum > earliestUnflushedSeqNum.
  1801  	//
  1802  	// All asterisked L0 sstables should now get picked in an intra-L0 compaction
  1803  	// right after the flush finishes; we then block that compaction:
  1804  	//
  1805  	//  b-------------------g*
  1806  	//    bb*    cc*
  1807  	//    bb*    cc-----e*
  1808  	//    bb-----cc*
  1809  	//           cc*
  1810  	t.Log("unblocking flush")
  1811  	flushSem <- struct{}{}
  1812  	printLSM()
  1813  
  1814  	select {
  1815  	case sem := <-nextSem:
  1816  		intraL0Sem = sem
  1817  	case <-time.After(channelTimeout):
  1818  		t.Fatal("did not get blocked on an intra L0 compaction")
  1819  	}
  1820  
  1821  	// Ensure all memtables are flushed. This will mean d=ThisShouldNotBeDeleted
  1822  	// lands in L0. Since that was the last key written to a memtable, and the
  1823  	// ingestion at cc came after it, the output of the intra-L0 compaction
  1824  	// will elevate the cc-e rangedel above it and delete it (if #101896 is
  1825  	// not fixed).
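        	// AsyncFlush returns a channel that is closed once the requested flush
        	// completes (all earlier queued memtables must flush first).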
  1826  	ch, _ := d.AsyncFlush()
  1827  	<-ch
  1828  
  1829  	// Unblock earlier intra-L0 compaction.
  1830  	t.Log("unblocking intraL0")
  1831  	intraL0Sem <- struct{}{}
  1832  	printLSM()
  1833  
  1834  	// Try reading d a couple times.
  1835  	for i := 0; i < 2; i++ {
  1836  		val, closer, err := d.Get([]byte("d"))
  1837  		require.NoError(t, err)
  1838  		require.Equal(t, []byte("ThisShouldNotBeDeleted"), val)
  1839  		if closer != nil {
  1840  			closer.Close()
  1841  		}
  1842  		time.Sleep(100 * time.Millisecond)
  1843  	}
  1844  
  1845  	// Unblock everything.
  1846  	baseCompactionSem <- struct{}{}
  1847  }
  1848  
  1849  func BenchmarkDelete(b *testing.B) {
  1850  	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
  1851  	const keyCount = 10000
  1852  	var keys [keyCount][]byte
  1853  	for i := 0; i < keyCount; i++ {
  1854  		keys[i] = []byte(strconv.Itoa(rng.Int()))
  1855  	}
  1856  	val := bytes.Repeat([]byte("x"), 10)
  1857  
  1858  	benchmark := func(b *testing.B, useSingleDelete bool) {
        		// Pause timing during setup; Open shouldn't count toward the measured work.
        		b.StopTimer()
  1859  		d, err := Open(
  1860  			"",
  1861  			&Options{
  1862  				FS: vfs.NewMem(),
  1863  			})
  1864  		if err != nil {
  1865  			b.Fatal(err)
  1866  		}
  1867  		defer func() {
  1868  			if err := d.Close(); err != nil {
  1869  				b.Fatal(err)
  1870  			}
  1871  		}()
  1872  
  1873  		b.StartTimer()
  1874  		for _, key := range keys {
  1875  			_ = d.Set(key, val, nil)
  1876  			if useSingleDelete {
  1877  				_ = d.SingleDelete(key, nil)
  1878  			} else {
  1879  				_ = d.Delete(key, nil)
  1880  			}
  1881  		}
  1882  		// Manually flush as it is flushing/compaction where SingleDelete
  1883  		// performance shows up. With SingleDelete, we can elide all of the
  1884  		// SingleDelete and Set records.
  1885  		if err := d.Flush(); err != nil {
  1886  			b.Fatal(err)
  1887  		}
  1888  		b.StopTimer()
  1889  	}
  1890  
  1891  	b.Run("delete", func(b *testing.B) {
  1892  		for i := 0; i < b.N; i++ {
  1893  			benchmark(b, false)
  1894  		}
  1895  	})
  1896  
  1897  	b.Run("single-delete", func(b *testing.B) {
  1898  		for i := 0; i < b.N; i++ {
  1899  			benchmark(b, true)
  1900  		}
  1901  	})
  1902  }
  1903  
  1904  func BenchmarkNewIterReadAmp(b *testing.B) {
  1905  	for _, readAmp := range []int{10, 100, 1000} {
  1906  		b.Run(strconv.Itoa(readAmp), func(b *testing.B) {
  1907  			opts := &Options{
  1908  				FS:                    vfs.NewMem(),
  1909  				L0StopWritesThreshold: 1000,
  1910  			}
  1911  			opts.DisableAutomaticCompactions = true
  1912  
  1913  			d, err := Open("", opts)
  1914  			require.NoError(b, err)
  1915  
  1916  			for i := 0; i < readAmp; i++ {
  1917  				require.NoError(b, d.Set([]byte("a"), []byte("b"), NoSync))
  1918  				require.NoError(b, d.Flush())
  1919  			}
  1920  
  1921  			require.Equal(b, readAmp, d.Metrics().ReadAmp())
  1922  
  1923  			b.StopTimer()
  1924  			b.ResetTimer()
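        			// Time only the NewIter calls; iterator Close is excluded by
        			// stopping the timer around it.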
  1925  			for i := 0; i < b.N; i++ {
  1926  				b.StartTimer()
  1927  				iter, err := d.NewIter(nil)
  1928  				b.StopTimer()
        				require.NoError(b, err)
  1929  				require.NoError(b, iter.Close())
  1930  			}
  1931  
  1932  			require.NoError(b, d.Close())
  1933  		})
  1934  	}
  1935  }
  1936  
  1937  func verifyGet(t *testing.T, r Reader, key, expected []byte) {
  1938  	val, closer, err := r.Get(key)
  1939  	require.NoError(t, err)
  1940  	if !bytes.Equal(expected, val) {
  1941  		t.Fatalf("expected %s, but got %s", expected, val)
  1942  	}
  1943  	closer.Close()
  1944  }
  1945  
  1946  func verifyGetNotFound(t *testing.T, r Reader, key []byte) {
  1947  	val, _, err := r.Get(key)
  1948  	if err != base.ErrNotFound {
  1949  		t.Fatalf("expected ErrNotFound, but got %s (err: %v)", val, err)
  1950  	}
  1951  }
  1952  
  1953  func BenchmarkRotateMemtables(b *testing.B) {
  1954  	o := &Options{FS: vfs.NewMem(), MemTableSize: 64 << 20 /* 64 MB */}
  1955  	d, err := Open("", o)
  1956  	require.NoError(b, err)
  1957  
  1958  	// We want to jump to full-sized memtables.
  1959  	d.mu.Lock()
  1960  	d.mu.mem.nextSize = o.MemTableSize
  1961  	d.mu.Unlock()
  1962  	require.NoError(b, d.Flush())
  1963  
  1964  	b.ResetTimer()
  1965  	for i := 0; i < b.N; i++ {
  1966  		if err := d.Flush(); err != nil {
  1967  			b.Fatal(err)
  1968  		}
  1969  	}
        	require.NoError(b, d.Close())
  1970  }