github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/db_test.go

// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package pebble

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"path/filepath"
	"slices"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/cache"
	"github.com/cockroachdb/pebble/internal/invariants"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
	"github.com/cockroachdb/pebble/sstable"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/rand"
)

// try repeatedly calls f, sleeping between calls with exponential back-off,
// until f returns a nil error or the total sleep time is greater than or equal
// to maxTotalSleep. It always calls f at least once.
func try(initialSleep, maxTotalSleep time.Duration, f func() error) error {
	totalSleep := time.Duration(0)
	for d := initialSleep; ; d *= 2 {
		time.Sleep(d)
		totalSleep += d
		if err := f(); err == nil || totalSleep >= maxTotalSleep {
			return err
		}
	}
}

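// TestTry verifies that try keeps retrying with back-off: f fails until a
// goroutine closes the channel roughly 1ms in, so try must make at least one
// attempt and keep retrying until it observes the closed channel.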
func TestTry(t *testing.T) {
	c := make(chan struct{})
	go func() {
		time.Sleep(1 * time.Millisecond)
		close(c)
	}()

	attemptsMu := sync.Mutex{}
	attempts := 0

	err := try(100*time.Microsecond, 20*time.Second, func() error {
		attemptsMu.Lock()
		attempts++
		attemptsMu.Unlock()

		select {
		default:
			return errors.New("timed out")
		case <-c:
			return nil
		}
	})
	require.NoError(t, err)

	attemptsMu.Lock()
	a := attempts
	attemptsMu.Unlock()

	if a == 0 {
		t.Fatalf("attempts: got 0, want > 0")
	}
}

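// TestBasicReads opens pre-built databases from testdata (cloned into an
// in-memory filesystem) and verifies that Get returns the expected value for
// each key at each on-disk stage. The check treats ErrNotFound and an empty
// value identically.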
func TestBasicReads(t *testing.T) {
	testCases := []struct {
		dirname string
		wantMap map[string]string
	}{
		{
			"db-stage-1",
			map[string]string{
				"aaa":  "",
				"bar":  "",
				"baz":  "",
				"foo":  "",
				"quux": "",
				"zzz":  "",
			},
		},
		{
			"db-stage-2",
			map[string]string{
				"aaa":  "",
				"bar":  "",
				"baz":  "three",
				"foo":  "four",
				"quux": "",
				"zzz":  "",
			},
		},
		{
			"db-stage-3",
			map[string]string{
				"aaa":  "",
				"bar":  "",
				"baz":  "three",
				"foo":  "four",
				"quux": "",
				"zzz":  "",
			},
		},
		{
			"db-stage-4",
			map[string]string{
				"aaa":  "",
				"bar":  "",
				"baz":  "",
				"foo":  "five",
				"quux": "six",
				"zzz":  "",
			},
		},
	}
	for _, tc := range testCases {
		t.Run(tc.dirname, func(t *testing.T) {
			fs := vfs.NewMem()
			_, err := vfs.Clone(vfs.Default, fs, filepath.Join("testdata", tc.dirname), tc.dirname)
			if err != nil {
				t.Fatalf("%s: cloneFileSystem failed: %v", tc.dirname, err)
			}
			d, err := Open(tc.dirname, testingRandomized(t, &Options{
				FS: fs,
			}))
			if err != nil {
				t.Fatalf("%s: Open failed: %v", tc.dirname, err)
			}
			for key, want := range tc.wantMap {
				got, closer, err := d.Get([]byte(key))
				if err != nil && err != ErrNotFound {
					t.Fatalf("%s: Get(%q) failed: %v", tc.dirname, key, err)
				}
				if string(got) != string(want) {
					t.Fatalf("%s: Get(%q): got %q, want %q", tc.dirname, key, got, want)
				}
				if closer != nil {
					closer.Close()
				}
			}
			err = d.Close()
			if err != nil {
				t.Fatalf("%s: Close failed: %v", tc.dirname, err)
			}
		})
	}
}

func TestBasicWrites(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	names := []string{
		"Alatar",
		"Gandalf",
		"Pallando",
		"Radagast",
		"Saruman",
		"Joe",
	}
	wantMap := map[string]string{}

	inBatch, batch, pending := false, &Batch{}, [][]string(nil)
	set0 := func(k, v string) error {
		return d.Set([]byte(k), []byte(v), nil)
	}
	del0 := func(k string) error {
		return d.Delete([]byte(k), nil)
	}
	set1 := func(k, v string) error {
		batch.Set([]byte(k), []byte(v), nil)
		return nil
	}
	del1 := func(k string) error {
		batch.Delete([]byte(k), nil)
		return nil
	}
	set, del := set0, del0

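	// The test cases form a tiny command language: "set <k> <v>" and
	// "delete <k>" apply directly to the DB, "batch" redirects subsequent
	// commands into a fresh Batch, and "apply" commits that batch. The
	// leading whitespace is cosmetic; wantMap tracks the expected visible
	// state after every step.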
	testCases := []string{
		"set Gandalf Grey",
		"set Saruman White",
		"set Radagast Brown",
		"delete Saruman",
		"set Gandalf White",
		"batch",
		"  set Alatar AliceBlue",
		"apply",
		"delete Pallando",
		"set Alatar AntiqueWhite",
		"set Pallando PapayaWhip",
		"batch",
		"apply",
		"set Pallando PaleVioletRed",
		"batch",
		"  delete Alatar",
		"  set Gandalf GhostWhite",
		"  set Saruman Seashell",
		"  delete Saruman",
		"  set Saruman SeaGreen",
		"  set Radagast RosyBrown",
		"  delete Pallando",
		"apply",
		"delete Radagast",
		"delete Radagast",
		"delete Radagast",
		"set Gandalf Goldenrod",
		"set Pallando PeachPuff",
		"batch",
		"  delete Joe",
		"  delete Saruman",
		"  delete Radagast",
		"  delete Pallando",
		"  delete Gandalf",
		"  delete Alatar",
		"apply",
		"set Joe Plumber",
	}
	for i, tc := range testCases {
		s := strings.Split(strings.TrimSpace(tc), " ")
		switch s[0] {
		case "set":
			if err := set(s[1], s[2]); err != nil {
				t.Fatalf("#%d %s: %v", i, tc, err)
			}
			if inBatch {
				pending = append(pending, s)
			} else {
				wantMap[s[1]] = s[2]
			}
		case "delete":
			if err := del(s[1]); err != nil {
				t.Fatalf("#%d %s: %v", i, tc, err)
			}
			if inBatch {
				pending = append(pending, s)
			} else {
				delete(wantMap, s[1])
			}
		case "batch":
			inBatch, batch, set, del = true, &Batch{}, set1, del1
		case "apply":
			if err := d.Apply(batch, nil); err != nil {
				t.Fatalf("#%d %s: %v", i, tc, err)
			}
			for _, p := range pending {
				switch p[0] {
				case "set":
					wantMap[p[1]] = p[2]
				case "delete":
					delete(wantMap, p[1])
				}
			}
			inBatch, pending, set, del = false, nil, set0, del0
		default:
			t.Fatalf("#%d %s: bad test case: %q", i, tc, s)
		}

		fail := false
		for _, name := range names {
			g, closer, err := d.Get([]byte(name))
			if err != nil && err != ErrNotFound {
				t.Errorf("#%d %s: Get(%q): %v", i, tc, name, err)
				fail = true
			}
			got, gOK := string(g), err == nil
			want, wOK := wantMap[name]
			if got != want || gOK != wOK {
				t.Errorf("#%d %s: Get(%q): got %q, %t, want %q, %t",
					i, tc, name, got, gOK, want, wOK)
				fail = true
			}
			if closer != nil {
				closer.Close()
			}
		}
		if fail {
			return
		}
	}

	require.NoError(t, d.Close())
}

func TestRandomWrites(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS:           vfs.NewMem(),
		MemTableSize: 8 * 1024,
	}))
	require.NoError(t, err)

	keys := [64][]byte{}
	wants := [64]int{}
	for k := range keys {
		keys[k] = []byte(strconv.Itoa(k))
		wants[k] = -1
	}
	xxx := bytes.Repeat([]byte("x"), 512)

	rng := rand.New(rand.NewSource(123))
	const N = 1000
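	// On each iteration, with probability 19/20 set a random key to a
	// random-length prefix of xxx (recording the length in wants), otherwise
	// delete it (recording -1). Verify every key against wants on roughly 1
	// in 50 iterations, and always on the last one.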
	for i := 0; i < N; i++ {
		k := rng.Intn(len(keys))
		if rng.Intn(20) != 0 {
			wants[k] = rng.Intn(len(xxx) + 1)
			if err := d.Set(keys[k], xxx[:wants[k]], nil); err != nil {
				t.Fatalf("i=%d: Set: %v", i, err)
			}
		} else {
			wants[k] = -1
			if err := d.Delete(keys[k], nil); err != nil {
				t.Fatalf("i=%d: Delete: %v", i, err)
			}
		}

		if i != N-1 && rng.Intn(50) != 0 {
			continue
		}
		for k := range keys {
			got := -1
			if v, closer, err := d.Get(keys[k]); err != nil {
				if err != ErrNotFound {
					t.Fatalf("Get: %v", err)
				}
			} else {
				got = len(v)
				closer.Close()
			}
			if got != wants[k] {
				t.Errorf("i=%d, k=%d: got %d, want %d", i, k, got, wants[k])
			}
		}
	}

	require.NoError(t, d.Close())
}

func TestLargeBatch(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS:                          vfs.NewMem(),
		MemTableSize:                1400,
		MemTableStopWritesThreshold: 100,
	}))
	require.NoError(t, err)

	verifyLSM := func(expected string) func() error {
		return func() error {
			d.mu.Lock()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			if expected != s {
				if testing.Verbose() {
					fmt.Println(strings.TrimSpace(s))
				}
				return errors.Errorf("expected %s, but found %s", expected, s)
			}
			return nil
		}
	}

	logNum := func() base.DiskFileNum {
		d.mu.Lock()
		defer d.mu.Unlock()
		return d.mu.log.queue[len(d.mu.log.queue)-1].fileNum
	}
	fileSize := func(fileNum base.DiskFileNum) int64 {
		info, err := d.opts.FS.Stat(base.MakeFilepath(d.opts.FS, "", fileTypeLog, fileNum))
		require.NoError(t, err)
		return info.Size()
	}
	memTableCreationSeqNum := func() uint64 {
		d.mu.Lock()
		defer d.mu.Unlock()
		return d.mu.mem.mutable.logSeqNum
	}

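	// Snapshot the WAL file number, its current size, and the next sequence
	// number before committing the large batch, so the rotation checks below
	// can compare against this starting state.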
	startLogNum := logNum()
	startLogStartSize := fileSize(startLogNum)
	startSeqNum := d.mu.versions.logSeqNum.Load()

	// Write a key with a value larger than the memtable size.
	require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("a"), 512), nil))

	// Verify that the large batch was written to the WAL that existed before
	// it was committed: WAL rotation occurred, the old WAL (which the large
	// batch was written to) grew, and the new WAL is empty.
	endLogNum := logNum()
	if startLogNum == endLogNum {
		t.Fatal("expected WAL rotation")
	}
	startLogEndSize := fileSize(startLogNum)
	if startLogEndSize == startLogStartSize {
		t.Fatalf("expected large batch to be written to %s.log, but file size unchanged at %d",
			startLogNum, startLogEndSize)
	}
	endLogSize := fileSize(endLogNum)
	if endLogSize != 0 {
		t.Fatalf("expected %s.log to be empty, but found %d", endLogNum, endLogSize)
	}
	if creationSeqNum := memTableCreationSeqNum(); creationSeqNum <= startSeqNum {
		t.Fatalf("expected memTable.logSeqNum=%d > largeBatch.seqNum=%d", creationSeqNum, startSeqNum)
	}

	// Verify this results in one L0 table being created.
	require.NoError(t, try(100*time.Microsecond, 20*time.Second,
		verifyLSM("0.0:\n  000005:[a#10,SET-a#10,SET]\n")))

	require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("b"), 512), nil))

	// Verify this results in a second L0 table being created.
	require.NoError(t, try(100*time.Microsecond, 20*time.Second,
		verifyLSM("0.0:\n  000005:[a#10,SET-a#10,SET]\n  000007:[b#11,SET-b#11,SET]\n")))

	// Allocate a bunch of batches to exhaust the batchPool. None of these
	// batches should have a non-zero count.
	for i := 0; i < 10; i++ {
		b := d.NewBatch()
		require.EqualValues(t, 0, b.Count())
	}

	require.NoError(t, d.Close())
}

func TestGetNoCache(t *testing.T) {
	cache := NewCache(0)
	defer cache.Unref()

	d, err := Open("", testingRandomized(t, &Options{
		Cache: cache,
		FS:    vfs.NewMem(),
	}))
	require.NoError(t, err)

	require.NoError(t, d.Set([]byte("a"), []byte("aa"), nil))
	require.NoError(t, d.Flush())
	verifyGet(t, d, []byte("a"), []byte("aa"))

	require.NoError(t, d.Close())
}

func TestGetMerge(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	key := []byte("a")
	verify := func(expected string) {
		val, closer, err := d.Get(key)
		require.NoError(t, err)

		if expected != string(val) {
			t.Fatalf("expected %s, but got %s", expected, val)
		}
		closer.Close()
	}

	const val = "1"
	for i := 1; i <= 3; i++ {
		require.NoError(t, d.Merge(key, []byte(val), nil))

		expected := strings.Repeat(val, i)
		verify(expected)

		require.NoError(t, d.Flush())
		verify(expected)
	}

	require.NoError(t, d.Close())
}

func TestMergeOrderSameAfterFlush(t *testing.T) {
	// Ensure that the compaction iterator (used by flush) and the user
	// iterator process merge operands in the same order.
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	key := []byte("a")
	verify := func(expected string) {
		iter, _ := d.NewIter(nil)
		if !iter.SeekGE([]byte("a")) {
			t.Fatal("expected one value, but got empty iterator")
		}
		if expected != string(iter.Value()) {
			t.Fatalf("expected %s, but got %s", expected, string(iter.Value()))
		}
		if !iter.SeekLT([]byte("b")) {
			t.Fatal("expected one value, but got empty iterator")
		}
		if expected != string(iter.Value()) {
			t.Fatalf("expected %s, but got %s", expected, string(iter.Value()))
		}
		require.NoError(t, iter.Close())
	}

	require.NoError(t, d.Merge(key, []byte("0"), nil))
	require.NoError(t, d.Merge(key, []byte("1"), nil))

	verify("01")
	require.NoError(t, d.Flush())
	verify("01")

	require.NoError(t, d.Close())
}

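// closableMerger is a base.ValueMerger whose Finish returns the merger itself
// as the io.Closer, letting TestMergerClosing observe exactly when Pebble
// closes the merger after a read.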
type closableMerger struct {
	lastBuf []byte
	closed  bool
}

func (m *closableMerger) MergeNewer(value []byte) error {
	m.lastBuf = append(m.lastBuf[:0], value...)
	return nil
}

func (m *closableMerger) MergeOlder(value []byte) error {
	m.lastBuf = append(m.lastBuf[:0], value...)
	return nil
}

func (m *closableMerger) Finish(includesBase bool) ([]byte, io.Closer, error) {
	return m.lastBuf, m, nil
}

func (m *closableMerger) Close() error {
	m.closed = true
	return nil
}

func TestMergerClosing(t *testing.T) {
	m := &closableMerger{}

	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
		Merger: &Merger{
			Merge: func(key, value []byte) (base.ValueMerger, error) {
				return m, m.MergeNewer(value)
			},
		},
	}))
	require.NoError(t, err)

	defer func() {
		require.NoError(t, d.Close())
	}()

	err = d.Merge([]byte("a"), []byte("b"), nil)
	require.NoError(t, err)
	require.False(t, m.closed)

	val, closer, err := d.Get([]byte("a"))
	require.NoError(t, err)
	require.Equal(t, []byte("b"), val)
	require.NotNil(t, closer)
	require.False(t, m.closed)
	_ = closer.Close()
	require.True(t, m.closed)
}

func TestLogData(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	defer func() {
		require.NoError(t, d.Close())
	}()

	require.NoError(t, d.LogData([]byte("foo"), Sync))
	require.NoError(t, d.LogData([]byte("bar"), Sync))
	// TODO(itsbilal): Confirm that we wrote some bytes to the WAL.
	// For now, LogData proceeding ahead without a panic is good enough.
}

func TestSingleDeleteGet(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()

	key := []byte("key")
	val := []byte("val")

	require.NoError(t, d.Set(key, val, nil))
	verifyGet(t, d, key, val)

	key2 := []byte("key2")
	val2 := []byte("val2")

	require.NoError(t, d.Set(key2, val2, nil))
	verifyGet(t, d, key2, val2)

	require.NoError(t, d.SingleDelete(key2, nil))
	verifyGetNotFound(t, d, key2)
}

func TestSingleDeleteFlush(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()

	key := []byte("key")
	valFirst := []byte("first")
	valSecond := []byte("second")
	key2 := []byte("key2")
	val2 := []byte("val2")

	require.NoError(t, d.Set(key, valFirst, nil))
	require.NoError(t, d.Set(key2, val2, nil))
	require.NoError(t, d.Flush())

	require.NoError(t, d.SingleDelete(key, nil))
	require.NoError(t, d.Set(key, valSecond, nil))
	require.NoError(t, d.Delete(key2, nil))
	require.NoError(t, d.Set(key2, val2, nil))
	require.NoError(t, d.Flush())

	require.NoError(t, d.SingleDelete(key, nil))
	require.NoError(t, d.Delete(key2, nil))
	require.NoError(t, d.Flush())

	verifyGetNotFound(t, d, key)
	verifyGetNotFound(t, d, key2)
}

func TestUnremovableSingleDelete(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS:                    vfs.NewMem(),
		L0CompactionThreshold: 8,
	}))
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()

	key := []byte("key")
	valFirst := []byte("valFirst")
	valSecond := []byte("valSecond")

	require.NoError(t, d.Set(key, valFirst, nil))
	ss := d.NewSnapshot()
	defer ss.Close()
	require.NoError(t, d.SingleDelete(key, nil))
	require.NoError(t, d.Set(key, valSecond, nil))
	require.NoError(t, d.Flush())

	verifyGet(t, ss, key, valFirst)
	verifyGet(t, d, key, valSecond)

	require.NoError(t, d.SingleDelete(key, nil))

	verifyGet(t, ss, key, valFirst)
	verifyGetNotFound(t, d, key)

	require.NoError(t, d.Flush())

	verifyGet(t, ss, key, valFirst)
	verifyGetNotFound(t, d, key)
}

func TestIterLeak(t *testing.T) {
	for _, leak := range []bool{true, false} {
		t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) {
			for _, flush := range []bool{true, false} {
				t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) {
					d, err := Open("", testingRandomized(t, &Options{
						FS: vfs.NewMem(),
					}))
					require.NoError(t, err)

					require.NoError(t, d.Set([]byte("a"), []byte("a"), nil))
					if flush {
						require.NoError(t, d.Flush())
					}
					iter, _ := d.NewIter(nil)
					iter.First()
					if !leak {
						require.NoError(t, iter.Close())
						require.NoError(t, d.Close())
					} else {
						defer iter.Close()
						if err := d.Close(); err == nil {
							t.Fatalf("expected failure, but found success")
						} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
							t.Fatalf("expected leaked iterators, but found %+v", err)
						} else {
							t.Log(err.Error())
						}
					}
				})
			}
		})
	}
}

// Make sure that we detect an iterator leak when only one DB closes while the
// second DB still holds a reference to the TableCache.
func TestIterLeakSharedCache(t *testing.T) {
	for _, leak := range []bool{true, false} {
		t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) {
			for _, flush := range []bool{true, false} {
				t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) {
					d1, err := Open("", &Options{
						FS: vfs.NewMem(),
					})
					require.NoError(t, err)

					d2, err := Open("", &Options{
						FS: vfs.NewMem(),
					})
					require.NoError(t, err)

					require.NoError(t, d1.Set([]byte("a"), []byte("a"), nil))
					if flush {
						require.NoError(t, d1.Flush())
					}

					require.NoError(t, d2.Set([]byte("a"), []byte("a"), nil))
					if flush {
						require.NoError(t, d2.Flush())
					}

					// Check if leak detection works with only one db closing.
					{
						iter1, _ := d1.NewIter(nil)
						iter1.First()
						if !leak {
							require.NoError(t, iter1.Close())
							require.NoError(t, d1.Close())
						} else {
							defer iter1.Close()
							if err := d1.Close(); err == nil {
								t.Fatalf("expected failure, but found success")
							} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
								t.Fatalf("expected leaked iterators, but found %+v", err)
							} else {
								t.Log(err.Error())
							}
						}
					}

					{
						iter2, _ := d2.NewIter(nil)
						iter2.First()
						if !leak {
							require.NoError(t, iter2.Close())
							require.NoError(t, d2.Close())
						} else {
							defer iter2.Close()
							if err := d2.Close(); err == nil {
								t.Fatalf("expected failure, but found success")
							} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
								t.Fatalf("expected leaked iterators, but found %+v", err)
							} else {
								t.Log(err.Error())
							}
						}
					}
				})
			}
		})
	}
}

func TestMemTableReservation(t *testing.T) {
	opts := &Options{
		Cache:        NewCache(128 << 10 /* 128 KB */),
		MemTableSize: initialMemTableSize,
		FS:           vfs.NewMem(),
	}
	defer opts.Cache.Unref()
	opts.testingRandomized(t)
	opts.EnsureDefaults()
	// We're going to be looking at and asserting the global memtable
	// reservation amount below, so we don't want to race with any triggered
	// stats collections.
	opts.private.disableTableStats = true

	// Add a block to the cache. Note that the memtable size is larger than the
	// cache size, so opening the DB should cause this block to be evicted.
	tmpID := opts.Cache.NewID()
	helloWorld := []byte("hello world")
	value := cache.Alloc(len(helloWorld))
	copy(value.Buf(), helloWorld)
	opts.Cache.Set(tmpID, base.FileNum(0).DiskFileNum(), 0, value).Release()

	d, err := Open("", opts)
	require.NoError(t, err)

	checkReserved := func(expected int64) {
		t.Helper()
		if reserved := d.memTableReserved.Load(); expected != reserved {
			t.Fatalf("expected %d reserved, but found %d", expected, reserved)
		}
	}

	checkReserved(int64(opts.MemTableSize))
	if refs := d.mu.mem.queue[len(d.mu.mem.queue)-1].readerRefs.Load(); refs != 2 {
		t.Fatalf("expected 2 refs, but found %d", refs)
	}
	// Verify the memtable reservation has caused our test block to be evicted.
	if h := opts.Cache.Get(tmpID, base.FileNum(0).DiskFileNum(), 0); h.Get() != nil {
		t.Fatalf("expected failure, but found success: %s", h.Get())
	}

	// Flush the memtable. The memtable reservation should double because the
	// old memtable will be recycled, saved for the next memtable allocation.
	require.NoError(t, d.Flush())
	checkReserved(int64(2 * opts.MemTableSize))
	// Flushing again would leave the reservation unchanged, because at most
	// one memtable may be preserved for recycling.

	// Flush in the presence of an active iterator. The iterator will hold a
	// reference to a readState which will in turn hold a reader reference to
	// the memtable.
	iter, _ := d.NewIter(nil)
	require.NoError(t, d.Flush())
	// The flush moved the recycled memtable into position as the active
	// mutable memtable. There are now two allocated memtables: 1 mutable and
	// 1 pinned by the iterator's read state.
	checkReserved(2 * int64(opts.MemTableSize))

	// Flushing again should increase the reservation total to 3x: 1 active
	// mutable, 1 for recycling, 1 pinned by the iterator's read state.
	require.NoError(t, d.Flush())
	checkReserved(3 * int64(opts.MemTableSize))

	// Closing the iterator will release the iterator's read state, and the old
	// memtable will be moved into position as the next memtable to recycle.
	// There was already a memtable ready to be recycled, so that memtable will
	// be freed and the overall reservation total is reduced to 2x.
	require.NoError(t, iter.Close())
	checkReserved(2 * int64(opts.MemTableSize))

	require.NoError(t, d.Close())
}

func TestMemTableReservationLeak(t *testing.T) {
	d, err := Open("", &Options{FS: vfs.NewMem()})
	require.NoError(t, err)

	d.mu.Lock()
	last := d.mu.mem.queue[len(d.mu.mem.queue)-1]
	last.readerRef()
	defer func() {
		last.readerUnref(true)
	}()
	d.mu.Unlock()
	if err := d.Close(); err == nil {
		t.Fatalf("expected failure, but found success")
	} else if !strings.HasPrefix(err.Error(), "leaked memtable reservation:") {
		t.Fatalf("expected leaked memtable reservation, but found %+v", err)
	} else {
		t.Log(err.Error())
	}
}

func TestCacheEvict(t *testing.T) {
	cache := NewCache(10 << 20)
	defer cache.Unref()

	d, err := Open("", &Options{
		Cache: cache,
		FS:    vfs.NewMem(),
	})
	require.NoError(t, err)

	for i := 0; i < 1000; i++ {
		key := []byte(fmt.Sprintf("%04d", i))
		require.NoError(t, d.Set(key, key, nil))
	}

	require.NoError(t, d.Flush())
	iter, _ := d.NewIter(nil)
	for iter.First(); iter.Valid(); iter.Next() {
	}
	require.NoError(t, iter.Close())

	if size := cache.Size(); size == 0 {
		t.Fatalf("expected non-zero cache size")
	}

	for i := 0; i < 1000; i++ {
		key := []byte(fmt.Sprintf("%04d", i))
		require.NoError(t, d.Delete(key, nil))
	}

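	// Deleting every key and compacting removes the sstables entirely; once
	// the DB is closed, all of their blocks should have been evicted from the
	// cache, leaving it empty.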
	require.NoError(t, d.Compact([]byte("0"), []byte("1"), false))

	require.NoError(t, d.Close())

	if size := cache.Size(); size != 0 {
		t.Fatalf("expected empty cache, but found %d", size)
	}
}

func TestFlushEmpty(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	// Flushing an empty memtable should not fail.
	require.NoError(t, d.Flush())
	require.NoError(t, d.Close())
}

func TestRollManifest(t *testing.T) {
	toPreserve := rand.Int31n(5) + 1
	opts := &Options{
		MaxManifestFileSize:   1,
		L0CompactionThreshold: 10,
		L0StopWritesThreshold: 1000,
		FS:                    vfs.NewMem(),
		NumPrevManifest:       int(toPreserve),
	}
	opts.DisableAutomaticCompactions = true
	opts.testingRandomized(t)
	d, err := Open("", opts)
	require.NoError(t, err)

	manifestFileNumber := func() base.DiskFileNum {
		d.mu.Lock()
		defer d.mu.Unlock()
		return d.mu.versions.manifestFileNum
	}
	sizeRolloverState := func() (int64, int64) {
		d.mu.Lock()
		defer d.mu.Unlock()
		return d.mu.versions.rotationHelper.DebugInfo()
	}

	current := func() string {
		desc, err := Peek(d.dirname, d.opts.FS)
		require.NoError(t, err)
		return desc.ManifestFilename
	}

	lastManifestNum := manifestFileNumber()
	manifestNums := []base.DiskFileNum{lastManifestNum}
	for i := 0; i < 5; i++ {
		// MaxManifestFileSize is 1, but the rollover logic also counts edits
		// since the last snapshot to decide on rollover, so do as many flushes
		// as it demands.
		lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState()
		var expectedLastSnapshotCount, expectedEditsSinceSnapshotCount int64
		switch i {
		case 0:
			// DB is empty.
			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 0
		case 1:
			// First edit that caused rollover is not in the snapshot.
			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 1
		case 2:
			// One flush is in the snapshot. One flush in the edit.
			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 1, 1
		case 3:
			// Two flushes in the snapshot. One flush in the edit. Will need to
			// do two more flushes, the first of which will be in the next
			// snapshot.
			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 2, 1
		case 4:
			// Four flushes in the snapshot. One flush in the edit. Will need
			// to do four more flushes, three of which will be in the snapshot.
			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 4, 1
		}
		require.Equal(t, expectedLastSnapshotCount, lastSnapshotCount)
		require.Equal(t, expectedEditsSinceSnapshotCount, editsSinceSnapshotCount)
		// Number of flushes to do to trigger the rollover.
		steps := int(lastSnapshotCount - editsSinceSnapshotCount + 1)
		// Steps can be <= 0, but we need to do at least one edit to trigger
		// the rollover logic.
		if steps <= 0 {
			steps = 1
		}
		for j := 0; j < steps; j++ {
			require.NoError(t, d.Set([]byte("a"), nil, nil))
			require.NoError(t, d.Flush())
		}
		d.TestOnlyWaitForCleaning()
		num := manifestFileNumber()
		if lastManifestNum == num {
			t.Fatalf("manifest failed to roll %d: %d == %d", i, lastManifestNum, num)
		}

		manifestNums = append(manifestNums, num)
		lastManifestNum = num

		expectedCurrent := fmt.Sprintf("MANIFEST-%s", lastManifestNum)
		if v := current(); expectedCurrent != v {
			t.Fatalf("expected %s, but found %s", expectedCurrent, v)
		}
	}
	lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState()
	require.EqualValues(t, 8, lastSnapshotCount)
	require.EqualValues(t, 1, editsSinceSnapshotCount)

	files, err := d.opts.FS.List("")
	require.NoError(t, err)

	var manifests []string
	for _, filename := range files {
		fileType, _, ok := base.ParseFilename(d.opts.FS, filename)
		if !ok {
			continue
		}
		if fileType == fileTypeManifest {
			manifests = append(manifests, filename)
		}
	}
	slices.Sort(manifests)

	var expected []string
	for i := len(manifestNums) - int(toPreserve) - 1; i < len(manifestNums); i++ {
		expected = append(
			expected,
			fmt.Sprintf("MANIFEST-%s", manifestNums[i]),
		)
	}
	require.EqualValues(t, expected, manifests)

	// Test the logic that uses the future snapshot size to rollover.
	// Reminder: we have a snapshot with 8 files and the manifest has 1 edit
	// (flush) with 1 file.
	// Add 8 more files with a different key.
	lastManifestNum = manifestFileNumber()
	for j := 0; j < 8; j++ {
		require.NoError(t, d.Set([]byte("c"), nil, nil))
		require.NoError(t, d.Flush())
	}
	lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState()
	// Need 16 more files in edits to trigger a rollover.
	require.EqualValues(t, 16, lastSnapshotCount)
	require.EqualValues(t, 1, editsSinceSnapshotCount)
	require.NotEqual(t, manifestFileNumber(), lastManifestNum)
	lastManifestNum = manifestFileNumber()
	// Do a compaction that moves 8 of the files from L0 to 1 file in L6. This
	// adds 9 files in edits. We still need 6 more files in edits based on the
	// last snapshot. But the current version has only 9 L0 files and 1 L6
	// file, for a total of 10 files. So 1 flush should push us over that
	// threshold.
	d.Compact([]byte("c"), []byte("d"), false)
	lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState()
	require.EqualValues(t, 16, lastSnapshotCount)
	require.EqualValues(t, 10, editsSinceSnapshotCount)
	require.Equal(t, manifestFileNumber(), lastManifestNum)
	require.NoError(t, d.Set([]byte("c"), nil, nil))
	require.NoError(t, d.Flush())
	lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState()
	require.EqualValues(t, 10, lastSnapshotCount)
	require.EqualValues(t, 1, editsSinceSnapshotCount)
	require.NotEqual(t, manifestFileNumber(), lastManifestNum)

	require.NoError(t, d.Close())
}

func TestDBClosed(t *testing.T) {
	d, err := Open("", &Options{
		FS: vfs.NewMem(),
	})
	require.NoError(t, err)
	require.NoError(t, d.Close())

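	// catch invokes f and converts a panic into the error it carried. Pebble
	// panics with ErrClosed when an operation is invoked on a closed DB, so
	// each assertion below recovers that panic and checks the error.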
	catch := func(f func()) (err error) {
		defer func() {
			if r := recover(); r != nil {
				err = r.(error)
			}
		}()
		f()
		return nil
	}

	require.True(t, errors.Is(catch(func() { _ = d.Close() }), ErrClosed))

	require.True(t, errors.Is(catch(func() { _ = d.Compact(nil, nil, false) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Flush() }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _, _ = d.AsyncFlush() }), ErrClosed))

	require.True(t, errors.Is(catch(func() { _, _, _ = d.Get(nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Delete(nil, nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.DeleteRange(nil, nil, nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Ingest(nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.LogData(nil, nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Merge(nil, nil, nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.RatchetFormatMajorVersion(internalFormatNewest) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Set(nil, nil, nil) }), ErrClosed))

	require.True(t, errors.Is(catch(func() { _ = d.NewSnapshot() }), ErrClosed))

	b := d.NewIndexedBatch()
	require.True(t, errors.Is(catch(func() { _ = b.Commit(nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Apply(b, nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _, _ = b.NewIter(nil) }), ErrClosed))
}

func TestDBConcurrentCommitCompactFlush(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	// Concurrently commit, compact, and flush in order to stress the locking
	// around those operations.
	const n = 1000
	var wg sync.WaitGroup
	wg.Add(n)
	for i := 0; i < n; i++ {
		go func(i int) {
			defer wg.Done()
			_ = d.Set([]byte(fmt.Sprint(i)), nil, nil)
			var err error
			switch i % 3 {
			case 0:
				err = d.Compact(nil, []byte("\xff"), false)
			case 1:
				err = d.Flush()
			case 2:
				_, err = d.AsyncFlush()
			}
			require.NoError(t, err)
		}(i)
	}
	wg.Wait()

	require.NoError(t, d.Close())
}

func TestDBConcurrentCompactClose(t *testing.T) {
	// Test closing while a compaction is ongoing. This ensures compaction code
	// detects the close and finishes cleanly.
	mem := vfs.NewMem()
	for i := 0; i < 100; i++ {
		opts := &Options{
			FS: mem,
			MaxConcurrentCompactions: func() int {
				return 2
			},
		}
		d, err := Open("", testingRandomized(t, opts))
		require.NoError(t, err)

		// Ingest a series of files containing a single key each. As the outer
		// loop progresses, these ingestions will build up compaction debt
		// causing compactions to be running concurrently with the close below.
		for j := 0; j < 10; j++ {
			path := fmt.Sprintf("ext%d", j)
			f, err := mem.Create(path)
			require.NoError(t, err)
			w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
				TableFormat: d.FormatMajorVersion().MaxTableFormat(),
			})
			require.NoError(t, w.Set([]byte(fmt.Sprint(j)), nil))
			require.NoError(t, w.Close())
			require.NoError(t, d.Ingest([]string{path}))
		}

		require.NoError(t, d.Close())
	}
}

func TestDBApplyBatchNilDB(t *testing.T) {
	d, err := Open("", &Options{FS: vfs.NewMem()})
	require.NoError(t, err)

	b1 := &Batch{}
	b1.Set([]byte("test"), nil, nil)

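	// Applying one batch to another (with no DB attached) must not set
	// memTableSize; that is only computed once the batch is applied to a DB.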
	b2 := &Batch{}
	b2.Apply(b1, nil)
	if b2.memTableSize != 0 {
		t.Fatalf("expected memTableSize to not be set")
	}
	require.NoError(t, d.Apply(b2, nil))
	if b1.memTableSize != b2.memTableSize {
		t.Fatalf("expected memTableSize %d, but found %d", b1.memTableSize, b2.memTableSize)
	}

	require.NoError(t, d.Close())
}

func TestDBApplyBatchMismatch(t *testing.T) {
	srcDB, err := Open("", &Options{FS: vfs.NewMem()})
	require.NoError(t, err)

	applyDB, err := Open("", &Options{FS: vfs.NewMem()})
	require.NoError(t, err)

	err = func() (err error) {
		defer func() {
			if v := recover(); v != nil {
				err = errors.Errorf("%v", v)
			}
		}()

		b := srcDB.NewBatch()
		b.Set([]byte("test"), nil, nil)
		return applyDB.Apply(b, nil)
	}()
	if err == nil || !strings.Contains(err.Error(), "pebble: batch db mismatch:") {
		t.Fatalf("expected error, but found %v", err)
	}

	require.NoError(t, srcDB.Close())
	require.NoError(t, applyDB.Close())
}

func TestCloseCleanerRace(t *testing.T) {
	mem := vfs.NewMem()
	for i := 0; i < 20; i++ {
		db, err := Open("", testingRandomized(t, &Options{FS: mem}))
		require.NoError(t, err)
		require.NoError(t, db.Set([]byte("a"), []byte("something"), Sync))
		require.NoError(t, db.Flush())
		// Ref the sstables so they cannot be deleted.
		it, _ := db.NewIter(nil)
		require.NotNil(t, it)
		require.NoError(t, db.DeleteRange([]byte("a"), []byte("b"), Sync))
		require.NoError(t, db.Compact([]byte("a"), []byte("b"), false))
		// Only the iterator is keeping the sstables alive.
		files, err := mem.List("/")
		require.NoError(t, err)
		var found bool
		for _, f := range files {
			if strings.HasSuffix(f, ".sst") {
				found = true
				break
			}
		}
		require.True(t, found)
		// Close the iterator and the DB in succession so that file cleaning
		// races with DB.Close(); the latter should wait for file cleaning to
		// finish.
		require.NoError(t, it.Close())
		require.NoError(t, db.Close())
		files, err = mem.List("/")
		require.NoError(t, err)
		for _, f := range files {
			if strings.HasSuffix(f, ".sst") {
				t.Fatalf("found sst: %s", f)
			}
		}
	}
}

func TestSSTablesWithApproximateSpanBytes(t *testing.T) {
	d, err := Open("", &Options{
		FS: vfs.NewMem(),
	})
	require.NoError(t, err)
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	// Create two sstables.
	// The first sstable is contained within the key span (fileNum = 5).
	require.NoError(t, d.Set([]byte("c"), nil, nil))
	require.NoError(t, d.Set([]byte("d"), nil, nil))
	require.NoError(t, d.Flush())

	// The second sstable partially overlaps the key span (fileNum = 7).
	require.NoError(t, d.Set([]byte("d"), nil, nil))
	require.NoError(t, d.Set([]byte("g"), nil, nil))
	require.NoError(t, d.Flush())

	// WithApproximateSpanBytes cannot be used without WithProperties.
	_, err = d.SSTables(WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes())
	require.Error(t, err)

	// WithApproximateSpanBytes cannot be used without WithKeyRangeFilter.
	_, err = d.SSTables(WithProperties(), WithApproximateSpanBytes())
	require.Error(t, err)

	tableInfos, err := d.SSTables(WithProperties(), WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes())
	require.NoError(t, err)

	for _, levelTables := range tableInfos {
		for _, table := range levelTables {
			approximateSpanBytes, err := strconv.ParseInt(table.Properties.UserProperties["approximate-span-bytes"], 10, 64)
			require.NoError(t, err)
			if table.FileNum == 5 {
				require.Equal(t, uint64(approximateSpanBytes), table.Size)
			}
			if table.FileNum == 7 {
				require.Less(t, uint64(approximateSpanBytes), table.Size)
			}
		}
	}
}

func TestFilterSSTablesWithOption(t *testing.T) {
	d, err := Open("", &Options{
		FS: vfs.NewMem(),
	})
	require.NoError(t, err)
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	// Create two sstables.
	require.NoError(t, d.Set([]byte("/Table/5"), nil, nil))
	require.NoError(t, d.Flush())
	require.NoError(t, d.Set([]byte("/Table/10"), nil, nil))
	require.NoError(t, d.Flush())

	tableInfos, err := d.SSTables(WithKeyRangeFilter([]byte("/Table/5"), []byte("/Table/6")))
	require.NoError(t, err)

	totalTables := 0
	for _, levelTables := range tableInfos {
		totalTables += len(levelTables)
	}

	// With the key range filter, the second sstable should not be returned.
	require.EqualValues(t, 1, totalTables)

	tableInfos, err = d.SSTables()
	require.NoError(t, err)

	totalTables = 0
	for _, levelTables := range tableInfos {
		totalTables += len(levelTables)
	}

	// Without the filter, both sstables are returned.
	require.EqualValues(t, 2, totalTables)
}

func TestSSTables(t *testing.T) {
	d, err := Open("", &Options{
		FS: vfs.NewMem(),
	})
	require.NoError(t, err)
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	// Create two sstables.
	require.NoError(t, d.Set([]byte("hello"), nil, nil))
	require.NoError(t, d.Flush())
	require.NoError(t, d.Set([]byte("world"), nil, nil))
	require.NoError(t, d.Flush())

	// By default, the returned table infos should not contain Properties.
	tableInfos, err := d.SSTables()
	require.NoError(t, err)
	for _, levelTables := range tableInfos {
		for _, info := range levelTables {
			require.Nil(t, info.Properties)
		}
	}

	// With the WithProperties() option, Properties in the table infos should
	// not be nil.
	tableInfos, err = d.SSTables(WithProperties())
	require.NoError(t, err)
	for _, levelTables := range tableInfos {
		for _, info := range levelTables {
			require.NotNil(t, info.Properties)
		}
	}
}

type testTracer struct {
	enabledOnlyForNonBackgroundContext bool
	buf                                strings.Builder
}

func (t *testTracer) Infof(format string, args ...interface{})  {}
func (t *testTracer) Errorf(format string, args ...interface{}) {}
func (t *testTracer) Fatalf(format string, args ...interface{}) {}

func (t *testTracer) Eventf(ctx context.Context, format string, args ...interface{}) {
	if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() {
		return
	}
	fmt.Fprintf(&t.buf, format, args...)
	fmt.Fprint(&t.buf, "\n")
}

func (t *testTracer) IsTracingEnabled(ctx context.Context) bool {
	if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() {
		return false
	}
	return true
}

func TestTracing(t *testing.T) {
	if !invariants.Enabled {
		// The test relies on timing behavior injected when invariants.Enabled.
		return
	}
	var tracer testTracer
	c := NewCache(0)
	defer c.Unref()
	d, err := Open("", &Options{
		FS:              vfs.NewMem(),
		Cache:           c,
		LoggerAndTracer: &tracer,
	})
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()

	// Create an sstable.
	require.NoError(t, d.Set([]byte("hello"), nil, nil))
	require.NoError(t, d.Flush())
	_, closer, err := d.Get([]byte("hello"))
	require.NoError(t, err)
	closer.Close()
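	// Invariants builds inject an artificial 5ms latency into block reads,
	// which is what the fixed trace strings below capture: the first pair of
	// reads comes from initializing the sstable reader, the second pair from
	// iterating to serve the Get.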
	readerInitTraceString := "reading 37 bytes took 5ms\nreading 628 bytes took 5ms\n"
	iterTraceString := "reading 27 bytes took 5ms\nreading 29 bytes took 5ms\n"
	require.Equal(t, readerInitTraceString+iterTraceString, tracer.buf.String())

	// Get again, but since it currently uses context.Background(), no trace
	// output is produced.
	tracer.buf.Reset()
	tracer.enabledOnlyForNonBackgroundContext = true
	_, closer, err = d.Get([]byte("hello"))
	require.NoError(t, err)
	closer.Close()
	require.Equal(t, "", tracer.buf.String())

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	iter, _ := d.NewIterWithContext(ctx, nil)
	iter.SeekGE([]byte("hello"))
	iter.Close()
	require.Equal(t, iterTraceString, tracer.buf.String())

	tracer.buf.Reset()
	snap := d.NewSnapshot()
	iter, _ = snap.NewIterWithContext(ctx, nil)
	iter.SeekGE([]byte("hello"))
	iter.Close()
	require.Equal(t, iterTraceString, tracer.buf.String())
	snap.Close()

	tracer.buf.Reset()
	b := d.NewIndexedBatch()
	iter, err = b.NewIterWithContext(ctx, nil)
	require.NoError(t, err)
	iter.SeekGE([]byte("hello"))
	iter.Close()
	require.Equal(t, iterTraceString, tracer.buf.String())
	b.Close()
}

func TestMemtableIngestInversion(t *testing.T) {
	memFS := vfs.NewMem()
	opts := &Options{
		FS:                          memFS,
		MemTableSize:                256 << 10, // 256 KB
		MemTableStopWritesThreshold: 1000,
		L0StopWritesThreshold:       1000,
		L0CompactionThreshold:       2,
		MaxConcurrentCompactions: func() int {
			return 1000
		},
	}

	const channelTimeout = 5 * time.Second

	// We induce delay in compactions by passing in an EventListener that
	// stalls on the first TableCreated event for a compaction job we want to
	// block. FlushBegin and CompactionBegin have info on compaction
	// start/output levels, which is what we need to identify what compactions
	// to block. However, FlushBegin and CompactionBegin are called while
	// holding db.mu, so we cannot block those events forever. Instead, we grab
	// the job ID from those events and store it. Then during TableCreated, we
	// check if we're creating an output for a job we have identified earlier
	// as one to block, and then hold on a semaphore there until there's a
	// signal from the test code to resume with the compaction.
	//
	// If nextBlockedCompaction is non-zero, we must block the next compaction
	// out of the start level nextBlockedCompaction - 3. 1 means block the next
	// intra-L0 compaction and 2 means block the next flush (as flushes have
	// a -1 start level).
	var nextBlockedCompaction, blockedJobID int
	var blockedCompactionsMu sync.Mutex // protects the above two variables.
	nextSem := make(chan chan struct{}, 1)
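	// The handshake: when TableCreated fires for a blocked job, it sends a
	// fresh semaphore channel on nextSem and then blocks receiving from that
	// semaphore until the test signals it to let the compaction proceed.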
  1509  	var el EventListener
  1510  	el.EnsureDefaults(testLogger{t: t})
  1511  	el.FlushBegin = func(info FlushInfo) {
  1512  		blockedCompactionsMu.Lock()
  1513  		defer blockedCompactionsMu.Unlock()
  1514  		if nextBlockedCompaction == 2 {
  1515  			nextBlockedCompaction = 0
  1516  			blockedJobID = info.JobID
  1517  		}
  1518  	}
  1519  	el.CompactionBegin = func(info CompactionInfo) {
  1520  		// 0 = block nothing, 1 = block intra-L0 compaction, 2 = block flush,
  1521  		// 3 = block L0 -> LBase compaction, 4 = block compaction out of L1, and so on.
  1522  		blockedCompactionsMu.Lock()
  1523  		defer blockedCompactionsMu.Unlock()
  1524  		blockValue := info.Input[0].Level + 3
  1525  		if info.Input[0].Level == 0 && info.Output.Level == 0 {
  1526  			// Intra L0 compaction, denoted by casValue of 1.
  1527  			blockValue = 1
  1528  		}
  1529  		if nextBlockedCompaction == blockValue {
  1530  			nextBlockedCompaction = 0
  1531  			blockedJobID = info.JobID
  1532  		}
  1533  	}
  1534  	el.TableCreated = func(info TableCreateInfo) {
  1535  		blockedCompactionsMu.Lock()
  1536  		if info.JobID != blockedJobID {
  1537  			blockedCompactionsMu.Unlock()
  1538  			return
  1539  		}
  1540  		blockedJobID = 0
  1541  		blockedCompactionsMu.Unlock()
  1542  		sem := make(chan struct{})
  1543  		nextSem <- sem
  1544  		<-sem
  1545  	}
  1546  	tel := TeeEventListener(MakeLoggingEventListener(testLogger{t: t}), el)
  1547  	opts.EventListener = &tel
  1548  	opts.Experimental.L0CompactionConcurrency = 1
  1549  	d, err := Open("", opts)
  1550  	require.NoError(t, err)
  1551  	defer func() {
  1552  		if d != nil {
  1553  			require.NoError(t, d.Close())
  1554  		}
  1555  	}()
  1556  
  1557  	printLSM := func() {
  1558  		d.mu.Lock()
  1559  		s := d.mu.versions.currentVersion().String()
  1560  		d.mu.Unlock()
  1561  		t.Logf("%s", s)
  1562  	}
  1563  
  1564  	// Create some sstables. These should go into L6. These are irrelevant for
  1565  	// the rest of the test.
  1566  	require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil))
  1567  	require.NoError(t, d.Flush())
  1568  	require.NoError(t, d.Set([]byte("d"), []byte("bar"), nil))
  1569  	require.NoError(t, d.Flush())
  1570  	require.NoError(t, d.Compact([]byte("a"), []byte("z"), true))
  1571  
  1572  	var baseCompactionSem, flushSem, intraL0Sem chan struct{}
  1573  	// Block an L0 -> LBase compaction. This is necessary to induce intra-L0
  1574  	// compactions later on.
  1575  	blockedCompactionsMu.Lock()
  1576  	nextBlockedCompaction = 3
  1577  	blockedCompactionsMu.Unlock()
  1578  	timeoutSem := time.After(channelTimeout)
  1579  	t.Log("blocking an L0 -> LBase compaction")
  1580  	// Write an sstable to L0 until we're blocked on an L0 -> LBase compaction.
  1581  	breakLoop := false
  1582  	for !breakLoop {
  1583  		select {
  1584  		case sem := <-nextSem:
  1585  			baseCompactionSem = sem
  1586  			breakLoop = true
  1587  		case <-timeoutSem:
  1588  			t.Fatal("did not get blocked on an LBase compaction")
  1589  		default:
  1590  			require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil))
  1591  			require.NoError(t, d.Set([]byte("g"), []byte("bar"), nil))
  1592  			require.NoError(t, d.Flush())
  1593  			time.Sleep(100 * time.Millisecond)
  1594  		}
  1595  	}
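        	// The L0 -> LBase compaction is now suspended inside its
        	// TableCreated hook; sending on baseCompactionSem at the end of the
        	// test releases it.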
  1596  	printLSM()
  1597  
  1598  	// Do 4 ingests, one with the key cc, one with bb and cc, and two with just bb.
  1599  	// The purpose of the sstable containing cc is to inflate the L0 sublevel
  1600  	// count of the interval at cc, as that's where we want the intra-L0 compaction
  1601  	// to be seeded. However, we also need a file left of that interval to have
  1602  	// the same (or higher) sublevel to trigger the bug in
  1603  	// cockroachdb/cockroach#101896. That's why we ingest a file after it to
  1604  	// "bridge" the bb/cc intervals, and then ingest a file at bb. These go
  1605  	// into sublevels like this:
  1606  	//
  1607  	//    bb
  1608  	//    bb
  1609  	//    bb-----cc
  1610  	//           cc
  1611  	//
  1612  	// Eventually, we'll drop an ingested file containing a range del starting at
  1613  	// cc around here:
  1614  	//
  1615  	//    bb
  1616  	//    bb     cc---...
  1617  	//    bb-----cc
  1618  	//           cc
  1619  	{
  1620  		path := "ingest1.sst"
  1621  		f, err := memFS.Create(path)
  1622  		require.NoError(t, err)
  1623  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1624  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1625  		})
  1626  		require.NoError(t, w.Set([]byte("cc"), []byte("foo")))
  1627  		require.NoError(t, w.Close())
  1628  		require.NoError(t, d.Ingest([]string{path}))
  1629  	}
  1630  	{
  1631  		path := "ingest2.sst"
  1632  		f, err := memFS.Create(path)
  1633  		require.NoError(t, err)
  1634  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1635  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1636  		})
  1637  		require.NoError(t, w.Set([]byte("bb"), []byte("foo2")))
  1638  		require.NoError(t, w.Set([]byte("cc"), []byte("foo2")))
  1639  		require.NoError(t, w.Close())
  1640  		require.NoError(t, d.Ingest([]string{path}))
  1641  	}
  1642  	{
  1643  		path := "ingest3.sst"
  1644  		f, err := memFS.Create(path)
  1645  		require.NoError(t, err)
  1646  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1647  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1648  		})
  1649  		require.NoError(t, w.Set([]byte("bb"), []byte("foo3")))
  1650  		require.NoError(t, w.Close())
  1651  		require.NoError(t, d.Ingest([]string{path}))
  1652  	}
  1653  	{
  1654  		path := "ingest4.sst"
  1655  		f, err := memFS.Create(path)
  1656  		require.NoError(t, err)
  1657  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1658  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1659  		})
  1660  		require.NoError(t, w.Set([]byte("bb"), []byte("foo4")))
  1661  		require.NoError(t, w.Close())
  1662  		require.NoError(t, d.Ingest([]string{path}))
  1663  	}
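        	// Each ingested file overlaps the L0 sstables already written, so
        	// every ingest lands in a new L0 sublevel, producing the stacking
        	// shown in the diagram.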
  1664  
  1665  	// We now have a base compaction blocked. Block a memtable flush to cause
  1666  	// memtables to queue up.
  1667  	//
  1668  	// Memtable (stuck):
  1669  	//
  1670  	//   b-----------------g
  1671  	//
  1672  	// Relevant L0 sstables
  1673  	//
  1674  	//    bb
  1675  	//    bb
  1676  	//    bb-----cc
  1677  	//           cc
  1678  	blockedCompactionsMu.Lock()
  1679  	nextBlockedCompaction = 2
  1680  	blockedCompactionsMu.Unlock()
  1681  	t.Log("blocking a flush")
  1682  	require.NoError(t, d.Set([]byte("b"), []byte("foo2"), nil))
  1683  	require.NoError(t, d.Set([]byte("g"), []byte("bar2"), nil))
  1684  	_, _ = d.AsyncFlush()
  1685  	select {
  1686  	case sem := <-nextSem:
  1687  		flushSem = sem
  1688  	case <-time.After(channelTimeout):
  1689  		t.Fatal("did not get blocked on a flush")
  1690  	}
  1691  	// Add one memtable to the flush queue, and finish it off.
  1692  	//
  1693  	// Memtables (stuck):
  1694  	//
  1695  	//   b-----------------g (waiting to flush)
  1696  	//   b-----------------g (flushing, blocked)
  1697  	//
  1698  	// Relevant L0 sstables
  1699  	//
  1700  	//    bb
  1701  	//    bb
  1702  	//    bb-----cc
  1703  	//           cc
  1704  	require.NoError(t, d.Set([]byte("b"), []byte("foo3"), nil))
  1705  	require.NoError(t, d.Set([]byte("g"), []byte("bar3"), nil))
  1706  	// Note: this flush will wait for the earlier, blocked flush, but it
  1707  	// closes off the memtable, which is what we want.
  1708  	_, _ = d.AsyncFlush()
  1709  
  1710  	// Open a new mutable memtable. This gets us an earliestUnflushedSeqNum
  1711  	// earlier than the seqnum of the ingest below.
  1712  	require.NoError(t, d.Set([]byte("c"), []byte("somethingbigishappening"), nil))
  1713  	// Block an intra-L0 compaction, as one might happen around this time.
  1714  	blockedCompactionsMu.Lock()
  1715  	nextBlockedCompaction = 1
  1716  	blockedCompactionsMu.Unlock()
  1717  	t.Log("blocking an intra-L0 compaction")
  1718  	// Ingest a file containing a cc-e rangedel.
  1719  	//
  1720  	// Memtables:
  1721  	//
  1722  	//         c             (mutable)
  1723  	//   b-----------------g (waiting to flush)
  1724  	//   b-----------------g (flushing, blocked)
  1725  	//
  1726  	// Relevant L0 sstables
  1727  	//
  1728  	//    bb
  1729  	//    bb     cc-----e (just ingested)
  1730  	//    bb-----cc
  1731  	//           cc
  1732  	{
  1733  		path := "ingest5.sst"
  1734  		f, err := memFS.Create(path)
  1735  		require.NoError(t, err)
  1736  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1737  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1738  		})
  1739  		require.NoError(t, w.DeleteRange([]byte("cc"), []byte("e")))
  1740  		require.NoError(t, w.Close())
  1741  		require.NoError(t, d.Ingest([]string{path}))
  1742  	}
  1743  	t.Log("main ingest complete")
  1744  	printLSM()
  1745  	t.Logf("%s", d.Metrics().String())
  1746  
  1747  	require.NoError(t, d.Set([]byte("d"), []byte("ThisShouldNotBeDeleted"), nil))
  1748  
  1749  	// Do another ingest with a seqnum newer than d. The purpose of this is to
  1750  	// increase the LargestSeqNum of the intra-L0 compaction output *beyond*
  1751  	// the flush that contains d=ThisShouldNotBeDeleted, thereby causing
  1752  	// that point key to be deleted (in the buggy code).
  1753  	//
  1754  	// Memtables:
  1755  	//
  1756  	//         c-----d       (mutable)
  1757  	//   b-----------------g (waiting to flush)
  1758  	//   b-----------------g (flushing, blocked)
  1759  	//
  1760  	// Relevant L0 sstables
  1761  	//
  1762  	//    bb     cc
  1763  	//    bb     cc-----e (just ingested)
  1764  	//    bb-----cc
  1765  	//           cc
  1766  	{
  1767  		path := "ingest6.sst"
  1768  		f, err := memFS.Create(path)
  1769  		require.NoError(t, err)
  1770  		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
  1771  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  1772  		})
  1773  		require.NoError(t, w.Set([]byte("cc"), []byte("doesntmatter")))
  1774  		require.NoError(t, w.Close())
  1775  		require.NoError(t, d.Ingest([]string{path}))
  1776  	}
  1777  
  1778  	// Unblock earlier flushes. We will first finish flushing the blocked
  1779  	// memtable, and end up in this state:
  1780  	//
  1781  	// Memtables:
  1782  	//
  1783  	//         c-----d       (mutable)
  1784  	//   b-----------------g (waiting to flush)
  1785  	//
  1786  	// Relevant L0 sstables
  1787  	//
  1788  	//  b-------------------g (irrelevant, just flushed)
  1789  	//    bb     cc (has LargestSeqNum > earliestUnflushedSeqNum)
  1790  	//    bb     cc-----e (has a rangedel)
  1791  	//    bb-----cc
  1792  	//           cc
  1793  	//
  1794  	// Note that while b----g is relatively old (and so has a low LargestSeqNum),
  1795  	// it bridges a bunch of intervals. Had we regenerated sublevels from scratch,
  1796  	// it'd have gone below the cc-e sstable. But due to #101896, we just slapped
  1797  	// it on top. Now, as long as our seed interval is the one at cc and our seed
  1798  	// file is the just-flushed L0 sstable, we will go down and include anything
  1799  	// in that interval even if it has a LargestSeqNum > earliestUnflushedSeqNum.
  1800  	//
  1801  	// All asterisked L0 sstables should now get picked in an intra-L0 compaction
  1802  	// right after the flush finishes, that we then block:
  1803  	//
  1804  	//  b-------------------g*
  1805  	//    bb*    cc*
  1806  	//    bb*    cc-----e*
  1807  	//    bb-----cc*
  1808  	//           cc*
  1809  	t.Log("unblocking flush")
  1810  	flushSem <- struct{}{}
  1811  	printLSM()
  1812  
  1813  	select {
  1814  	case sem := <-nextSem:
  1815  		intraL0Sem = sem
  1816  	case <-time.After(channelTimeout):
  1817  		t.Fatal("did not get blocked on an intra L0 compaction")
  1818  	}
  1819  
  1820  	// Ensure all memtables are flushed. This means d=ThisShouldNotBeDeleted
  1821  	// will land in L0. Since that was the last key written to a memtable
  1822  	// and the ingestion at cc came after it, the output of the intra-L0
  1823  	// compaction will elevate the cc-e rangedel above it and delete it
  1824  	// (if #101896 is not fixed).
  1825  	ch, _ := d.AsyncFlush()
  1826  	<-ch
  1827  
  1828  	// Unblock earlier intra-L0 compaction.
  1829  	t.Log("unblocking intraL0")
  1830  	intraL0Sem <- struct{}{}
  1831  	printLSM()
  1832  
  1833  	// Try reading d a couple of times.
  1834  	for i := 0; i < 2; i++ {
  1835  		val, closer, err := d.Get([]byte("d"))
  1836  		require.NoError(t, err)
  1837  		require.Equal(t, []byte("ThisShouldNotBeDeleted"), val)
  1838  		if closer != nil {
  1839  			closer.Close()
  1840  		}
  1841  		time.Sleep(100 * time.Millisecond)
  1842  	}
  1843  
  1844  	// Unblock everything.
  1845  	baseCompactionSem <- struct{}{}
  1846  }
  1847  
  1848  func BenchmarkDelete(b *testing.B) {
  1849  	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
  1850  	const keyCount = 10000
  1851  	var keys [keyCount][]byte
  1852  	for i := 0; i < keyCount; i++ {
  1853  		keys[i] = []byte(strconv.Itoa(rng.Int()))
  1854  	}
  1855  	val := bytes.Repeat([]byte("x"), 10)
  1856  
  1857  	benchmark := func(b *testing.B, useSingleDelete bool) {
        		// The timer is running on entry to the sub-benchmark; stop it so
        		// that opening the DB isn't measured. b.StartTimer below resumes it.
        		b.StopTimer()
  1858  		d, err := Open(
  1859  			"",
  1860  			&Options{
  1861  				FS: vfs.NewMem(),
  1862  			})
  1863  		if err != nil {
  1864  			b.Fatal(err)
  1865  		}
  1866  		defer func() {
  1867  			if err := d.Close(); err != nil {
  1868  				b.Fatal(err)
  1869  			}
  1870  		}()
  1871  
  1872  		b.StartTimer()
  1873  		for _, key := range keys {
  1874  			_ = d.Set(key, val, nil)
  1875  			if useSingleDelete {
  1876  				_ = d.SingleDelete(key, nil)
  1877  			} else {
  1878  				_ = d.Delete(key, nil)
  1879  			}
  1880  		}
  1881  		// Manually flush, as it is during flushing/compaction that
  1882  		// SingleDelete performance shows up. With SingleDelete, we can elide
  1883  		// all of the SingleDelete and Set records.
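        		// Note that SingleDelete is only well-defined when the key has
        		// been Set at most once since the last deletion, which holds
        		// here: each key is written exactly once before being deleted.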
  1884  		if err := d.Flush(); err != nil {
  1885  			b.Fatal(err)
  1886  		}
  1887  		b.StopTimer()
  1888  	}
  1889  
  1890  	b.Run("delete", func(b *testing.B) {
  1891  		for i := 0; i < b.N; i++ {
  1892  			benchmark(b, false)
  1893  		}
  1894  	})
  1895  
  1896  	b.Run("single-delete", func(b *testing.B) {
  1897  		for i := 0; i < b.N; i++ {
  1898  			benchmark(b, true)
  1899  		}
  1900  	})
  1901  }
  1902  
  1903  func BenchmarkNewIterReadAmp(b *testing.B) {
  1904  	for _, readAmp := range []int{10, 100, 1000} {
  1905  		b.Run(strconv.Itoa(readAmp), func(b *testing.B) {
  1906  			opts := &Options{
  1907  				FS:                    vfs.NewMem(),
  1908  				L0StopWritesThreshold: 1000,
  1909  			}
  1910  			opts.DisableAutomaticCompactions = true
  1911  
  1912  			d, err := Open("", opts)
  1913  			require.NoError(b, err)
  1914  
  1915  			for i := 0; i < readAmp; i++ {
  1916  				require.NoError(b, d.Set([]byte("a"), []byte("b"), NoSync))
  1917  				require.NoError(b, d.Flush())
  1918  			}
  1919  
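        			// With automatic compactions disabled, each flush adds one
        			// overlapping L0 sstable (and thus one sublevel), so read
        			// amplification equals the number of flushes.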
  1920  			require.Equal(b, readAmp, d.Metrics().ReadAmp())
  1921  
  1922  			b.StopTimer()
  1923  			b.ResetTimer()
  1924  			for i := 0; i < b.N; i++ {
  1925  				b.StartTimer()
  1926  				iter, _ := d.NewIter(nil)
  1927  				b.StopTimer()
  1928  				require.NoError(b, iter.Close())
  1929  			}
  1930  
  1931  			require.NoError(b, d.Close())
  1932  		})
  1933  	}
  1934  }
  1935  
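        // verifyGet asserts that reading key through r yields expected. The
        // closer returned by Get guards the value's backing memory and must be
        // closed once the value has been examined.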
  1936  func verifyGet(t *testing.T, r Reader, key, expected []byte) {
  1937  	val, closer, err := r.Get(key)
  1938  	require.NoError(t, err)
  1939  	if !bytes.Equal(expected, val) {
  1940  		t.Fatalf("expected %s, but got %s", expected, val)
  1941  	}
  1942  	closer.Close()
  1943  }
  1944  
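        // verifyGetNotFound asserts that key is absent from r, i.e. Get
        // returns base.ErrNotFound.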
  1945  func verifyGetNotFound(t *testing.T, r Reader, key []byte) {
  1946  	val, _, err := r.Get(key)
  1947  	if err != base.ErrNotFound {
  1948  		t.Fatalf("expected nil, but got %s", val)
  1949  	}
  1950  }
  1951  
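        // BenchmarkRotateMemtables measures the fixed cost of flushing an empty
        // memtable, i.e. the cost of rotating to a new mutable memtable.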
  1952  func BenchmarkRotateMemtables(b *testing.B) {
  1953  	o := &Options{FS: vfs.NewMem(), MemTableSize: 64 << 20 /* 64 MB */}
  1954  	d, err := Open("", o)
  1955  	require.NoError(b, err)
  1956  
  1957  	// We want to jump to full-sized memtables.
  1958  	d.mu.Lock()
  1959  	d.mu.mem.nextSize = o.MemTableSize
  1960  	d.mu.Unlock()
  1961  	require.NoError(b, d.Flush())
  1962  
  1963  	b.ResetTimer()
  1964  	for i := 0; i < b.N; i++ {
  1965  		if err := d.Flush(); err != nil {
  1966  			b.Fatal(err)
  1967  		}
  1968  	}
  1969  }