github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/compaction_test.go (about)

     1  // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	crand "crypto/rand"
    11  	"fmt"
    12  	"math"
    13  	"math/rand"
    14  	"path/filepath"
    15  	"reflect"
    16  	"regexp"
    17  	"runtime"
    18  	"slices"
    19  	"sort"
    20  	"strconv"
    21  	"strings"
    22  	"sync/atomic"
    23  	"testing"
    24  	"time"
    25  
    26  	"github.com/cockroachdb/datadriven"
    27  	"github.com/cockroachdb/errors"
    28  	"github.com/cockroachdb/errors/oserror"
    29  	"github.com/cockroachdb/pebble/internal/base"
    30  	"github.com/cockroachdb/pebble/internal/keyspan"
    31  	"github.com/cockroachdb/pebble/internal/manifest"
    32  	"github.com/cockroachdb/pebble/internal/testkeys"
    33  	"github.com/cockroachdb/pebble/objstorage"
    34  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    35  	"github.com/cockroachdb/pebble/objstorage/remote"
    36  	"github.com/cockroachdb/pebble/sstable"
    37  	"github.com/cockroachdb/pebble/vfs"
    38  	"github.com/cockroachdb/pebble/vfs/errorfs"
    39  	"github.com/stretchr/testify/require"
    40  )
    41  
    42  func newVersion(opts *Options, files [numLevels][]*fileMetadata) *version {
    43  	return manifest.NewVersion(
    44  		opts.Comparer.Compare,
    45  		opts.Comparer.FormatKey,
    46  		opts.FlushSplitBytes,
    47  		files)
    48  }
    49  
// compactionPickerForTesting is a deterministic compactionPicker stub used by
// tests: it always proposes a compaction out of the configured level whenever
// score >= 1, ignoring real heuristics.
type compactionPickerForTesting struct {
	score         float64           // compactions are only picked when score >= 1
	level         int               // input level to compact from
	baseLevel     int               // output level used when level == 0
	opts          *Options          // passed through to the real pick helpers
	vers          *manifest.Version // version the picker operates on
	maxLevelBytes [7]int64          // per-level size targets forwarded to pickAutoLPositive
}

// Compile-time check that the stub satisfies compactionPicker.
var _ compactionPicker = &compactionPickerForTesting{}
    60  
    61  func (p *compactionPickerForTesting) getScores([]compactionInfo) [numLevels]float64 {
    62  	return [numLevels]float64{}
    63  }
    64  
// getBaseLevel reports the configured base level verbatim.
func (p *compactionPickerForTesting) getBaseLevel() int {
	return p.baseLevel
}
    68  
// estimatedCompactionDebt always reports zero debt; the stub does not model it.
func (p *compactionPickerForTesting) estimatedCompactionDebt(l0ExtraSize uint64) uint64 {
	return 0
}
    72  
    73  func (p *compactionPickerForTesting) forceBaseLevel1() {}
    74  
    75  func (p *compactionPickerForTesting) pickAuto(env compactionEnv) (pc *pickedCompaction) {
    76  	if p.score < 1 {
    77  		return nil
    78  	}
    79  	outputLevel := p.level + 1
    80  	if p.level == 0 {
    81  		outputLevel = p.baseLevel
    82  	}
    83  	iter := p.vers.Levels[p.level].Iter()
    84  	iter.First()
    85  	cInfo := candidateLevelInfo{
    86  		level:       p.level,
    87  		outputLevel: outputLevel,
    88  		file:        iter.Take(),
    89  	}
    90  	if cInfo.level == 0 {
    91  		return pickL0(env, p.opts, p.vers, p.baseLevel)
    92  	}
    93  	return pickAutoLPositive(env, p.opts, p.vers, cInfo, p.baseLevel, p.maxLevelBytes)
    94  }
    95  
// pickElisionOnlyCompaction never proposes an elision-only compaction.
func (p *compactionPickerForTesting) pickElisionOnlyCompaction(
	env compactionEnv,
) (pc *pickedCompaction) {
	return nil
}
   101  
// pickRewriteCompaction never proposes a rewrite compaction.
func (p *compactionPickerForTesting) pickRewriteCompaction(
	env compactionEnv,
) (pc *pickedCompaction) {
	return nil
}
   107  
// pickReadTriggeredCompaction never proposes a read-triggered compaction.
func (p *compactionPickerForTesting) pickReadTriggeredCompaction(
	env compactionEnv,
) (pc *pickedCompaction) {
	return nil
}
   113  
// TestPickCompaction drives the compaction picker over hand-built versions and
// verifies which files are selected. Each case's want string has the form
// "startFiles[ extraLevelFiles] outputFiles grandparentFiles" (space
// separated; trailing spaces in the literals are significant because empty
// components still contribute their separator).
func TestPickCompaction(t *testing.T) {
	// fileNums renders a level slice as a sorted, comma-separated list of file
	// numbers for compact string comparison.
	fileNums := func(files manifest.LevelSlice) string {
		var ss []string
		files.Each(func(meta *fileMetadata) {
			ss = append(ss, strconv.Itoa(int(meta.FileNum)))
		})
		sort.Strings(ss)
		return strings.Join(ss, ",")
	}

	opts := (*Options)(nil).EnsureDefaults()
	// newFileMeta builds physical file metadata with the given number, size,
	// and point-key bounds.
	newFileMeta := func(fileNum FileNum, size uint64, smallest, largest base.InternalKey) *fileMetadata {
		m := (&fileMetadata{
			FileNum: fileNum,
			Size:    size,
		}).ExtendPointKeyBounds(opts.Comparer.Compare, smallest, largest)
		m.InitPhysicalBacking()
		return m
	}

	testCases := []struct {
		desc      string
		version   *version
		picker    compactionPickerForTesting
		want      string
		wantMulti bool // whether a multi-level (extra level) compaction is expected
	}{
		{
			desc: "no compaction",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("j.SET.102"),
					),
				},
			}),
			// Zero-valued picker => score < 1 => nothing picked.
			want: "",
		},

		{
			desc: "1 L0 file",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("j.SET.102"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100  ",
		},

		{
			desc: "2 L0 files (0 overlaps)",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("j.SET.102"),
					),
					newFileMeta(
						110,
						1,
						base.ParseInternalKey("k.SET.111"),
						base.ParseInternalKey("l.SET.112"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100,110  ",
		},

		{
			desc: "2 L0 files, with ikey overlap",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("p.SET.102"),
					),
					newFileMeta(
						110,
						1,
						base.ParseInternalKey("j.SET.111"),
						base.ParseInternalKey("q.SET.112"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100,110  ",
		},

		{
			desc: "2 L0 files, with ukey overlap",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("i.SET.102"),
					),
					newFileMeta(
						110,
						1,
						base.ParseInternalKey("i.SET.111"),
						base.ParseInternalKey("i.SET.112"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100,110  ",
		},

		{
			desc: "1 L0 file, 2 L1 files (0 overlaps)",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("i.SET.102"),
					),
				},
				1: {
					newFileMeta(
						200,
						1,
						base.ParseInternalKey("a.SET.201"),
						base.ParseInternalKey("b.SET.202"),
					),
					newFileMeta(
						210,
						1,
						base.ParseInternalKey("y.SET.211"),
						base.ParseInternalKey("z.SET.212"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100  ",
		},

		{
			desc: "1 L0 file, 2 L1 files (1 overlap), 4 L2 files (3 overlaps)",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("t.SET.102"),
					),
				},
				1: {
					newFileMeta(
						200,
						1,
						base.ParseInternalKey("a.SET.201"),
						base.ParseInternalKey("e.SET.202"),
					),
					newFileMeta(
						210,
						1,
						base.ParseInternalKey("f.SET.211"),
						base.ParseInternalKey("j.SET.212"),
					),
				},
				2: {
					newFileMeta(
						300,
						1,
						base.ParseInternalKey("a.SET.301"),
						base.ParseInternalKey("b.SET.302"),
					),
					newFileMeta(
						310,
						1,
						base.ParseInternalKey("c.SET.311"),
						base.ParseInternalKey("g.SET.312"),
					),
					newFileMeta(
						320,
						1,
						base.ParseInternalKey("h.SET.321"),
						base.ParseInternalKey("m.SET.322"),
					),
					newFileMeta(
						330,
						1,
						base.ParseInternalKey("n.SET.331"),
						base.ParseInternalKey("z.SET.332"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100 210 310,320,330",
		},

		{
			desc: "4 L1 files, 2 L2 files, can grow",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						200,
						1,
						base.ParseInternalKey("i1.SET.201"),
						base.ParseInternalKey("i2.SET.202"),
					),
					newFileMeta(
						210,
						1,
						base.ParseInternalKey("j1.SET.211"),
						base.ParseInternalKey("j2.SET.212"),
					),
					newFileMeta(
						220,
						1,
						base.ParseInternalKey("k1.SET.221"),
						base.ParseInternalKey("k2.SET.222"),
					),
					newFileMeta(
						230,
						1,
						base.ParseInternalKey("l1.SET.231"),
						base.ParseInternalKey("l2.SET.232"),
					),
				},
				2: {
					newFileMeta(
						300,
						1,
						base.ParseInternalKey("a0.SET.301"),
						base.ParseInternalKey("l0.SET.302"),
					),
					newFileMeta(
						310,
						1,
						base.ParseInternalKey("l2.SET.311"),
						base.ParseInternalKey("z2.SET.312"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     1,
				baseLevel: 1,
			},
			want:      "200,210,220 300  ",
			wantMulti: true,
		},

		{
			desc: "4 L1 files, 2 L2 files, can't grow (range)",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						200,
						1,
						base.ParseInternalKey("i1.SET.201"),
						base.ParseInternalKey("i2.SET.202"),
					),
					newFileMeta(
						210,
						1,
						base.ParseInternalKey("j1.SET.211"),
						base.ParseInternalKey("j2.SET.212"),
					),
					newFileMeta(
						220,
						1,
						base.ParseInternalKey("k1.SET.221"),
						base.ParseInternalKey("k2.SET.222"),
					),
					newFileMeta(
						230,
						1,
						base.ParseInternalKey("l1.SET.231"),
						base.ParseInternalKey("l2.SET.232"),
					),
				},
				2: {
					newFileMeta(
						300,
						1,
						base.ParseInternalKey("a0.SET.301"),
						base.ParseInternalKey("j0.SET.302"),
					),
					newFileMeta(
						310,
						1,
						base.ParseInternalKey("j2.SET.311"),
						base.ParseInternalKey("z2.SET.312"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     1,
				baseLevel: 1,
			},
			want:      "200 300  ",
			wantMulti: true,
		},

		{
			desc: "4 L1 files, 2 L2 files, can't grow (size)",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						200,
						expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1,
						base.ParseInternalKey("i1.SET.201"),
						base.ParseInternalKey("i2.SET.202"),
					),
					newFileMeta(
						210,
						expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1,
						base.ParseInternalKey("j1.SET.211"),
						base.ParseInternalKey("j2.SET.212"),
					),
					newFileMeta(
						220,
						expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1,
						base.ParseInternalKey("k1.SET.221"),
						base.ParseInternalKey("k2.SET.222"),
					),
					newFileMeta(
						230,
						expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1,
						base.ParseInternalKey("l1.SET.231"),
						base.ParseInternalKey("l2.SET.232"),
					),
				},
				2: {
					newFileMeta(
						300,
						expandedCompactionByteSizeLimit(opts, 2, math.MaxUint64)-1,
						base.ParseInternalKey("a0.SET.301"),
						base.ParseInternalKey("l0.SET.302"),
					),
					newFileMeta(
						310,
						expandedCompactionByteSizeLimit(opts, 2, math.MaxUint64)-1,
						base.ParseInternalKey("l2.SET.311"),
						base.ParseInternalKey("z2.SET.312"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     1,
				baseLevel: 1,
			},
			want: "200 300 ",
		},
	}

	for _, tc := range testCases {
		// Install the case's version and picker in a minimal versionSet, then
		// ask the picker for a compaction.
		vs := &versionSet{
			opts:    opts,
			cmp:     DefaultComparer.Compare,
			cmpName: DefaultComparer.Name,
		}
		vs.versions.Init(nil)
		vs.append(tc.version)
		tc.picker.opts = opts
		tc.picker.vers = tc.version
		vs.picker = &tc.picker
		pc, got := vs.picker.pickAuto(compactionEnv{diskAvailBytes: math.MaxUint64}), ""
		if pc != nil {
			c := newCompaction(pc, opts, time.Now(), nil /* provider */)

			// Assemble "start[ extra] output grandparents" to compare against
			// the want literal.
			gotStart := fileNums(c.startLevel.files)
			gotML := ""
			observedMulti := len(c.extraLevels) > 0
			if observedMulti {
				gotML = " " + fileNums(c.extraLevels[0].files)
			}
			gotOutput := " " + fileNums(c.outputLevel.files)
			gotGrandparents := " " + fileNums(c.grandparents)
			got = gotStart + gotML + gotOutput + gotGrandparents
			if tc.wantMulti != observedMulti {
				t.Fatalf("Expected Multi %t; Observed Multi %t, for %s", tc.wantMulti, observedMulti, got)
			}

		}
		if got != tc.want {
			t.Fatalf("%s:\ngot  %q\nwant %q", tc.desc, got, tc.want)
		}
	}
}
   541  
// TestElideTombstone is a datadriven test for compaction.elideTombstone: a
// "define" command builds a DB/version from the test input, and an "elide"
// command reports, per user key, whether a point tombstone covering it could
// be elided by a compaction spanning [a,z] at the given start level.
func TestElideTombstone(t *testing.T) {
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()
	var buf bytes.Buffer
	datadriven.RunTest(t, "testdata/compaction_elide_tombstone",
		func(t *testing.T, td *datadriven.TestData) string {
			switch td.Cmd {
			case "define":
				// Re-define replaces any DB from a previous directive.
				if d != nil {
					if err := d.Close(); err != nil {
						return err.Error()
					}
				}
				var err error
				if d, err = runDBDefineCmd(td, (&Options{
					FS:                          vfs.NewMem(),
					DebugCheck:                  DebugCheckLevels,
					FormatMajorVersion:          FormatNewest,
					DisableAutomaticCompactions: true,
				}).WithFSDefaults()); err != nil {
					return err.Error()
				}
				if td.HasArg("verbose") {
					return d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)
				}
				return d.mu.versions.currentVersion().String()
			case "elide":
				buf.Reset()
				var startLevel int
				td.ScanArgs(t, "start-level", &startLevel)
				// Construct a synthetic compaction from startLevel into
				// startLevel+1 spanning the whole [a,z] keyspace.
				c := compaction{
					cmp:      testkeys.Comparer.Compare,
					comparer: testkeys.Comparer,
					version:  d.mu.versions.currentVersion(),
					inputs:   []compactionLevel{{level: startLevel}, {level: startLevel + 1}},
					smallest: base.ParseInternalKey("a.SET.0"),
					largest:  base.ParseInternalKey("z.SET.0"),
				}
				c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
				c.setupInuseKeyRanges()
				for _, ukey := range strings.Split(td.Input, "\n") {
					fmt.Fprintf(&buf, "elideTombstone(%q) = %t\n", ukey, c.elideTombstone([]byte(ukey)))
				}
				return buf.String()
			default:
				return fmt.Sprintf("unknown command: %s", td.Cmd)
			}
		})
}
   595  
// TestElideRangeTombstone checks compaction.elideRangeTombstone against
// hand-built versions: a range tombstone over [key, endKey) can be elided only
// when no level below the compaction's output has overlapping files (and never
// while a flush is in progress).
func TestElideRangeTombstone(t *testing.T) {
	opts := (*Options)(nil).EnsureDefaults()

	// newFileMeta builds physical file metadata with the given point-key
	// bounds; file numbers/sizes are irrelevant for this test.
	newFileMeta := func(smallest, largest base.InternalKey) *fileMetadata {
		m := (&fileMetadata{}).ExtendPointKeyBounds(
			opts.Comparer.Compare, smallest, largest,
		)
		m.InitPhysicalBacking()
		return m
	}

	// want is one elision query: the user-key range and the expected answer.
	type want struct {
		key      string
		endKey   string
		expected bool
	}

	testCases := []struct {
		desc     string
		level    int
		version  *version
		wants    []want
		flushing flushableList
	}{
		{
			desc:    "empty",
			level:   1,
			version: newVersion(opts, [numLevels][]*fileMetadata{}),
			wants: []want{
				{"x", "y", true},
			},
		},
		{
			desc:  "non-empty",
			level: 1,
			version: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						base.ParseInternalKey("c.SET.801"),
						base.ParseInternalKey("g.SET.800"),
					),
					newFileMeta(
						base.ParseInternalKey("x.SET.701"),
						base.ParseInternalKey("y.SET.700"),
					),
				},
				2: {
					newFileMeta(
						base.ParseInternalKey("d.SET.601"),
						base.ParseInternalKey("h.SET.600"),
					),
					newFileMeta(
						base.ParseInternalKey("r.SET.501"),
						base.ParseInternalKey("t.SET.500"),
					),
				},
				3: {
					newFileMeta(
						base.ParseInternalKey("f.SET.401"),
						base.ParseInternalKey("g.SET.400"),
					),
					newFileMeta(
						base.ParseInternalKey("w.SET.301"),
						base.ParseInternalKey("x.SET.300"),
					),
				},
				4: {
					newFileMeta(
						base.ParseInternalKey("f.SET.201"),
						base.ParseInternalKey("m.SET.200"),
					),
					newFileMeta(
						base.ParseInternalKey("t.SET.101"),
						base.ParseInternalKey("t.SET.100"),
					),
				},
			}),
			wants: []want{
				{"b", "c", true},
				{"c", "d", true},
				{"d", "e", true},
				{"e", "f", false},
				{"f", "g", false},
				{"g", "h", false},
				{"h", "i", false},
				{"l", "m", false},
				{"m", "n", false},
				{"n", "o", true},
				{"q", "r", true},
				{"r", "s", true},
				{"s", "t", false},
				{"t", "u", false},
				{"u", "v", true},
				{"v", "w", false},
				{"w", "x", false},
				{"x", "y", false},
				{"y", "z", true},
			},
		},
		{
			desc:  "flushing",
			level: -1,
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						base.ParseInternalKey("h.SET.901"),
						base.ParseInternalKey("j.SET.900"),
					),
				},
				1: {
					newFileMeta(
						base.ParseInternalKey("c.SET.801"),
						base.ParseInternalKey("g.SET.800"),
					),
					newFileMeta(
						base.ParseInternalKey("x.SET.701"),
						base.ParseInternalKey("y.SET.700"),
					),
				},
			}),
			wants: []want{
				{"m", "n", false},
			},
			// Pretend one memtable is being flushed
			flushing: flushableList{nil},
		},
	}

	for _, tc := range testCases {
		// Build a synthetic compaction from tc.level into tc.level+1 covering
		// the whole [a,z] keyspace, then answer each elision query.
		c := compaction{
			cmp:      DefaultComparer.Compare,
			comparer: DefaultComparer,
			version:  tc.version,
			inputs:   []compactionLevel{{level: tc.level}, {level: tc.level + 1}},
			smallest: base.ParseInternalKey("a.SET.0"),
			largest:  base.ParseInternalKey("z.SET.0"),
			flushing: tc.flushing,
		}
		c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
		c.setupInuseKeyRanges()
		for _, w := range tc.wants {
			if got := c.elideRangeTombstone([]byte(w.key), []byte(w.endKey)); got != w.expected {
				t.Errorf("%s: keys=%q-%q: got %v, want %v", tc.desc, w.key, w.endKey, got, w.expected)
			}
		}
	}
}
   743  
// TestCompactionTransform is a datadriven test for the range-key compaction
// transformer: it parses a keyspan from the input, applies
// rangeKeyCompactionTransform under the given snapshots, in-use key ranges,
// and elision setting, and returns the transformed span.
func TestCompactionTransform(t *testing.T) {
	datadriven.RunTest(t, "testdata/compaction_transform", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "transform":
			var snapshots []uint64
			var keyRanges []manifest.UserKeyRange
			disableElision := td.HasArg("disable-elision")
			td.MaybeScanArgs(t, "snapshots", &snapshots)
			// in-use-key-ranges values look like "start-end"; split once on
			// the first '-' and trim surrounding whitespace.
			if arg, ok := td.Arg("in-use-key-ranges"); ok {
				for _, keyRange := range arg.Vals {
					parts := strings.SplitN(keyRange, "-", 2)
					start := []byte(strings.TrimSpace(parts[0]))
					end := []byte(strings.TrimSpace(parts[1]))
					keyRanges = append(keyRanges, manifest.UserKeyRange{
						Start: start,
						End:   end,
					})
				}
			}
			span := keyspan.ParseSpan(td.Input)
			// The transformer requires keys sorted by descending trailer;
			// reject malformed test input up front.
			for i := range span.Keys {
				if i > 0 {
					if span.Keys[i-1].Trailer < span.Keys[i].Trailer {
						return "span keys not sorted"
					}
				}
			}
			var outSpan keyspan.Span
			c := compaction{
				cmp:                base.DefaultComparer.Compare,
				comparer:           base.DefaultComparer,
				disableSpanElision: disableElision,
				inuseKeyRanges:     keyRanges,
			}
			transformer := rangeKeyCompactionTransform(base.DefaultComparer.Equal, snapshots, c.elideRangeTombstone)
			if err := transformer.Transform(base.DefaultComparer.Compare, span, &outSpan); err != nil {
				return fmt.Sprintf("error: %s", err)
			}
			return outSpan.String()
		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}
   788  
// cpuPermissionGranter is a test double for the CPU work permission granter:
// it always grants (or denies) according to permit and tracks outstanding
// handles so tests can assert Get/Done calls balance.
type cpuPermissionGranter struct {
	// requestCount is used to confirm that every GetPermission function call
	// has a corresponding CPUWorkDone function call.
	requestCount int
	used         bool // set once GetPermission has been called at least once
	permit       bool // the decision handed out to every caller
}
   796  
// cpuWorkHandle is the CPUWorkHandle returned by cpuPermissionGranter; it just
// carries the permit decision.
type cpuWorkHandle struct {
	permit bool
}
   800  
// Permitted reports the decision recorded when the handle was granted.
func (c cpuWorkHandle) Permitted() bool {
	return c.permit
}
   804  
   805  func (t *cpuPermissionGranter) GetPermission(dur time.Duration) CPUWorkHandle {
   806  	t.requestCount++
   807  	t.used = true
   808  	return cpuWorkHandle{t.permit}
   809  }
   810  
// CPUWorkDone returns a handle, decrementing the outstanding-request count so
// tests can verify it balances back to zero.
func (t *cpuPermissionGranter) CPUWorkDone(_ CPUWorkHandle) {
	t.requestCount--
}
   814  
   815  // Simple test to check if compactions are using the granter, and if exactly
   816  // the acquired handles are returned.
   817  func TestCompactionCPUGranter(t *testing.T) {
   818  	mem := vfs.NewMem()
   819  	opts := (&Options{FS: mem}).WithFSDefaults()
   820  	g := &cpuPermissionGranter{permit: true}
   821  	opts.Experimental.CPUWorkPermissionGranter = g
   822  	d, err := Open("", opts)
   823  	if err != nil {
   824  		t.Fatalf("Open: %v", err)
   825  	}
   826  	defer d.Close()
   827  
   828  	d.Set([]byte{'a'}, []byte{'a'}, nil)
   829  	err = d.Compact([]byte{'a'}, []byte{'b'}, true)
   830  	if err != nil {
   831  		t.Fatalf("Compact: %v", err)
   832  	}
   833  	require.True(t, g.used)
   834  	require.Equal(t, g.requestCount, 0)
   835  }
   836  
   837  // Tests that there's no errors or panics when the default CPU granter is used.
   838  func TestCompactionCPUGranterDefault(t *testing.T) {
   839  	mem := vfs.NewMem()
   840  	opts := (&Options{FS: mem}).WithFSDefaults()
   841  	d, err := Open("", opts)
   842  	if err != nil {
   843  		t.Fatalf("Open: %v", err)
   844  	}
   845  	defer d.Close()
   846  
   847  	d.Set([]byte{'a'}, []byte{'a'}, nil)
   848  	err = d.Compact([]byte{'a'}, []byte{'b'}, true)
   849  	if err != nil {
   850  		t.Fatalf("Compact: %v", err)
   851  	}
   852  }
   853  
// TestCompaction drives a tiny DB through a scripted sequence of Sets/Deletes
// and, after each step, polls until the memtable and on-disk sstable contents
// match the expected strings (each sstable rendered as its concatenated user
// keys followed by ".").
func TestCompaction(t *testing.T) {
	const memTableSize = 10000
	// Tuned so that 2 values can reside in the memtable before a flush, but a
	// 3rd value will cause a flush. Needs to account for the max skiplist node
	// size.
	const valueSize = 3500

	mem := vfs.NewMem()
	opts := &Options{
		FS:                    mem,
		MemTableSize:          memTableSize,
		DebugCheck:            DebugCheckLevels,
		L0CompactionThreshold: 8,
	}
	opts.testingRandomized(t).WithFSDefaults()
	d, err := Open("", opts)
	if err != nil {
		t.Fatalf("Open: %v", err)
	}

	// get1 concatenates all user keys visible through iter, closing it.
	get1 := func(iter internalIterator) (ret string) {
		b := &bytes.Buffer{}
		for key, _ := iter.First(); key != nil; key, _ = iter.Next() {
			b.Write(key.UserKey)
		}
		if err := iter.Close(); err != nil {
			t.Fatalf("iterator Close: %v", err)
		}
		return b.String()
	}
	// getAll renders the mutable memtable contents and, per physical sstable,
	// its keys (suffixed with ".") sorted and joined, under d.mu.
	getAll := func() (gotMem, gotDisk string, err error) {
		d.mu.Lock()
		defer d.mu.Unlock()

		if d.mu.mem.mutable != nil {
			gotMem = get1(d.mu.mem.mutable.newIter(nil))
		}
		ss := []string(nil)
		v := d.mu.versions.currentVersion()
		provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, "" /* dirName */))
		if err != nil {
			t.Fatalf("%v", err)
		}
		defer provider.Close()
		for _, levelMetadata := range v.Levels {
			iter := levelMetadata.Iter()
			for meta := iter.First(); meta != nil; meta = iter.Next() {
				if meta.Virtual {
					continue
				}
				f, err := provider.OpenForReading(context.Background(), base.FileTypeTable, meta.FileBacking.DiskFileNum, objstorage.OpenOptions{})
				if err != nil {
					return "", "", errors.WithStack(err)
				}
				r, err := sstable.NewReader(f, sstable.ReaderOptions{})
				if err != nil {
					return "", "", errors.WithStack(err)
				}
				// NOTE(review): this defer accumulates per sstable and only
				// runs when getAll returns; acceptable for the handful of
				// tables this test creates.
				defer r.Close()
				iter, err := r.NewIter(nil /* lower */, nil /* upper */)
				if err != nil {
					return "", "", errors.WithStack(err)
				}
				ss = append(ss, get1(iter)+".")
			}
		}
		sort.Strings(ss)
		return gotMem, strings.Join(ss, ""), nil
	}

	value := bytes.Repeat([]byte("x"), valueSize)
	// Each case: "+k" sets key k, "-k" deletes it; wantMem/wantDisk are the
	// expected steady-state renderings after the operation.
	testCases := []struct {
		key, wantMem, wantDisk string
	}{
		{"+A", "A", ""},
		{"+a", "Aa", ""},
		{"+B", "B", "Aa."},
		{"+b", "Bb", "Aa."},
		// The next level-0 table overwrites the B key.
		{"+C", "C", "Aa.Bb."},
		{"+B", "BC", "Aa.Bb."},
		// The next level-0 table deletes the a key.
		{"+D", "D", "Aa.BC.Bb."},
		{"-a", "Da", "Aa.BC.Bb."},
		{"+d", "Dad", "Aa.BC.Bb."},
		{"+E", "E", "Aa.BC.Bb.Dad."},
		{"+e", "Ee", "Aa.BC.Bb.Dad."},
		// The next addition creates the fourth level-0 table, and l0CompactionTrigger == 8,
		// but since the sublevel count is doubled when comparing with l0CompactionTrigger,
		// the addition of the 4th sublevel triggers a non-trivial compaction into one level-1 table.
		// Note that the keys in this one larger table are interleaved from the four smaller ones.
		{"+F", "F", "ABCDEbde."},
	}
	for _, tc := range testCases {
		if key := tc.key[1:]; tc.key[0] == '+' {
			if err := d.Set([]byte(key), value, nil); err != nil {
				t.Errorf("%q: Set: %v", key, err)
				break
			}
		} else {
			if err := d.Delete([]byte(key), nil); err != nil {
				t.Errorf("%q: Delete: %v", key, err)
				break
			}
		}

		// try backs off to allow any writes to the memfs to complete.
		err := try(100*time.Microsecond, 20*time.Second, func() error {
			gotMem, gotDisk, err := getAll()
			if err != nil {
				return err
			}
			if testing.Verbose() {
				fmt.Printf("mem=%s (%s) disk=%s (%s)\n", gotMem, tc.wantMem, gotDisk, tc.wantDisk)
			}

			if gotMem != tc.wantMem {
				return errors.Errorf("mem: got %q, want %q", gotMem, tc.wantMem)
			}
			if gotDisk != tc.wantDisk {
				return errors.Errorf("ldb: got %q, want %q", gotDisk, tc.wantDisk)
			}
			return nil
		})
		if err != nil {
			t.Errorf("%q: %v", tc.key, err)
		}
	}
	if err := d.Close(); err != nil {
		t.Fatalf("db Close: %v", err)
	}
}
   986  
// TestValidateVersionEdit exercises validateVersionEdit: every smallest and
// largest internal key of the edit's new and deleted file entries is run
// through the supplied validation function, and a failure for any one of them
// is surfaced as the returned error.
func TestValidateVersionEdit(t *testing.T) {
	const badKey = "malformed-key"

	errValidationFailed := errors.New("validation failed")
	// validateFn rejects exactly the sentinel badKey; all other keys pass.
	validateFn := func(key []byte) error {
		if string(key) == badKey {
			return errValidationFailed
		}
		return nil
	}

	cmp := DefaultComparer.Compare
	// newFileMeta builds file metadata whose point-key bounds span
	// [smallest, largest].
	newFileMeta := func(smallest, largest base.InternalKey) *fileMetadata {
		m := (&fileMetadata{}).ExtendPointKeyBounds(cmp, smallest, largest)
		m.InitPhysicalBacking()
		return m
	}

	// Each case plants badKey in a different position (start/end bound, new
	// vs. deleted file entry, single vs. multiple entries); the final case has
	// no bad key and expects no error.
	testCases := []struct {
		desc    string
		ve      *versionEdit
		vFunc   func([]byte) error
		wantErr error
	}{
		{
			desc: "single new file; start key",
			ve: &versionEdit{
				NewFiles: []manifest.NewFileEntry{
					{
						Meta: newFileMeta(
							manifest.InternalKey{UserKey: []byte(badKey)},
							manifest.InternalKey{UserKey: []byte("z")},
						),
					},
				},
			},
			vFunc:   validateFn,
			wantErr: errValidationFailed,
		},
		{
			desc: "single new file; end key",
			ve: &versionEdit{
				NewFiles: []manifest.NewFileEntry{
					{
						Meta: newFileMeta(
							manifest.InternalKey{UserKey: []byte("a")},
							manifest.InternalKey{UserKey: []byte(badKey)},
						),
					},
				},
			},
			vFunc:   validateFn,
			wantErr: errValidationFailed,
		},
		{
			desc: "multiple new files",
			ve: &versionEdit{
				NewFiles: []manifest.NewFileEntry{
					{
						Meta: newFileMeta(
							manifest.InternalKey{UserKey: []byte("a")},
							manifest.InternalKey{UserKey: []byte("c")},
						),
					},
					{
						Meta: newFileMeta(
							manifest.InternalKey{UserKey: []byte(badKey)},
							manifest.InternalKey{UserKey: []byte("z")},
						),
					},
				},
			},
			vFunc:   validateFn,
			wantErr: errValidationFailed,
		},
		{
			desc: "single deleted file; start key",
			ve: &versionEdit{
				DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{
					deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta(
						manifest.InternalKey{UserKey: []byte(badKey)},
						manifest.InternalKey{UserKey: []byte("z")},
					),
				},
			},
			vFunc:   validateFn,
			wantErr: errValidationFailed,
		},
		{
			desc: "single deleted file; end key",
			ve: &versionEdit{
				DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{
					deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta(
						manifest.InternalKey{UserKey: []byte("a")},
						manifest.InternalKey{UserKey: []byte(badKey)},
					),
				},
			},
			vFunc:   validateFn,
			wantErr: errValidationFailed,
		},
		{
			desc: "multiple deleted files",
			ve: &versionEdit{
				DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{
					deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta(
						manifest.InternalKey{UserKey: []byte("a")},
						manifest.InternalKey{UserKey: []byte("c")},
					),
					deletedFileEntry{Level: 0, FileNum: 1}: newFileMeta(
						manifest.InternalKey{UserKey: []byte(badKey)},
						manifest.InternalKey{UserKey: []byte("z")},
					),
				},
			},
			vFunc:   validateFn,
			wantErr: errValidationFailed,
		},
		{
			desc: "no errors",
			ve: &versionEdit{
				NewFiles: []manifest.NewFileEntry{
					{
						Level: 0,
						Meta: newFileMeta(
							manifest.InternalKey{UserKey: []byte("b")},
							manifest.InternalKey{UserKey: []byte("c")},
						),
					},
					{
						Level: 0,
						Meta: newFileMeta(
							manifest.InternalKey{UserKey: []byte("d")},
							manifest.InternalKey{UserKey: []byte("g")},
						),
					},
				},
				DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{
					deletedFileEntry{Level: 6, FileNum: 0}: newFileMeta(
						manifest.InternalKey{UserKey: []byte("a")},
						manifest.InternalKey{UserKey: []byte("d")},
					),
					deletedFileEntry{Level: 6, FileNum: 1}: newFileMeta(
						manifest.InternalKey{UserKey: []byte("x")},
						manifest.InternalKey{UserKey: []byte("z")},
					),
				},
			},
			vFunc: validateFn,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			err := validateVersionEdit(tc.ve, tc.vFunc, base.DefaultFormatter)
			// errors.Is is used so that wrapping of the sentinel error by
			// validateVersionEdit is tolerated.
			if tc.wantErr != nil {
				if !errors.Is(err, tc.wantErr) {
					t.Fatalf("got: %s; want: %s", err, tc.wantErr)
				}
				return
			}
			if err != nil {
				t.Fatalf("got %s; wanted no error", err)
			}
		})
	}
}
  1154  
// TestManualCompaction is a datadriven test of manual compactions. Each test
// case in testCases below runs its datadriven file against a DB opened with a
// randomly chosen format major version in [minVersion, maxVersion], with
// automatic compactions disabled so the test fully controls compaction
// scheduling. It also exercises blocking of a manual compaction behind a
// synthetic in-progress compaction (add-ongoing-compaction / async-compact).
func TestManualCompaction(t *testing.T) {
	var mem vfs.FS
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, closeAllSnapshots(d))
			require.NoError(t, d.Close())
		}
	}()

	// Log the seed so a failing randomized version choice can be reproduced.
	seed := time.Now().UnixNano()
	rng := rand.New(rand.NewSource(seed))
	t.Logf("seed: %d", seed)

	// randVersion picks a format major version uniformly from [min, max].
	randVersion := func(min, max FormatMajorVersion) FormatMajorVersion {
		return FormatMajorVersion(int(min) + rng.Intn(int(max)-int(min)+1))
	}

	// compactionLog accumulates CompactionEnd event strings; dumped and reset
	// by the "compaction-log" command.
	var compactionLog bytes.Buffer
	compactionLogEventListener := &EventListener{
		CompactionEnd: func(info CompactionInfo) {
			// Ensure determinism.
			info.JobID = 1
			info.Duration = time.Second
			info.TotalDuration = time.Second
			fmt.Fprintln(&compactionLog, info.String())
		},
	}
	// reset closes any open DB and reopens a fresh in-memory DB with a newly
	// randomized format major version.
	reset := func(minVersion, maxVersion FormatMajorVersion) {
		compactionLog.Reset()
		if d != nil {
			require.NoError(t, closeAllSnapshots(d))
			require.NoError(t, d.Close())
		}
		mem = vfs.NewMem()
		// "ext" is the staging directory used by build/ingest commands.
		require.NoError(t, mem.MkdirAll("ext", 0755))

		opts := (&Options{
			FS:                          mem,
			DebugCheck:                  DebugCheckLevels,
			DisableAutomaticCompactions: true,
			EventListener:               compactionLogEventListener,
			FormatMajorVersion:          randVersion(minVersion, maxVersion),
		}).WithFSDefaults()

		var err error
		d, err = Open("", opts)
		require.NoError(t, err)
	}

	// createOngoingCompaction registers a synthetic in-progress compaction
	// over [start, end] from startLevel to outputLevel, marking all
	// overlapping files as compacting so other compactions must wait.
	// d.mu must be held when calling.
	createOngoingCompaction := func(start, end []byte, startLevel, outputLevel int) (ongoingCompaction *compaction) {
		ongoingCompaction = &compaction{
			inputs:   []compactionLevel{{level: startLevel}, {level: outputLevel}},
			smallest: InternalKey{UserKey: start},
			largest:  InternalKey{UserKey: end},
		}
		ongoingCompaction.startLevel = &ongoingCompaction.inputs[0]
		ongoingCompaction.outputLevel = &ongoingCompaction.inputs[1]
		// Mark files as compacting.
		curr := d.mu.versions.currentVersion()
		ongoingCompaction.startLevel.files = curr.Overlaps(startLevel, d.cmp, start, end, false)
		ongoingCompaction.outputLevel.files = curr.Overlaps(outputLevel, d.cmp, start, end, false)
		for _, cl := range ongoingCompaction.inputs {
			iter := cl.files.Iter()
			for f := iter.First(); f != nil; f = iter.Next() {
				f.CompactionState = manifest.CompactionStateCompacting
			}
		}
		d.mu.compact.inProgress[ongoingCompaction] = struct{}{}
		d.mu.compact.compactingCount++
		return
	}

	// deleteOngoingCompaction undoes createOngoingCompaction: clears the
	// compacting state on the input files and unregisters the compaction.
	// d.mu must be held when calling.
	deleteOngoingCompaction := func(ongoingCompaction *compaction) {
		for _, cl := range ongoingCompaction.inputs {
			iter := cl.files.Iter()
			for f := iter.First(); f != nil; f = iter.Next() {
				f.CompactionState = manifest.CompactionStateNotCompacting
			}
		}
		delete(d.mu.compact.inProgress, ongoingCompaction)
		d.mu.compact.compactingCount--
	}

	// runTest drives one datadriven file; verbose selects the DebugString
	// rendition of the LSM in command output.
	runTest := func(t *testing.T, testData string, minVersion, maxVersion FormatMajorVersion, verbose bool) {
		reset(minVersion, maxVersion)
		var ongoingCompaction *compaction
		datadriven.RunTest(t, testData, func(t *testing.T, td *datadriven.TestData) string {
			switch td.Cmd {
			case "reset":
				reset(minVersion, maxVersion)
				return ""

			case "batch":
				b := d.NewIndexedBatch()
				if err := runBatchDefineCmd(td, b); err != nil {
					return err.Error()
				}
				require.NoError(t, b.Commit(nil))
				return ""

			case "build":
				if err := runBuildCmd(td, d, mem); err != nil {
					return err.Error()
				}
				return ""

			case "compact":
				if err := runCompactCmd(td, d); err != nil {
					return err.Error()
				}
				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				if verbose {
					s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)
				}
				d.mu.Unlock()
				if td.HasArg("hide-file-num") {
					re := regexp.MustCompile(`([0-9]*):\[`)
					s = re.ReplaceAllString(s, "[")
				}
				return s

			case "define":
				// Rebuild the DB from the command's LSM description rather
				// than reusing the existing one.
				if d != nil {
					if err := closeAllSnapshots(d); err != nil {
						return err.Error()
					}
					if err := d.Close(); err != nil {
						return err.Error()
					}
				}

				mem = vfs.NewMem()
				opts := (&Options{
					FS:                          mem,
					DebugCheck:                  DebugCheckLevels,
					EventListener:               compactionLogEventListener,
					FormatMajorVersion:          randVersion(minVersion, maxVersion),
					DisableAutomaticCompactions: true,
				}).WithFSDefaults()

				var err error
				if d, err = runDBDefineCmd(td, opts); err != nil {
					return err.Error()
				}

				s := d.mu.versions.currentVersion().String()
				if verbose {
					s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)
				}
				return s

			case "file-sizes":
				return runTableFileSizesCmd(td, d)

			case "flush":
				if err := d.Flush(); err != nil {
					return err.Error()
				}
				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				if verbose {
					s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)
				}
				d.mu.Unlock()
				return s

			case "ingest":
				if err := runIngestCmd(td, d, mem); err != nil {
					return err.Error()
				}
				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				if verbose {
					s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)
				}
				d.mu.Unlock()
				return s

			case "iter":
				// TODO(peter): runDBDefineCmd doesn't properly update the visible
				// sequence number. So we have to use a snapshot with a very large
				// sequence number, otherwise the DB appears empty.
				snap := Snapshot{
					db:     d,
					seqNum: InternalKeySeqNumMax,
				}
				iter, _ := snap.NewIter(nil)
				return runIterCmd(td, iter, true)

			case "lsm":
				return runLSMCmd(td, d)

			case "populate":
				b := d.NewBatch()
				runPopulateCmd(t, td, b)
				count := b.Count()
				require.NoError(t, b.Commit(nil))
				return fmt.Sprintf("wrote %d keys\n", count)

			case "async-compact":
				// Run the manual compaction in a goroutine and verify it
				// blocks behind the synthetic ongoing compaction (registered
				// via add-ongoing-compaction) until that is removed.
				var s string
				ch := make(chan error, 1)
				go func() {
					if err := runCompactCmd(td, d); err != nil {
						ch <- err
						close(ch)
						return
					}
					d.mu.Lock()
					s = d.mu.versions.currentVersion().String()
					d.mu.Unlock()
					close(ch)
				}()

				// manualDone is a non-blocking check of whether the goroutine
				// has finished.
				manualDone := func() bool {
					select {
					case <-ch:
						return true
					default:
						return false
					}
				}

				// Wait (with backoff) until the manual compaction has been
				// queued and retried at least once, i.e. is provably blocked.
				err := try(100*time.Microsecond, 20*time.Second, func() error {
					if manualDone() {
						return nil
					}

					d.mu.Lock()
					defer d.mu.Unlock()
					if len(d.mu.compact.manual) == 0 {
						return errors.New("no manual compaction queued")
					}
					manual := d.mu.compact.manual[0]
					if manual.retries == 0 {
						return errors.New("manual compaction has not been retried")
					}
					return nil
				})
				if err != nil {
					return err.Error()
				}

				if manualDone() {
					return "manual compaction did not block for ongoing\n" + s
				}

				// Unblock: remove the synthetic compaction and let the manual
				// compaction run to completion.
				d.mu.Lock()
				deleteOngoingCompaction(ongoingCompaction)
				ongoingCompaction = nil
				d.maybeScheduleCompaction()
				d.mu.Unlock()
				if err := <-ch; err != nil {
					return err.Error()
				}
				return "manual compaction blocked until ongoing finished\n" + s

			case "add-ongoing-compaction":
				var startLevel int
				var outputLevel int
				var start string
				var end string
				td.ScanArgs(t, "startLevel", &startLevel)
				td.ScanArgs(t, "outputLevel", &outputLevel)
				td.ScanArgs(t, "start", &start)
				td.ScanArgs(t, "end", &end)
				d.mu.Lock()
				ongoingCompaction = createOngoingCompaction([]byte(start), []byte(end), startLevel, outputLevel)
				d.mu.Unlock()
				return ""

			case "remove-ongoing-compaction":
				d.mu.Lock()
				deleteOngoingCompaction(ongoingCompaction)
				ongoingCompaction = nil
				d.mu.Unlock()
				return ""

			case "set-concurrent-compactions":
				var concurrentCompactions int
				td.ScanArgs(t, "num", &concurrentCompactions)
				d.opts.MaxConcurrentCompactions = func() int {
					return concurrentCompactions
				}
				return ""

			case "sstable-properties":
				return runSSTablePropertiesCmd(t, td, d)

			case "wait-pending-table-stats":
				return runTableStatsCmd(td, d)

			case "close-snapshots":
				d.mu.Lock()
				// Re-enable automatic compactions if they were disabled so that
				// closing snapshots can trigger elision-only compactions if
				// necessary.
				d.opts.DisableAutomaticCompactions = false

				var ss []*Snapshot
				l := &d.mu.snapshots
				for i := l.root.next; i != &l.root; i = i.next {
					ss = append(ss, i)
				}
				d.mu.Unlock()
				for i := range ss {
					if err := ss[i].Close(); err != nil {
						return err.Error()
					}
				}
				return ""

			case "compaction-log":
				defer compactionLog.Reset()
				return compactionLog.String()

			default:
				return fmt.Sprintf("unknown command: %s", td.Cmd)
			}
		})
	}

	// Each datadriven file is restricted to the format major version range it
	// is valid for (maxVersion inclusive).
	testCases := []struct {
		testData   string
		minVersion FormatMajorVersion
		maxVersion FormatMajorVersion // inclusive
		verbose    bool
	}{
		{
			testData:   "testdata/manual_compaction",
			minVersion: FormatMostCompatible,
			maxVersion: FormatSetWithDelete - 1,
		},
		{
			testData:   "testdata/manual_compaction_set_with_del",
			minVersion: FormatBlockPropertyCollector,
			// This test exercises split user keys.
			maxVersion: FormatSplitUserKeysMarkedCompacted - 1,
		},
		{
			testData:   "testdata/singledel_manual_compaction",
			minVersion: FormatMostCompatible,
			maxVersion: FormatSetWithDelete - 1,
		},
		{
			testData:   "testdata/singledel_manual_compaction_set_with_del",
			minVersion: FormatSetWithDelete,
			maxVersion: internalFormatNewest,
		},
		{
			testData:   "testdata/manual_compaction_range_keys",
			minVersion: FormatRangeKeys,
			maxVersion: internalFormatNewest,
			verbose:    true,
		},
		{
			testData:   "testdata/manual_compaction_file_boundaries",
			minVersion: FormatBlockPropertyCollector,
			// This test exercises split user keys.
			maxVersion: FormatSplitUserKeysMarkedCompacted - 1,
		},
		{
			testData:   "testdata/manual_compaction_file_boundaries_delsized",
			minVersion: FormatDeleteSizedAndObsolete,
			maxVersion: internalFormatNewest,
		},
		{
			testData:   "testdata/manual_compaction_set_with_del_sstable_Pebblev4",
			minVersion: FormatDeleteSizedAndObsolete,
			maxVersion: internalFormatNewest,
		},
		{
			testData:   "testdata/manual_compaction_multilevel",
			minVersion: FormatMostCompatible,
			maxVersion: internalFormatNewest,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.testData, func(t *testing.T) {
			runTest(t, tc.testData, tc.minVersion, tc.maxVersion, tc.verbose)
		})
	}
}
  1543  
  1544  func TestCompactionFindGrandparentLimit(t *testing.T) {
  1545  	cmp := DefaultComparer.Compare
  1546  	var grandparents []*fileMetadata
  1547  
  1548  	var fileNum base.FileNum
  1549  	parseMeta := func(s string) *fileMetadata {
  1550  		parts := strings.Split(s, "-")
  1551  		if len(parts) != 2 {
  1552  			t.Fatalf("malformed table spec: %s", s)
  1553  		}
  1554  		fileNum++
  1555  		m := (&fileMetadata{
  1556  			FileNum: fileNum,
  1557  		}).ExtendPointKeyBounds(
  1558  			cmp,
  1559  			InternalKey{UserKey: []byte(parts[0])},
  1560  			InternalKey{UserKey: []byte(parts[1])},
  1561  		)
  1562  		m.InitPhysicalBacking()
  1563  		return m
  1564  	}
  1565  
  1566  	datadriven.RunTest(t, "testdata/compaction_find_grandparent_limit",
  1567  		func(t *testing.T, d *datadriven.TestData) string {
  1568  			switch d.Cmd {
  1569  			case "define":
  1570  				grandparents = nil
  1571  				if len(d.Input) == 0 {
  1572  					return ""
  1573  				}
  1574  				for _, data := range strings.Split(d.Input, "\n") {
  1575  					parts := strings.Fields(data)
  1576  					if len(parts) != 2 {
  1577  						return fmt.Sprintf("malformed test:\n%s", d.Input)
  1578  					}
  1579  
  1580  					meta := parseMeta(parts[0])
  1581  					var err error
  1582  					meta.Size, err = strconv.ParseUint(parts[1], 10, 64)
  1583  					if err != nil {
  1584  						return err.Error()
  1585  					}
  1586  					grandparents = append(grandparents, meta)
  1587  				}
  1588  				return ""
  1589  
  1590  			case "compact":
  1591  				c := &compaction{
  1592  					cmp:          cmp,
  1593  					equal:        DefaultComparer.Equal,
  1594  					comparer:     DefaultComparer,
  1595  					grandparents: manifest.NewLevelSliceKeySorted(cmp, grandparents),
  1596  				}
  1597  				if len(d.CmdArgs) != 1 {
  1598  					return fmt.Sprintf("%s expects 1 argument", d.Cmd)
  1599  				}
  1600  				if len(d.CmdArgs[0].Vals) != 1 {
  1601  					return fmt.Sprintf("%s expects 1 value", d.CmdArgs[0].Key)
  1602  				}
  1603  				var err error
  1604  				c.maxOverlapBytes, err = strconv.ParseUint(d.CmdArgs[0].Vals[0], 10, 64)
  1605  				if err != nil {
  1606  					return err.Error()
  1607  				}
  1608  
  1609  				var buf bytes.Buffer
  1610  				var smallest, largest string
  1611  				var grandparentLimit []byte
  1612  				for i, key := range strings.Fields(d.Input) {
  1613  					if i == 0 {
  1614  						smallest = key
  1615  						grandparentLimit = c.findGrandparentLimit([]byte(key))
  1616  					}
  1617  					if grandparentLimit != nil && c.cmp(grandparentLimit, []byte(key)) < 0 {
  1618  						fmt.Fprintf(&buf, "%s-%s\n", smallest, largest)
  1619  						smallest = key
  1620  						grandparentLimit = c.findGrandparentLimit([]byte(key))
  1621  					}
  1622  					largest = key
  1623  				}
  1624  				fmt.Fprintf(&buf, "%s-%s\n", smallest, largest)
  1625  				return buf.String()
  1626  
  1627  			default:
  1628  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  1629  			}
  1630  		})
  1631  }
  1632  
// TestCompactionFindL0Limit is a datadriven test for compaction.findL0Limit.
// "define" builds a version from a per-level table listing and reports its
// flush split keys; "flush" partitions the input keys into output ranges,
// cutting whenever the current L0 limit is exceeded.
func TestCompactionFindL0Limit(t *testing.T) {
	cmp := DefaultComparer.Compare

	fileNumCounter := 1
	// parseMeta parses "smallest-largest [size=N]" into file metadata,
	// assigning sequential file numbers and deriving seqnums from the parsed
	// internal keys.
	parseMeta := func(s string) (*fileMetadata, error) {
		fields := strings.Fields(s)
		parts := strings.Split(fields[0], "-")
		if len(parts) != 2 {
			return nil, errors.Errorf("malformed table spec: %s", s)
		}
		m := (&fileMetadata{
			FileNum: base.FileNum(fileNumCounter),
		}).ExtendPointKeyBounds(
			cmp,
			base.ParseInternalKey(strings.TrimSpace(parts[0])),
			base.ParseInternalKey(strings.TrimSpace(parts[1])),
		)
		fileNumCounter++
		m.SmallestSeqNum = m.Smallest.SeqNum()
		m.LargestSeqNum = m.Largest.SeqNum()

		// Optional key=value attributes; only "size" is recognized.
		for _, field := range fields[1:] {
			parts := strings.Split(field, "=")
			switch parts[0] {
			case "size":
				size, err := strconv.ParseUint(parts[1], 10, 64)
				if err != nil {
					t.Fatal(err)
				}
				m.Size = size
			}
		}
		m.InitPhysicalBacking()
		return m, nil
	}

	// vers and flushSplitBytes persist across datadriven commands; a "flush"
	// operates on the version built by the most recent "define".
	var vers *version
	flushSplitBytes := int64(0)

	datadriven.RunTest(t, "testdata/compaction_find_l0_limit",
		func(t *testing.T, d *datadriven.TestData) string {
			switch d.Cmd {
			case "define":
				fileMetas := [manifest.NumLevels][]*fileMetadata{}
				baseLevel := manifest.NumLevels - 1
				level := 0
				// NOTE(review): flushSplitBytes is only overwritten when the
				// arg is present, so a value set by an earlier define carries
				// over — presumably intentional; confirm.
				d.MaybeScanArgs(t, "flush_split_bytes", &flushSplitBytes)

				var err error
				for _, data := range strings.Split(d.Input, "\n") {
					data = strings.TrimSpace(data)
					switch data {
					case "L0", "L1", "L2", "L3", "L4", "L5", "L6":
						// A level header switches which level subsequent
						// table specs are added to.
						level, err = strconv.Atoi(data[1:])
						if err != nil {
							return err.Error()
						}
					default:
						meta, err := parseMeta(data)
						if err != nil {
							return err.Error()
						}
						// Track the shallowest non-L0 level with files.
						if level != 0 && level < baseLevel {
							baseLevel = level
						}
						fileMetas[level] = append(fileMetas[level], meta)
					}
				}

				vers = manifest.NewVersion(DefaultComparer.Compare, base.DefaultFormatter, flushSplitBytes, fileMetas)
				flushSplitKeys := vers.L0Sublevels.FlushSplitKeys()

				var buf strings.Builder
				buf.WriteString(vers.String())
				buf.WriteString("flush split keys:\n")
				for _, key := range flushSplitKeys {
					fmt.Fprintf(&buf, "\t%s\n", base.DefaultFormatter(key))
				}

				return buf.String()

			case "flush":
				// A flush is modeled as a compaction from level -1 into L0.
				c := &compaction{
					cmp:      cmp,
					equal:    DefaultComparer.Equal,
					comparer: DefaultComparer,
					version:  vers,
					l0Limits: vers.L0Sublevels.FlushSplitKeys(),
					inputs:   []compactionLevel{{level: -1}, {level: 0}},
				}
				c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]

				// Walk the keys in order, starting a new output range each
				// time the current L0 limit is passed.
				var buf bytes.Buffer
				var smallest, largest string
				var l0Limit []byte
				for i, key := range strings.Fields(d.Input) {
					if i == 0 {
						smallest = key
						l0Limit = c.findL0Limit([]byte(key))
					}
					if l0Limit != nil && c.cmp(l0Limit, []byte(key)) < 0 {
						fmt.Fprintf(&buf, "%s-%s\n", smallest, largest)
						smallest = key
						l0Limit = c.findL0Limit([]byte(key))
					}
					largest = key
				}
				fmt.Fprintf(&buf, "%s-%s\n", smallest, largest)
				return buf.String()

			default:
				return fmt.Sprintf("unknown command: %s", d.Cmd)
			}
		})
}
  1748  
  1749  func TestCompactionOutputLevel(t *testing.T) {
  1750  	opts := (*Options)(nil).EnsureDefaults()
  1751  	version := &version{}
  1752  
  1753  	datadriven.RunTest(t, "testdata/compaction_output_level",
  1754  		func(t *testing.T, d *datadriven.TestData) (res string) {
  1755  			defer func() {
  1756  				if r := recover(); r != nil {
  1757  					res = fmt.Sprintln(r)
  1758  				}
  1759  			}()
  1760  
  1761  			switch d.Cmd {
  1762  			case "compact":
  1763  				var start, base int
  1764  				d.ScanArgs(t, "start", &start)
  1765  				d.ScanArgs(t, "base", &base)
  1766  				pc := newPickedCompaction(opts, version, start, defaultOutputLevel(start, base), base)
  1767  				c := newCompaction(pc, opts, time.Now(), nil /* provider */)
  1768  				return fmt.Sprintf("output=%d\nmax-output-file-size=%d\n",
  1769  					c.outputLevel.level, c.maxOutputFileSize)
  1770  
  1771  			default:
  1772  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  1773  			}
  1774  		})
  1775  }
  1776  
  1777  func TestCompactionAtomicUnitBounds(t *testing.T) {
  1778  	cmp := DefaultComparer.Compare
  1779  	var files manifest.LevelSlice
  1780  
  1781  	parseMeta := func(s string) *fileMetadata {
  1782  		parts := strings.Split(s, "-")
  1783  		if len(parts) != 2 {
  1784  			t.Fatalf("malformed table spec: %s", s)
  1785  		}
  1786  		m := (&fileMetadata{}).ExtendPointKeyBounds(
  1787  			cmp,
  1788  			base.ParseInternalKey(parts[0]),
  1789  			base.ParseInternalKey(parts[1]),
  1790  		)
  1791  		m.InitPhysicalBacking()
  1792  		return m
  1793  	}
  1794  
  1795  	datadriven.RunTest(t, "testdata/compaction_atomic_unit_bounds",
  1796  		func(t *testing.T, d *datadriven.TestData) string {
  1797  			switch d.Cmd {
  1798  			case "define":
  1799  				files = manifest.LevelSlice{}
  1800  				if len(d.Input) == 0 {
  1801  					return ""
  1802  				}
  1803  				var ff []*fileMetadata
  1804  				for _, data := range strings.Split(d.Input, "\n") {
  1805  					meta := parseMeta(data)
  1806  					meta.FileNum = FileNum(len(ff))
  1807  					ff = append(ff, meta)
  1808  				}
  1809  				files = manifest.NewLevelSliceKeySorted(cmp, ff)
  1810  				return ""
  1811  
  1812  			case "atomic-unit-bounds":
  1813  				c := &compaction{
  1814  					cmp:      cmp,
  1815  					equal:    DefaultComparer.Equal,
  1816  					comparer: DefaultComparer,
  1817  					inputs:   []compactionLevel{{files: files}, {}},
  1818  				}
  1819  				c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
  1820  				if len(d.CmdArgs) != 1 {
  1821  					return fmt.Sprintf("%s expects 1 argument", d.Cmd)
  1822  				}
  1823  				index, err := strconv.ParseInt(d.CmdArgs[0].String(), 10, 64)
  1824  				if err != nil {
  1825  					return err.Error()
  1826  				}
  1827  				iter := files.Iter()
  1828  				// Advance iter to `index`.
  1829  				_ = iter.First()
  1830  				for i := int64(0); i < index; i++ {
  1831  					_ = iter.Next()
  1832  				}
  1833  				atomicUnit, _ := expandToAtomicUnit(c.cmp, iter.Take().Slice(), true /* disableIsCompacting */)
  1834  				lower, upper := manifest.KeyRange(c.cmp, atomicUnit.Iter())
  1835  				return fmt.Sprintf("%s-%s\n", lower.UserKey, upper.UserKey)
  1836  
  1837  			default:
  1838  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  1839  			}
  1840  		})
  1841  }
  1842  
  1843  func TestCompactionDeleteOnlyHints(t *testing.T) {
  1844  	parseUint64 := func(s string) uint64 {
  1845  		v, err := strconv.ParseUint(s, 10, 64)
  1846  		require.NoError(t, err)
  1847  		return v
  1848  	}
  1849  	var d *DB
  1850  	defer func() {
  1851  		if d != nil {
  1852  			require.NoError(t, closeAllSnapshots(d))
  1853  			require.NoError(t, d.Close())
  1854  		}
  1855  	}()
  1856  
  1857  	var compactInfo *CompactionInfo // protected by d.mu
  1858  	reset := func() (*Options, error) {
  1859  		if d != nil {
  1860  			compactInfo = nil
  1861  			if err := closeAllSnapshots(d); err != nil {
  1862  				return nil, err
  1863  			}
  1864  			if err := d.Close(); err != nil {
  1865  				return nil, err
  1866  			}
  1867  		}
  1868  		opts := (&Options{
  1869  			FS:         vfs.NewMem(),
  1870  			DebugCheck: DebugCheckLevels,
  1871  			EventListener: &EventListener{
  1872  				CompactionEnd: func(info CompactionInfo) {
  1873  					if compactInfo != nil {
  1874  						return
  1875  					}
  1876  					compactInfo = &info
  1877  				},
  1878  			},
  1879  			FormatMajorVersion: internalFormatNewest,
  1880  		}).WithFSDefaults()
  1881  
  1882  		// Collection of table stats can trigger compactions. As we want full
  1883  		// control over when compactions are run, disable stats by default.
  1884  		opts.private.disableTableStats = true
  1885  
  1886  		return opts, nil
  1887  	}
  1888  
  1889  	compactionString := func() string {
  1890  		for d.mu.compact.compactingCount > 0 {
  1891  			d.mu.compact.cond.Wait()
  1892  		}
  1893  
  1894  		s := "(none)"
  1895  		if compactInfo != nil {
  1896  			// Fix the job ID and durations for determinism.
  1897  			compactInfo.JobID = 100
  1898  			compactInfo.Duration = time.Second
  1899  			compactInfo.TotalDuration = 2 * time.Second
  1900  			s = compactInfo.String()
  1901  			compactInfo = nil
  1902  		}
  1903  		return s
  1904  	}
  1905  
  1906  	var err error
  1907  	var opts *Options
  1908  	datadriven.RunTest(t, "testdata/compaction_delete_only_hints",
  1909  		func(t *testing.T, td *datadriven.TestData) string {
  1910  			switch td.Cmd {
  1911  			case "define":
  1912  				opts, err = reset()
  1913  				if err != nil {
  1914  					return err.Error()
  1915  				}
  1916  				d, err = runDBDefineCmd(td, opts)
  1917  				if err != nil {
  1918  					return err.Error()
  1919  				}
  1920  				d.mu.Lock()
  1921  				s := d.mu.versions.currentVersion().String()
  1922  				d.mu.Unlock()
  1923  				return s
  1924  
  1925  			case "force-set-hints":
  1926  				d.mu.Lock()
  1927  				defer d.mu.Unlock()
  1928  				d.mu.compact.deletionHints = d.mu.compact.deletionHints[:0]
  1929  				var buf bytes.Buffer
  1930  				for _, data := range strings.Split(td.Input, "\n") {
  1931  					parts := strings.FieldsFunc(strings.TrimSpace(data),
  1932  						func(r rune) bool { return r == '-' || r == ' ' || r == '.' })
  1933  
  1934  					start, end := []byte(parts[2]), []byte(parts[3])
  1935  
  1936  					var tombstoneFile *fileMetadata
  1937  					tombstoneLevel := int(parseUint64(parts[0][1:]))
  1938  
  1939  					// Set file number to the value provided in the input.
  1940  					tombstoneFile = &fileMetadata{
  1941  						FileNum: base.FileNum(parseUint64(parts[1])),
  1942  					}
  1943  
  1944  					var hintType deleteCompactionHintType
  1945  					switch typ := parts[7]; typ {
  1946  					case "point_key_only":
  1947  						hintType = deleteCompactionHintTypePointKeyOnly
  1948  					case "range_key_only":
  1949  						hintType = deleteCompactionHintTypeRangeKeyOnly
  1950  					case "point_and_range_key":
  1951  						hintType = deleteCompactionHintTypePointAndRangeKey
  1952  					default:
  1953  						return fmt.Sprintf("unknown hint type: %s", typ)
  1954  					}
  1955  
  1956  					h := deleteCompactionHint{
  1957  						hintType:                hintType,
  1958  						start:                   start,
  1959  						end:                     end,
  1960  						fileSmallestSeqNum:      parseUint64(parts[4]),
  1961  						tombstoneLevel:          tombstoneLevel,
  1962  						tombstoneFile:           tombstoneFile,
  1963  						tombstoneSmallestSeqNum: parseUint64(parts[5]),
  1964  						tombstoneLargestSeqNum:  parseUint64(parts[6]),
  1965  					}
  1966  					d.mu.compact.deletionHints = append(d.mu.compact.deletionHints, h)
  1967  					fmt.Fprintln(&buf, h.String())
  1968  				}
  1969  				return buf.String()
  1970  
  1971  			case "get-hints":
  1972  				d.mu.Lock()
  1973  				defer d.mu.Unlock()
  1974  
  1975  				// Force collection of table stats. This requires re-enabling the
  1976  				// collection flag. We also do not want compactions to run as part of
  1977  				// the stats collection job, so we disable it temporarily.
  1978  				d.opts.private.disableTableStats = false
  1979  				d.opts.DisableAutomaticCompactions = true
  1980  				defer func() {
  1981  					d.opts.private.disableTableStats = true
  1982  					d.opts.DisableAutomaticCompactions = false
  1983  				}()
  1984  
  1985  				// NB: collectTableStats attempts to acquire the lock. Temporarily
  1986  				// unlock here to avoid a deadlock.
  1987  				d.mu.Unlock()
  1988  				didRun := d.collectTableStats()
  1989  				d.mu.Lock()
  1990  
  1991  				if !didRun {
  1992  					// If a job was already running, wait for the results.
  1993  					d.waitTableStats()
  1994  				}
  1995  
  1996  				hints := d.mu.compact.deletionHints
  1997  				if len(hints) == 0 {
  1998  					return "(none)"
  1999  				}
  2000  				var buf bytes.Buffer
  2001  				for _, h := range hints {
  2002  					buf.WriteString(h.String() + "\n")
  2003  				}
  2004  				return buf.String()
  2005  
  2006  			case "maybe-compact":
  2007  				d.mu.Lock()
  2008  				d.maybeScheduleCompaction()
  2009  
  2010  				var buf bytes.Buffer
  2011  				fmt.Fprintf(&buf, "Deletion hints:\n")
  2012  				for _, h := range d.mu.compact.deletionHints {
  2013  					fmt.Fprintf(&buf, "  %s\n", h.String())
  2014  				}
  2015  				if len(d.mu.compact.deletionHints) == 0 {
  2016  					fmt.Fprintf(&buf, "  (none)\n")
  2017  				}
  2018  				fmt.Fprintf(&buf, "Compactions:\n")
  2019  				fmt.Fprintf(&buf, "  %s", compactionString())
  2020  				d.mu.Unlock()
  2021  				return buf.String()
  2022  
  2023  			case "compact":
  2024  				if err := runCompactCmd(td, d); err != nil {
  2025  					return err.Error()
  2026  				}
  2027  				d.mu.Lock()
  2028  				compactInfo = nil
  2029  				s := d.mu.versions.currentVersion().String()
  2030  				d.mu.Unlock()
  2031  				return s
  2032  
  2033  			case "close-snapshot":
  2034  				seqNum, err := strconv.ParseUint(strings.TrimSpace(td.Input), 0, 64)
  2035  				if err != nil {
  2036  					return err.Error()
  2037  				}
  2038  				d.mu.Lock()
  2039  				var s *Snapshot
  2040  				l := &d.mu.snapshots
  2041  				for i := l.root.next; i != &l.root; i = i.next {
  2042  					if i.seqNum == seqNum {
  2043  						s = i
  2044  					}
  2045  				}
  2046  				d.mu.Unlock()
  2047  				if s == nil {
  2048  					return "(not found)"
  2049  				} else if err := s.Close(); err != nil {
  2050  					return err.Error()
  2051  				}
  2052  
  2053  				d.mu.Lock()
  2054  				// Closing the snapshot may have triggered a compaction.
  2055  				str := compactionString()
  2056  				d.mu.Unlock()
  2057  				return str
  2058  
  2059  			case "iter":
  2060  				snap := Snapshot{
  2061  					db:     d,
  2062  					seqNum: InternalKeySeqNumMax,
  2063  				}
  2064  				iter, _ := snap.NewIter(nil)
  2065  				return runIterCmd(td, iter, true)
  2066  
  2067  			case "reset":
  2068  				opts, err = reset()
  2069  				if err != nil {
  2070  					return err.Error()
  2071  				}
  2072  				d, err = Open("", opts)
  2073  				if err != nil {
  2074  					return err.Error()
  2075  				}
  2076  				return ""
  2077  
  2078  			case "ingest":
  2079  				if err = runBuildCmd(td, d, d.opts.FS); err != nil {
  2080  					return err.Error()
  2081  				}
  2082  				if err = runIngestCmd(td, d, d.opts.FS); err != nil {
  2083  					return err.Error()
  2084  				}
  2085  				return "OK"
  2086  
  2087  			case "describe-lsm":
  2088  				d.mu.Lock()
  2089  				s := d.mu.versions.currentVersion().String()
  2090  				d.mu.Unlock()
  2091  				return s
  2092  
  2093  			default:
  2094  				return fmt.Sprintf("unknown command: %s", td.Cmd)
  2095  			}
  2096  		})
  2097  }
  2098  
// TestCompactionTombstones is a datadriven test exercising compactions
// triggered by tombstones (testdata/compaction_tombstones). Supported
// commands:
//
//	define                   — (re)create the DB from the input and print the LSM.
//	maybe-compact            — enable automatic compactions, schedule one, and
//	                           print the resulting compaction (or "(none)").
//	wait-pending-table-stats — run runTableStatsCmd to wait for table stats.
//	close-snapshot           — close the snapshot with the given seqnum; print
//	                           any compaction this unblocks.
//	close                    — close the DB.
//	version                  — print the current LSM.
func TestCompactionTombstones(t *testing.T) {
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, closeAllSnapshots(d))
			require.NoError(t, d.Close())
		}
	}()

	var compactInfo *CompactionInfo // protected by d.mu

	// compactionString waits for any in-flight compactions to complete (the
	// caller holds d.mu; cond.Wait releases it while waiting) and renders the
	// last observed CompactionInfo. Job ID and durations are overwritten so
	// the datadriven output is deterministic.
	compactionString := func() string {
		for d.mu.compact.compactingCount > 0 {
			d.mu.compact.cond.Wait()
		}

		s := "(none)"
		if compactInfo != nil {
			// Fix the job ID and durations for determinism.
			compactInfo.JobID = 100
			compactInfo.Duration = time.Second
			compactInfo.TotalDuration = 2 * time.Second
			s = compactInfo.String()
			compactInfo = nil
		}
		return s
	}

	datadriven.RunTest(t, "testdata/compaction_tombstones",
		func(t *testing.T, td *datadriven.TestData) string {
			switch td.Cmd {
			case "define":
				// Tear down any DB from a previous define before building the
				// new one.
				if d != nil {
					compactInfo = nil
					require.NoError(t, closeAllSnapshots(d))
					if err := d.Close(); err != nil {
						return err.Error()
					}
				}
				opts := (&Options{
					FS:         vfs.NewMem(),
					DebugCheck: DebugCheckLevels,
					EventListener: &EventListener{
						// NB: unlike some sibling tests, this records the most
						// recent compaction unconditionally (no nil guard).
						CompactionEnd: func(info CompactionInfo) {
							compactInfo = &info
						},
					},
					FormatMajorVersion: internalFormatNewest,
				}).WithFSDefaults()
				var err error
				d, err = runDBDefineCmd(td, opts)
				if err != nil {
					return err.Error()
				}
				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				d.mu.Unlock()
				return s

			case "maybe-compact":
				d.mu.Lock()
				d.opts.DisableAutomaticCompactions = false
				d.maybeScheduleCompaction()
				s := compactionString()
				d.mu.Unlock()
				return s

			case "wait-pending-table-stats":
				return runTableStatsCmd(td, d)

			case "close-snapshot":
				seqNum, err := strconv.ParseUint(strings.TrimSpace(td.Input), 0, 64)
				if err != nil {
					return err.Error()
				}
				// Find the snapshot with the requested seqnum in the DB's
				// snapshot list. The list is walked under d.mu, but Close is
				// called after unlocking since it acquires d.mu itself.
				d.mu.Lock()
				var s *Snapshot
				l := &d.mu.snapshots
				for i := l.root.next; i != &l.root; i = i.next {
					if i.seqNum == seqNum {
						s = i
					}
				}
				d.mu.Unlock()
				if s == nil {
					return "(not found)"
				} else if err := s.Close(); err != nil {
					return err.Error()
				}

				d.mu.Lock()
				// Closing the snapshot may have triggered a compaction.
				str := compactionString()
				d.mu.Unlock()
				return str

			case "close":
				if err := d.Close(); err != nil {
					return err.Error()
				}
				// Clear d so the deferred cleanup doesn't double-close.
				d = nil
				return ""

			case "version":
				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				d.mu.Unlock()
				return s

			default:
				return fmt.Sprintf("unknown command: %s", td.Cmd)
			}
		})
}
  2213  
  2214  func closeAllSnapshots(d *DB) error {
  2215  	d.mu.Lock()
  2216  	var ss []*Snapshot
  2217  	l := &d.mu.snapshots
  2218  	for i := l.root.next; i != &l.root; i = i.next {
  2219  		ss = append(ss, i)
  2220  	}
  2221  	d.mu.Unlock()
  2222  	for i := range ss {
  2223  		if err := ss[i].Close(); err != nil {
  2224  			return err
  2225  		}
  2226  	}
  2227  	return nil
  2228  }
  2229  
  2230  func TestCompactionReadTriggeredQueue(t *testing.T) {
  2231  
  2232  	// Convert a read compaction to a string which this test
  2233  	// understands.
  2234  	showRC := func(rc *readCompaction) string {
  2235  		return fmt.Sprintf(
  2236  			"L%d: %s-%s %d\n", rc.level, string(rc.start), string(rc.end), rc.fileNum,
  2237  		)
  2238  	}
  2239  
  2240  	var queue *readCompactionQueue
  2241  
  2242  	datadriven.RunTest(t, "testdata/read_compaction_queue",
  2243  		func(t *testing.T, td *datadriven.TestData) string {
  2244  			switch td.Cmd {
  2245  			case "create":
  2246  				queue = &readCompactionQueue{}
  2247  				return "(success)"
  2248  			case "add-compaction":
  2249  				for _, line := range strings.Split(td.Input, "\n") {
  2250  					if line == "" {
  2251  						continue
  2252  					}
  2253  					parts := strings.Split(line, " ")
  2254  
  2255  					if len(parts) != 3 {
  2256  						return "error: malformed data for add-compaction. usage: <level>: <start>-<end> <filenum>"
  2257  					}
  2258  					if l, err := strconv.Atoi(parts[0][1:2]); err == nil {
  2259  						keys := strings.Split(parts[1], "-")
  2260  						fileNum, _ := strconv.Atoi(parts[2])
  2261  						rc := readCompaction{
  2262  							level:   l,
  2263  							start:   []byte(keys[0]),
  2264  							end:     []byte(keys[1]),
  2265  							fileNum: base.FileNum(fileNum),
  2266  						}
  2267  						queue.add(&rc, DefaultComparer.Compare)
  2268  					} else {
  2269  						return err.Error()
  2270  					}
  2271  				}
  2272  				return ""
  2273  			case "remove-compaction":
  2274  				rc := queue.remove()
  2275  				if rc == nil {
  2276  					return "(nil)"
  2277  				}
  2278  				return showRC(rc)
  2279  			case "print-size":
  2280  				// Print the size of the queue.
  2281  				return fmt.Sprintf("%d", queue.size)
  2282  			case "print-queue":
  2283  				// Print each element of the queue on a separate line.
  2284  				var sb strings.Builder
  2285  				if queue.size == 0 {
  2286  					sb.WriteString("(empty)")
  2287  				}
  2288  
  2289  				for i := 0; i < queue.size; i++ {
  2290  					rc := queue.at(i)
  2291  					sb.WriteString(showRC(rc))
  2292  				}
  2293  				return sb.String()
  2294  			default:
  2295  				return fmt.Sprintf("unknown command: %s", td.Cmd)
  2296  			}
  2297  		},
  2298  	)
  2299  }
  2300  
  2301  func (qu *readCompactionQueue) at(i int) *readCompaction {
  2302  	if i >= qu.size {
  2303  		return nil
  2304  	}
  2305  
  2306  	return qu.queue[i]
  2307  }
  2308  
// TestCompactionReadTriggered is a datadriven test for read-triggered
// compactions (testdata/compaction_read_triggered). Supported commands:
//
//	define                 — (re)create the DB from the input and print the LSM.
//	add-read-compaction    — append read compactions ("<level>: <start>-<end>
//	                         <filenum>") to the DB's read-compaction queue.
//	show-read-compactions  — print the queued read compactions.
//	maybe-compact          — enable automatic compactions, schedule one, and
//	                         print the resulting compaction (or "(none)").
//	version                — print the current LSM.
func TestCompactionReadTriggered(t *testing.T) {
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	var compactInfo *CompactionInfo // protected by d.mu

	// compactionString waits for in-flight compactions to complete (caller
	// holds d.mu; cond.Wait releases it while waiting) and renders the last
	// observed CompactionInfo with job ID and durations normalized for
	// deterministic output.
	compactionString := func() string {
		for d.mu.compact.compactingCount > 0 {
			d.mu.compact.cond.Wait()
		}

		s := "(none)"
		if compactInfo != nil {
			// Fix the job ID and durations for determinism.
			compactInfo.JobID = 100
			compactInfo.Duration = time.Second
			compactInfo.TotalDuration = 2 * time.Second
			s = compactInfo.String()
			compactInfo = nil
		}
		return s
	}

	datadriven.RunTest(t, "testdata/compaction_read_triggered",
		func(t *testing.T, td *datadriven.TestData) string {
			switch td.Cmd {
			case "define":
				// Tear down any DB from a previous define before building the
				// new one.
				if d != nil {
					compactInfo = nil
					if err := d.Close(); err != nil {
						return err.Error()
					}
				}
				opts := (&Options{
					FS:         vfs.NewMem(),
					DebugCheck: DebugCheckLevels,
					EventListener: &EventListener{
						CompactionEnd: func(info CompactionInfo) {
							compactInfo = &info
						},
					},
				}).WithFSDefaults()
				var err error
				d, err = runDBDefineCmd(td, opts)
				if err != nil {
					return err.Error()
				}
				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				d.mu.Unlock()
				return s

			case "add-read-compaction":
				d.mu.Lock()
				// Optionally mark a flush as in progress via the "flushing"
				// argument, which can suppress compaction scheduling.
				td.MaybeScanArgs(t, "flushing", &d.mu.compact.flushing)
				for _, line := range strings.Split(td.Input, "\n") {
					if line == "" {
						continue
					}
					parts := strings.Split(line, " ")
					if len(parts) != 3 {
						return "error: malformed data for add-read-compaction. usage: <level>: <start>-<end> <filenum>"
					}
					// NB: here the level digit is the first character of the
					// first field (format "<level>:", no leading 'L').
					if l, err := strconv.Atoi(parts[0][:1]); err == nil {
						keys := strings.Split(parts[1], "-")
						fileNum, _ := strconv.Atoi(parts[2])
						rc := readCompaction{
							level:   l,
							start:   []byte(keys[0]),
							end:     []byte(keys[1]),
							fileNum: base.FileNum(fileNum),
						}
						d.mu.compact.readCompactions.add(&rc, DefaultComparer.Compare)
					} else {
						return err.Error()
					}
				}
				d.mu.Unlock()
				return ""

			case "show-read-compactions":
				d.mu.Lock()
				var sb strings.Builder
				if d.mu.compact.readCompactions.size == 0 {
					sb.WriteString("(none)")
				}
				for i := 0; i < d.mu.compact.readCompactions.size; i++ {
					rc := d.mu.compact.readCompactions.at(i)
					sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end)))
				}
				d.mu.Unlock()
				return sb.String()

			case "maybe-compact":
				d.mu.Lock()
				d.opts.DisableAutomaticCompactions = false
				d.maybeScheduleCompaction()
				s := compactionString()
				d.mu.Unlock()
				return s

			case "version":
				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				d.mu.Unlock()
				return s

			default:
				return fmt.Sprintf("unknown command: %s", td.Cmd)
			}
		})
}
  2425  
// TestCompactionInuseKeyRanges is a datadriven test
// (testdata/compaction_inuse_key_ranges) for compaction.setupInuseKeyRanges.
// "define" builds a version from per-level file specs; "inuse-key-ranges"
// queries the in-use key ranges for a <level> <smallest> <largest> triple and
// prints them ("." when empty).
func TestCompactionInuseKeyRanges(t *testing.T) {
	cmp := DefaultComparer.Compare
	// parseMeta builds a fileMetadata from a "<smallest>-<largest>" spec of
	// internal keys.
	// NOTE(review): bounds are registered via ExtendRangeKeyBounds (not
	// ExtendPointKeyBounds as in sibling tests) — presumably intentional so
	// the in-use computation sees range-key bounds; confirm against
	// setupInuseKeyRanges.
	parseMeta := func(s string) *fileMetadata {
		parts := strings.Split(s, "-")
		if len(parts) != 2 {
			t.Fatalf("malformed table spec: %s", s)
		}
		m := (&fileMetadata{}).ExtendRangeKeyBounds(
			cmp,
			base.ParseInternalKey(strings.TrimSpace(parts[0])),
			base.ParseInternalKey(strings.TrimSpace(parts[1])),
		)
		m.SmallestSeqNum = m.Smallest.SeqNum()
		m.LargestSeqNum = m.Largest.SeqNum()
		m.InitPhysicalBacking()
		return m
	}

	opts := (*Options)(nil).EnsureDefaults()

	var c *compaction
	datadriven.RunTest(t, "testdata/compaction_inuse_key_ranges", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			c = &compaction{
				cmp:       DefaultComparer.Compare,
				equal:     DefaultComparer.Equal,
				comparer:  DefaultComparer,
				formatKey: DefaultComparer.FormatKey,
				inputs:    []compactionLevel{{}, {}},
			}
			c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
			var files [numLevels][]*fileMetadata
			var currentLevel int
			fileNum := FileNum(1)

			// Input alternates "L<n>" level headers with file specs; files
			// accumulate into the most recently named level.
			for _, data := range strings.Split(td.Input, "\n") {
				switch data {
				case "L0", "L1", "L2", "L3", "L4", "L5", "L6":
					level, err := strconv.Atoi(data[1:])
					if err != nil {
						return err.Error()
					}
					currentLevel = level

				default:
					meta := parseMeta(data)
					meta.FileNum = fileNum
					fileNum++
					files[currentLevel] = append(files[currentLevel], meta)
				}
			}
			c.version = newVersion(opts, files)
			return c.version.String()

		case "inuse-key-ranges":
			var buf bytes.Buffer
			for _, line := range strings.Split(td.Input, "\n") {
				parts := strings.Fields(line)
				if len(parts) != 3 {
					fmt.Fprintf(&buf, "expected <level> <smallest> <largest>: %q\n", line)
					continue
				}
				level, err := strconv.Atoi(parts[0])
				if err != nil {
					fmt.Fprintf(&buf, "expected <level> <smallest> <largest>: %q: %v\n", line, err)
					continue
				}
				// Configure the compaction's output level and key bounds for
				// this query, then recompute the in-use ranges from scratch.
				c.outputLevel.level = level
				c.smallest.UserKey = []byte(parts[1])
				c.largest.UserKey = []byte(parts[2])

				c.inuseKeyRanges = nil
				c.setupInuseKeyRanges()
				if len(c.inuseKeyRanges) == 0 {
					fmt.Fprintf(&buf, ".\n")
				} else {
					for i, r := range c.inuseKeyRanges {
						if i > 0 {
							fmt.Fprintf(&buf, " ")
						}
						fmt.Fprintf(&buf, "%s-%s", r.Start, r.End)
					}
					fmt.Fprintf(&buf, "\n")
				}
			}
			return buf.String()

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}
  2519  
  2520  func TestCompactionInuseKeyRangesRandomized(t *testing.T) {
  2521  	var (
  2522  		fileNum     = FileNum(0)
  2523  		opts        = (*Options)(nil).EnsureDefaults()
  2524  		seed        = int64(time.Now().UnixNano())
  2525  		rng         = rand.New(rand.NewSource(seed))
  2526  		endKeyspace = 26 * 26
  2527  	)
  2528  	t.Logf("Using rng seed %d.", seed)
  2529  
  2530  	for iter := 0; iter < 100; iter++ {
  2531  		makeUserKey := func(i int) []byte {
  2532  			if i >= endKeyspace {
  2533  				i = endKeyspace - 1
  2534  			}
  2535  			return []byte{byte(i/26 + 'a'), byte(i%26 + 'a')}
  2536  		}
  2537  		makeIK := func(level, i int) InternalKey {
  2538  			return base.MakeInternalKey(
  2539  				makeUserKey(i),
  2540  				uint64(numLevels-level),
  2541  				base.InternalKeyKindSet,
  2542  			)
  2543  		}
  2544  		makeFile := func(level, start, end int) *fileMetadata {
  2545  			fileNum++
  2546  			m := (&fileMetadata{
  2547  				FileNum: fileNum,
  2548  			}).ExtendPointKeyBounds(
  2549  				opts.Comparer.Compare,
  2550  				makeIK(level, start),
  2551  				makeIK(level, end),
  2552  			)
  2553  			m.SmallestSeqNum = m.Smallest.SeqNum()
  2554  			m.LargestSeqNum = m.Largest.SeqNum()
  2555  			m.InitPhysicalBacking()
  2556  			return m
  2557  		}
  2558  		overlaps := func(startA, endA, startB, endB []byte) bool {
  2559  			disjoint := opts.Comparer.Compare(endB, startA) < 0 || opts.Comparer.Compare(endA, startB) < 0
  2560  			return !disjoint
  2561  		}
  2562  		var files [numLevels][]*fileMetadata
  2563  		for l := 0; l < numLevels; l++ {
  2564  			for i := 0; i < rand.Intn(10); i++ {
  2565  				s := rng.Intn(endKeyspace)
  2566  				maxWidth := rng.Intn(endKeyspace-s) + 1
  2567  				e := rng.Intn(maxWidth) + s
  2568  				sKey, eKey := makeUserKey(s), makeUserKey(e)
  2569  				// Discard the key range if it overlaps any existing files
  2570  				// within this level.
  2571  				var o bool
  2572  				for _, f := range files[l] {
  2573  					o = o || overlaps(sKey, eKey, f.Smallest.UserKey, f.Largest.UserKey)
  2574  				}
  2575  				if o {
  2576  					continue
  2577  				}
  2578  				files[l] = append(files[l], makeFile(l, s, e))
  2579  			}
  2580  			slices.SortFunc(files[l], func(a, b *fileMetadata) int {
  2581  				return opts.Comparer.Compare(a.Smallest.UserKey, b.Smallest.UserKey)
  2582  			})
  2583  		}
  2584  		v := newVersion(opts, files)
  2585  		t.Log(v.DebugString(opts.Comparer.FormatKey))
  2586  		for i := 0; i < 1000; i++ {
  2587  			l := rng.Intn(numLevels)
  2588  			s := rng.Intn(endKeyspace)
  2589  			maxWidth := rng.Intn(endKeyspace-s) + 1
  2590  			e := rng.Intn(maxWidth) + s
  2591  			sKey, eKey := makeUserKey(s), makeUserKey(e)
  2592  			keyRanges := calculateInuseKeyRanges(v, opts.Comparer.Compare, l, numLevels-1, sKey, eKey)
  2593  
  2594  			for level := l; level < numLevels; level++ {
  2595  				for _, f := range files[level] {
  2596  					if !overlaps(sKey, eKey, f.Smallest.UserKey, f.Largest.UserKey) {
  2597  						// This file doesn't overlap the queried range. Skip it.
  2598  						continue
  2599  					}
  2600  					// This file does overlap the queried range. The key range
  2601  					// [MAX(f.Smallest, sKey), MIN(f.Largest, eKey)] must be fully
  2602  					// contained by a key range in keyRanges.
  2603  					checkStart, checkEnd := f.Smallest.UserKey, f.Largest.UserKey
  2604  					if opts.Comparer.Compare(checkStart, sKey) < 0 {
  2605  						checkStart = sKey
  2606  					}
  2607  					if opts.Comparer.Compare(checkEnd, eKey) > 0 {
  2608  						checkEnd = eKey
  2609  					}
  2610  					var contained bool
  2611  					for _, kr := range keyRanges {
  2612  						contained = contained ||
  2613  							(opts.Comparer.Compare(checkStart, kr.Start) >= 0 &&
  2614  								opts.Comparer.Compare(checkEnd, kr.End) <= 0)
  2615  					}
  2616  					if !contained {
  2617  						t.Errorf("Seed %d, iter %d: File %s overlaps %q-%q, but is not fully contained in any of the key ranges.",
  2618  							seed, iter, f, sKey, eKey)
  2619  					}
  2620  				}
  2621  			}
  2622  		}
  2623  	}
  2624  }
  2625  
// TestCompactionAllowZeroSeqNum is a datadriven test
// (testdata/compaction_allow_zero_seqnum) for compaction.allowZeroSeqNum.
// "define" builds a DB; "allow-zero-seqnum" constructs a compaction for each
// input line — either "flush" (compact the memtable queue) or a list of
// "L<level>:<smallest>-<largest>" file specs spanning two adjacent levels —
// and prints whether the compaction may zero sequence numbers.
func TestCompactionAllowZeroSeqNum(t *testing.T) {
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, closeAllSnapshots(d))
			require.NoError(t, d.Close())
		}
	}()

	metaRE := regexp.MustCompile(`^L([0-9]+):([^-]+)-(.+)$`)
	var fileNum base.FileNum
	// parseMeta parses "L<level>:<smallest>-<largest>" into its level and a
	// fileMetadata with a freshly assigned file number. The keys carry no
	// trailer (zero seqnum/kind).
	parseMeta := func(s string) (level int, meta *fileMetadata) {
		match := metaRE.FindStringSubmatch(s)
		if match == nil {
			t.Fatalf("malformed table spec: %s", s)
		}
		level, err := strconv.Atoi(match[1])
		if err != nil {
			t.Fatalf("malformed table spec: %s: %s", s, err)
		}
		fileNum++
		meta = (&fileMetadata{
			FileNum: fileNum,
		}).ExtendPointKeyBounds(
			d.cmp,
			InternalKey{UserKey: []byte(match[2])},
			InternalKey{UserKey: []byte(match[3])},
		)
		meta.InitPhysicalBacking()
		return level, meta
	}

	datadriven.RunTest(t, "testdata/compaction_allow_zero_seqnum",
		func(t *testing.T, td *datadriven.TestData) string {
			switch td.Cmd {
			case "define":
				if d != nil {
					require.NoError(t, closeAllSnapshots(d))
					if err := d.Close(); err != nil {
						return err.Error()
					}
				}

				var err error
				if d, err = runDBDefineCmd(td, nil /* options */); err != nil {
					return err.Error()
				}

				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				d.mu.Unlock()
				return s

			case "allow-zero-seqnum":
				// Build a skeletal two-level compaction over the current
				// version; each input line below reconfigures it.
				d.mu.Lock()
				c := &compaction{
					cmp:      d.cmp,
					comparer: d.opts.Comparer,
					version:  d.mu.versions.currentVersion(),
					inputs:   []compactionLevel{{}, {}},
				}
				c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
				d.mu.Unlock()

				var buf bytes.Buffer
				for _, line := range strings.Split(td.Input, "\n") {
					parts := strings.Fields(line)
					if len(parts) == 0 {
						continue
					}
					// Reset per-line state; -1 marks "start level not yet
					// determined" for the file-spec case below.
					c.flushing = nil
					c.startLevel.level = -1

					var startFiles, outputFiles []*fileMetadata

					switch {
					case len(parts) == 1 && parts[0] == "flush":
						// Model a flush: inputs are the memtable queue and
						// the output level is L0.
						c.outputLevel.level = 0
						d.mu.Lock()
						c.flushing = d.mu.mem.queue
						d.mu.Unlock()

					default:
						// The first spec fixes the start level; all files must
						// be in that level or the next one down.
						for _, p := range parts {
							level, meta := parseMeta(p)
							if c.startLevel.level == -1 {
								c.startLevel.level = level
							}

							switch level {
							case c.startLevel.level:
								startFiles = append(startFiles, meta)
							case c.startLevel.level + 1:
								outputFiles = append(outputFiles, meta)
							default:
								return fmt.Sprintf("invalid level %d: expected %d or %d",
									level, c.startLevel.level, c.startLevel.level+1)
							}
						}
						c.outputLevel.level = c.startLevel.level + 1
						c.startLevel.files = manifest.NewLevelSliceSpecificOrder(startFiles)
						c.outputLevel.files = manifest.NewLevelSliceKeySorted(c.cmp, outputFiles)
					}

					c.smallest, c.largest = manifest.KeyRange(c.cmp,
						c.startLevel.files.Iter(),
						c.outputLevel.files.Iter())

					// Recompute in-use key ranges before asking whether the
					// compaction may zero sequence numbers.
					c.inuseKeyRanges = nil
					c.setupInuseKeyRanges()
					fmt.Fprintf(&buf, "%t\n", c.allowZeroSeqNum())
				}
				return buf.String()

			default:
				return fmt.Sprintf("unknown command: %s", td.Cmd)
			}
		})
}
  2745  
  2746  func TestCompactionErrorOnUserKeyOverlap(t *testing.T) {
  2747  	cmp := DefaultComparer.Compare
  2748  	parseMeta := func(s string) *fileMetadata {
  2749  		parts := strings.Split(s, "-")
  2750  		if len(parts) != 2 {
  2751  			t.Fatalf("malformed table spec: %s", s)
  2752  		}
  2753  		m := (&fileMetadata{}).ExtendPointKeyBounds(
  2754  			cmp,
  2755  			base.ParseInternalKey(strings.TrimSpace(parts[0])),
  2756  			base.ParseInternalKey(strings.TrimSpace(parts[1])),
  2757  		)
  2758  		m.SmallestSeqNum = m.Smallest.SeqNum()
  2759  		m.LargestSeqNum = m.Largest.SeqNum()
  2760  		m.InitPhysicalBacking()
  2761  		return m
  2762  	}
  2763  
  2764  	datadriven.RunTest(t, "testdata/compaction_error_on_user_key_overlap",
  2765  		func(t *testing.T, d *datadriven.TestData) string {
  2766  			switch d.Cmd {
  2767  			case "error-on-user-key-overlap":
  2768  				c := &compaction{
  2769  					cmp:       DefaultComparer.Compare,
  2770  					comparer:  DefaultComparer,
  2771  					formatKey: DefaultComparer.FormatKey,
  2772  				}
  2773  				var files []manifest.NewFileEntry
  2774  				fileNum := FileNum(1)
  2775  
  2776  				for _, data := range strings.Split(d.Input, "\n") {
  2777  					meta := parseMeta(data)
  2778  					meta.FileNum = fileNum
  2779  					fileNum++
  2780  					files = append(files, manifest.NewFileEntry{Level: 1, Meta: meta})
  2781  				}
  2782  
  2783  				result := "OK"
  2784  				ve := &versionEdit{
  2785  					NewFiles: files,
  2786  				}
  2787  				if err := c.errorOnUserKeyOverlap(ve); err != nil {
  2788  					result = fmt.Sprint(err)
  2789  				}
  2790  				return result
  2791  
  2792  			default:
  2793  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  2794  			}
  2795  		})
  2796  }
  2797  
// TestCompactionErrorCleanup tests an error encountered during a compaction
// after some output tables have been created. It ensures that the pending
// output tables are removed from the filesystem.
//
// The test ingests two overlapping sstables (forcing a multi-file L0), then
// triggers a manual compaction with TargetFileSize=1 so it produces many
// output tables; an error is injected via errorfs right after the second
// output table is created.
func TestCompactionErrorCleanup(t *testing.T) {
	// protected by d.mu
	var (
		initialSetupDone bool
		tablesCreated    []FileNum
	)

	mem := vfs.NewMem()
	ii := errorfs.OnIndex(math.MaxInt32) // start disabled
	opts := (&Options{
		FS:     errorfs.Wrap(mem, errorfs.ErrInjected.If(ii)),
		Levels: make([]LevelOptions, numLevels),
		EventListener: &EventListener{
			TableCreated: func(info TableCreateInfo) {
				t.Log(info)

				// If the initial setup is over, record tables created and
				// inject an error immediately after the second table is
				// created.
				if initialSetupDone {
					tablesCreated = append(tablesCreated, info.FileNum)
					if len(tablesCreated) >= 2 {
						ii.Store(0)
					}
				}
			},
		},
	}).WithFSDefaults()
	// TargetFileSize=1 forces the compaction to roll over to a new output
	// table for every key, guaranteeing multiple output tables.
	for i := range opts.Levels {
		opts.Levels[i].TargetFileSize = 1
	}
	opts.testingRandomized(t)
	d, err := Open("", opts)
	require.NoError(t, err)

	// ingest builds an sstable named "ext" containing the given keys (with
	// empty values) and ingests it into the DB.
	ingest := func(keys ...string) {
		t.Helper()
		f, err := mem.Create("ext")
		require.NoError(t, err)

		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
		})
		for _, k := range keys {
			require.NoError(t, w.Set([]byte(k), nil))
		}
		require.NoError(t, w.Close())
		require.NoError(t, d.Ingest([]string{"ext"}))
	}
	ingest("a", "c")
	ingest("b")

	// Trigger a manual compaction, which will encounter an injected error
	// after the second table is created.
	d.mu.Lock()
	initialSetupDone = true
	d.mu.Unlock()
	err = d.Compact([]byte("a"), []byte("d"), false)
	require.Error(t, err, "injected error")

	d.mu.Lock()
	if len(tablesCreated) < 2 {
		t.Fatalf("expected 2 output tables created by compaction: found %d", len(tablesCreated))
	}
	d.mu.Unlock()

	require.NoError(t, d.Close())
	// After the failed compaction and DB close, none of the compaction's
	// output tables should remain on the filesystem.
	for _, fileNum := range tablesCreated {
		filename := fmt.Sprintf("%s.sst", fileNum)
		if _, err = mem.Stat(filename); err == nil || !oserror.IsNotExist(err) {
			t.Errorf("expected %q to not exist: %s", filename, err)
		}
	}
}
  2875  
// TestCompactionCheckOrdering exercises the file-ordering checks performed
// while constructing a compaction's input iterator, driven by
// testdata/compaction_check_ordering.
func TestCompactionCheckOrdering(t *testing.T) {
	cmp := DefaultComparer.Compare
	// parseMeta converts a "<smallest>-<largest>" internal-key pair into a
	// fileMetadata with matching point-key bounds and seqnums.
	parseMeta := func(s string) *fileMetadata {
		parts := strings.Split(s, "-")
		if len(parts) != 2 {
			t.Fatalf("malformed table spec: %s", s)
		}
		m := (&fileMetadata{}).ExtendPointKeyBounds(
			cmp,
			base.ParseInternalKey(strings.TrimSpace(parts[0])),
			base.ParseInternalKey(strings.TrimSpace(parts[1])),
		)
		m.SmallestSeqNum = m.Smallest.SeqNum()
		m.LargestSeqNum = m.Largest.SeqNum()
		m.InitPhysicalBacking()
		return m
	}

	datadriven.RunTest(t, "testdata/compaction_check_ordering",
		func(t *testing.T, d *datadriven.TestData) string {
			switch d.Cmd {
			case "check-ordering":
				// Levels start at -1 so the parser below can tell which of
				// the start/output levels the input has specified so far.
				c := &compaction{
					cmp:       DefaultComparer.Compare,
					comparer:  DefaultComparer,
					formatKey: DefaultComparer.FormatKey,
					logger:    panicLogger{},
					inputs:    []compactionLevel{{level: -1}, {level: -1}},
				}
				c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
				var startFiles, outputFiles []*fileMetadata
				var sublevels []manifest.LevelSlice
				var files *[]*fileMetadata
				var sublevel []*fileMetadata
				var sublevelNum int
				var parsingSublevel bool
				fileNum := FileNum(1)

				// switchSublevel finalizes the in-progress L0 sublevel, if
				// any, appending its accumulated files as a LevelSlice.
				switchSublevel := func() {
					if sublevel != nil {
						sublevels = append(
							sublevels, manifest.NewLevelSliceSpecificOrder(sublevel),
						)
						sublevel = nil
					}
					parsingSublevel = false
				}

				for _, data := range strings.Split(d.Input, "\n") {
					if data[0] == 'L' && len(data) == 4 {
						// Format L0.{sublevel}.
						switchSublevel()
						level, err := strconv.Atoi(data[1:2])
						if err != nil {
							return err.Error()
						}
						sublevelNum, err = strconv.Atoi(data[3:])
						if err != nil {
							return err.Error()
						}
						if c.startLevel.level == -1 {
							c.startLevel.level = level
							files = &startFiles
						}
						parsingSublevel = true
					} else if data[0] == 'L' {
						// Format L{level}: begins a non-sublevel level. The
						// first such header is the start level, the second is
						// the output level.
						switchSublevel()
						level, err := strconv.Atoi(data[1:])
						if err != nil {
							return err.Error()
						}
						if c.startLevel.level == -1 {
							c.startLevel.level = level
							files = &startFiles
						} else if c.outputLevel.level == -1 {
							if c.startLevel.level >= level {
								return fmt.Sprintf("startLevel=%d >= outputLevel=%d\n", c.startLevel.level, level)
							}
							c.outputLevel.level = level
							files = &outputFiles
						} else {
							return "outputLevel already set\n"
						}
					} else {
						// Any other line is a file spec for the current level
						// (and current sublevel, if one is being parsed).
						meta := parseMeta(data)
						meta.FileNum = fileNum
						fileNum++
						*files = append(*files, meta)
						if parsingSublevel {
							meta.SubLevel = sublevelNum
							sublevel = append(sublevel, meta)
						}
					}
				}

				switchSublevel()
				c.startLevel.files = manifest.NewLevelSliceSpecificOrder(startFiles)
				c.outputLevel.files = manifest.NewLevelSliceSpecificOrder(outputFiles)
				if c.outputLevel.level == -1 {
					c.outputLevel.level = 0
				}
				if c.startLevel.level == 0 {
					// We don't change the input files for the compaction beyond this point.
					c.startLevel.l0SublevelInfo = generateSublevelInfo(c.cmp, c.startLevel.files)
				}

				// The iterators are never positioned, so errorIter suffices;
				// constructing the input iterator is what triggers the
				// ordering checks under test.
				newIters := func(
					_ context.Context, _ *manifest.FileMetadata, _ *IterOptions, _ internalIterOpts,
				) (internalIterator, keyspan.FragmentIterator, error) {
					return &errorIter{}, nil, nil
				}
				result := "OK"
				_, err := c.newInputIter(newIters, nil, nil)
				if err != nil {
					result = fmt.Sprint(err)
				}
				return result

			default:
				return fmt.Sprintf("unknown command: %s", d.Cmd)
			}
		})
}
  2999  
// mockSplitter is a test double for compactionOutputSplitter that returns a
// canned split decision.
type mockSplitter struct {
	// shouldSplitVal is returned verbatim from shouldSplitBefore.
	shouldSplitVal maybeSplit
}

// shouldSplitBefore returns the canned decision, ignoring both the key and
// the writer.
func (m *mockSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) maybeSplit {
	return m.shouldSplitVal
}

// onNewOutput is a no-op; the mock imposes no split key and has no state to
// reset between outputs.
func (m *mockSplitter) onNewOutput(key []byte) []byte {
	return nil
}
  3011  
// TestCompactionOutputSplitters is a datadriven test for the compaction
// output splitter implementations (splitterGroup, userKeyChangeSplitter,
// mockSplitter), driven by testdata/compaction_output_splitters.
func TestCompactionOutputSplitters(t *testing.T) {
	// Three splitter slots that test commands reference by name; main is the
	// splitter exercised by should-split-before.
	var main, child0, child1 compactionOutputSplitter
	var prevUserKey []byte
	// pickSplitter maps a slot name from the test input to one of the three
	// slots above.
	pickSplitter := func(input string) *compactionOutputSplitter {
		switch input {
		case "main":
			return &main
		case "child0":
			return &child0
		case "child1":
			return &child1
		default:
			t.Fatalf("invalid splitter slot: %s", input)
			return nil
		}
	}

	datadriven.RunTest(t, "testdata/compaction_output_splitters",
		func(t *testing.T, d *datadriven.TestData) string {
			switch d.Cmd {
			case "reset":
				main = nil
				child0 = nil
				child1 = nil
			case "init":
				if len(d.CmdArgs) < 2 {
					return "expected at least 2 args"
				}
				splitterToInit := pickSplitter(d.CmdArgs[0].Key)
				switch d.CmdArgs[1].Key {
				case "array":
					// Compose child0 and child1 into a splitterGroup.
					*splitterToInit = &splitterGroup{
						cmp:       base.DefaultComparer.Compare,
						splitters: []compactionOutputSplitter{child0, child1},
					}
				case "mock":
					*splitterToInit = &mockSplitter{}
				case "userkey":
					*splitterToInit = &userKeyChangeSplitter{
						cmp: base.DefaultComparer.Compare,
						unsafePrevUserKey: func() []byte {
							return prevUserKey
						},
						splitter: child0,
					}
				}
				(*splitterToInit).onNewOutput(nil)
			case "set-should-split":
				if len(d.CmdArgs) < 2 {
					return "expected at least 2 args"
				}
				// Only mockSplitter slots support canned decisions.
				splitterToSet := (*pickSplitter(d.CmdArgs[0].Key)).(*mockSplitter)
				var val maybeSplit
				switch d.CmdArgs[1].Key {
				case "split-now":
					val = splitNow
				case "no-split":
					val = noSplit
				default:
					t.Fatalf("unexpected value for should-split: %s", d.CmdArgs[1].Key)
				}
				splitterToSet.shouldSplitVal = val
			case "should-split-before":
				if len(d.CmdArgs) < 1 {
					return "expected at least 1 arg"
				}
				key := base.ParseInternalKey(d.CmdArgs[0].Key)
				shouldSplit := main.shouldSplitBefore(&key, nil)
				if shouldSplit == splitNow {
					// A split starts a new output; clear the previous user
					// key tracked for userKeyChangeSplitter.
					main.onNewOutput(key.UserKey)
					prevUserKey = nil
				} else {
					prevUserKey = key.UserKey
				}
				return shouldSplit.String()
			default:
				return fmt.Sprintf("unknown command: %s", d.Cmd)
			}
			return "ok"
		})
}
  3093  
// TestCompactFlushQueuedMemTableAndFlushMetrics verifies that a manual
// compaction forces a flush of a queued (immutable) memtable, and that the
// flush write-throughput metrics are eventually populated.
func TestCompactFlushQueuedMemTableAndFlushMetrics(t *testing.T) {
	t.Run("", func(t *testing.T) {
		// Verify that manual compaction forces a flush of a queued memtable.

		mem := vfs.NewMem()
		d, err := Open("", testingRandomized(t, &Options{
			FS: mem,
		}).WithFSDefaults())
		require.NoError(t, err)

		// Add the key "a" to the memtable, then fill up the memtable with the key
		// prefix "b". The compaction will only overlap with the queued memtable,
		// not the mutable memtable.
		// NB: The initial memtable size is 256KB, which is filled up with random
		// values which typically don't compress well. The test also appends the
		// random value to the "b" key to limit overwriting of the same key, which
		// would get collapsed at flush time since there are no open snapshots.
		value := make([]byte, 50)
		_, err = crand.Read(value)
		require.NoError(t, err)
		require.NoError(t, d.Set([]byte("a"), value, nil))
		// Keep writing until a second memtable appears in the queue, i.e.
		// the memtable containing "a" has been rotated out of mutability.
		for {
			_, err = crand.Read(value)
			require.NoError(t, err)
			require.NoError(t, d.Set(append([]byte("b"), value...), value, nil))
			d.mu.Lock()
			done := len(d.mu.mem.queue) == 2
			d.mu.Unlock()
			if done {
				break
			}
		}

		// The compaction over "a" must flush the queued memtable first,
		// shrinking the queue back to just the mutable memtable.
		require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false))
		d.mu.Lock()
		require.Equal(t, 1, len(d.mu.mem.queue))
		d.mu.Unlock()
		// Flush metrics are updated after and non-atomically with the memtable
		// being removed from the queue, so poll with a 2s deadline.
		for begin := time.Now(); ; {
			metrics := d.Metrics()
			require.NotNil(t, metrics)
			if metrics.Flush.WriteThroughput.Bytes >= 50*1024 {
				// The writes (during which the flush is idle) and the flush work
				// should not be so fast as to be unrealistic. If these turn out to be
				// flaky we could instead inject a clock.
				//
				// Windows timer precision is bad (on the order of 1 millisecond) and
				// can cause the duration to be 0.
				if runtime.GOOS != "windows" {
					tinyInterval := 50 * time.Microsecond
					require.Less(t, tinyInterval, metrics.Flush.WriteThroughput.WorkDuration)
					require.Less(t, tinyInterval, metrics.Flush.WriteThroughput.IdleDuration)
				}
				break
			}
			if time.Since(begin) > 2*time.Second {
				t.Fatal("flush did not happen")
			}
			time.Sleep(time.Millisecond)
		}
		require.NoError(t, d.Close())
	})
}
  3158  
  3159  func TestCompactFlushQueuedLargeBatch(t *testing.T) {
  3160  	// Verify that compaction forces a flush of a queued large batch.
  3161  
  3162  	mem := vfs.NewMem()
  3163  	d, err := Open("", testingRandomized(t, &Options{
  3164  		FS: mem,
  3165  	}).WithFSDefaults())
  3166  	require.NoError(t, err)
  3167  
  3168  	// The default large batch threshold is slightly less than 1/2 of the
  3169  	// memtable size which makes triggering a problem with flushing queued large
  3170  	// batches irritating. Manually adjust the threshold to 1/8 of the memtable
  3171  	// size in order to more easily create a situation where a large batch is
  3172  	// queued but not automatically flushed.
  3173  	d.mu.Lock()
  3174  	d.largeBatchThreshold = d.opts.MemTableSize / 8
  3175  	require.Equal(t, 1, len(d.mu.mem.queue))
  3176  	d.mu.Unlock()
  3177  
  3178  	// Set a record with a large value. This will be transformed into a large
  3179  	// batch and placed in the flushable queue.
  3180  	require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("v"), int(d.largeBatchThreshold)), nil))
  3181  	d.mu.Lock()
  3182  	require.Greater(t, len(d.mu.mem.queue), 1)
  3183  	d.mu.Unlock()
  3184  
  3185  	require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false))
  3186  	d.mu.Lock()
  3187  	require.Equal(t, 1, len(d.mu.mem.queue))
  3188  	d.mu.Unlock()
  3189  
  3190  	require.NoError(t, d.Close())
  3191  }
  3192  
// TestFlushError verifies that a flush survives transient sstable-creation
// errors: despite the injected failures, Flush ultimately returns no error.
func TestFlushError(t *testing.T) {
	// Error the first three times we try to write a sstable.
	var errorOps atomic.Int32
	errorOps.Store(3)
	// Inject errorfs.ErrInjected on .sst creation while the errorOps budget
	// is non-negative; each creation attempt decrements the budget.
	fs := errorfs.Wrap(vfs.NewMem(), errorfs.InjectorFunc(func(op errorfs.Op) error {
		if op.Kind == errorfs.OpCreate && filepath.Ext(op.Path) == ".sst" && errorOps.Add(-1) >= 0 {
			return errorfs.ErrInjected
		}
		return nil
	}))
	d, err := Open("", testingRandomized(t, &Options{
		FS: fs,
		EventListener: &EventListener{
			// The injected errors surface as background errors; log them for
			// test debuggability.
			BackgroundError: func(err error) {
				t.Log(err)
			},
		},
	}).WithFSDefaults())
	require.NoError(t, err)
	require.NoError(t, d.Set([]byte("a"), []byte("foo"), NoSync))
	require.NoError(t, d.Flush())
	require.NoError(t, d.Close())
}
  3216  
  3217  func TestAdjustGrandparentOverlapBytesForFlush(t *testing.T) {
  3218  	// 500MB in Lbase
  3219  	var lbaseFiles []*manifest.FileMetadata
  3220  	const lbaseSize = 5 << 20
  3221  	for i := 0; i < 100; i++ {
  3222  		m := &manifest.FileMetadata{Size: lbaseSize, FileNum: FileNum(i)}
  3223  		m.InitPhysicalBacking()
  3224  		lbaseFiles =
  3225  			append(lbaseFiles, m)
  3226  	}
  3227  	const maxOutputFileSize = 2 << 20
  3228  	// 20MB max overlap, so flush split into 25 files.
  3229  	const maxOverlapBytes = 20 << 20
  3230  	ls := manifest.NewLevelSliceSpecificOrder(lbaseFiles)
  3231  	testCases := []struct {
  3232  		flushingBytes        uint64
  3233  		adjustedOverlapBytes uint64
  3234  	}{
  3235  		// Flushes large enough that 25 files is acceptable.
  3236  		{flushingBytes: 128 << 20, adjustedOverlapBytes: 20971520},
  3237  		{flushingBytes: 64 << 20, adjustedOverlapBytes: 20971520},
  3238  		// Small increase in adjustedOverlapBytes.
  3239  		{flushingBytes: 32 << 20, adjustedOverlapBytes: 32768000},
  3240  		// Large increase in adjusterOverlapBytes, to limit to 4 files.
  3241  		{flushingBytes: 1 << 20, adjustedOverlapBytes: 131072000},
  3242  	}
  3243  	for _, tc := range testCases {
  3244  		t.Run("", func(t *testing.T) {
  3245  			c := compaction{
  3246  				grandparents:      ls,
  3247  				maxOverlapBytes:   maxOverlapBytes,
  3248  				maxOutputFileSize: maxOutputFileSize,
  3249  			}
  3250  			adjustGrandparentOverlapBytesForFlush(&c, tc.flushingBytes)
  3251  			require.Equal(t, tc.adjustedOverlapBytes, c.maxOverlapBytes)
  3252  		})
  3253  	}
  3254  }
  3255  
  3256  func TestCompactionInvalidBounds(t *testing.T) {
  3257  	db, err := Open("", testingRandomized(t, &Options{
  3258  		FS: vfs.NewMem(),
  3259  	}).WithFSDefaults())
  3260  	require.NoError(t, err)
  3261  	defer db.Close()
  3262  	require.NoError(t, db.Compact([]byte("a"), []byte("b"), false))
  3263  	require.Error(t, db.Compact([]byte("a"), []byte("a"), false))
  3264  	require.Error(t, db.Compact([]byte("b"), []byte("a"), false))
  3265  }
  3266  
// Test_calculateInuseKeyRanges exercises calculateInuseKeyRanges against
// versions with varying overlap between adjacent levels, comparing the
// computed in-use user-key ranges to expected values.
func Test_calculateInuseKeyRanges(t *testing.T) {
	opts := (*Options)(nil).EnsureDefaults()
	cmp := base.DefaultComparer.Compare
	// newFileMeta constructs file metadata with the given file number, size,
	// and point-key bounds.
	newFileMeta := func(fileNum FileNum, size uint64, smallest, largest base.InternalKey) *fileMetadata {
		m := (&fileMetadata{
			FileNum: fileNum,
			Size:    size,
		}).ExtendPointKeyBounds(opts.Comparer.Compare, smallest, largest)
		m.InitPhysicalBacking()
		return m
	}
	// Each case supplies a version, a starting level, a depth (number of
	// levels to consider), and the [smallest, largest] user-key window.
	tests := []struct {
		name     string
		v        *version
		level    int
		depth    int
		smallest []byte
		largest  []byte
		want     []manifest.UserKeyRange
	}{
		{
			name: "No files in next level",
			v: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						1,
						1,
						base.ParseInternalKey("a.SET.2"),
						base.ParseInternalKey("c.SET.2"),
					),
					newFileMeta(
						2,
						1,
						base.ParseInternalKey("d.SET.2"),
						base.ParseInternalKey("e.SET.2"),
					),
				},
			}),
			level:    1,
			depth:    2,
			smallest: []byte("a"),
			largest:  []byte("e"),
			want: []manifest.UserKeyRange{
				{
					Start: []byte("a"),
					End:   []byte("c"),
				},
				{
					Start: []byte("d"),
					End:   []byte("e"),
				},
			},
		},
		{
			name: "No overlapping key ranges",
			v: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						1,
						1,
						base.ParseInternalKey("a.SET.1"),
						base.ParseInternalKey("c.SET.1"),
					),
					newFileMeta(
						2,
						1,
						base.ParseInternalKey("l.SET.1"),
						base.ParseInternalKey("p.SET.1"),
					),
				},
				2: {
					newFileMeta(
						3,
						1,
						base.ParseInternalKey("d.SET.1"),
						base.ParseInternalKey("i.SET.1"),
					),
					newFileMeta(
						4,
						1,
						base.ParseInternalKey("s.SET.1"),
						base.ParseInternalKey("w.SET.1"),
					),
				},
			}),
			level:    1,
			depth:    2,
			smallest: []byte("a"),
			largest:  []byte("z"),
			want: []manifest.UserKeyRange{
				{
					Start: []byte("a"),
					End:   []byte("c"),
				},
				{
					Start: []byte("d"),
					End:   []byte("i"),
				},
				{
					Start: []byte("l"),
					End:   []byte("p"),
				},
				{
					Start: []byte("s"),
					End:   []byte("w"),
				},
			},
		},
		{
			name: "First few non-overlapping, followed by overlapping",
			v: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						1,
						1,
						base.ParseInternalKey("a.SET.1"),
						base.ParseInternalKey("c.SET.1"),
					),
					newFileMeta(
						2,
						1,
						base.ParseInternalKey("d.SET.1"),
						base.ParseInternalKey("e.SET.1"),
					),
					newFileMeta(
						3,
						1,
						base.ParseInternalKey("n.SET.1"),
						base.ParseInternalKey("o.SET.1"),
					),
					newFileMeta(
						4,
						1,
						base.ParseInternalKey("p.SET.1"),
						base.ParseInternalKey("q.SET.1"),
					),
				},
				2: {
					newFileMeta(
						5,
						1,
						base.ParseInternalKey("m.SET.1"),
						base.ParseInternalKey("q.SET.1"),
					),
					newFileMeta(
						6,
						1,
						base.ParseInternalKey("s.SET.1"),
						base.ParseInternalKey("w.SET.1"),
					),
				},
			}),
			level:    1,
			depth:    2,
			smallest: []byte("a"),
			largest:  []byte("z"),
			want: []manifest.UserKeyRange{
				{
					Start: []byte("a"),
					End:   []byte("c"),
				},
				{
					Start: []byte("d"),
					End:   []byte("e"),
				},
				{
					Start: []byte("m"),
					End:   []byte("q"),
				},
				{
					Start: []byte("s"),
					End:   []byte("w"),
				},
			},
		},
		{
			name: "All overlapping",
			v: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						1,
						1,
						base.ParseInternalKey("d.SET.1"),
						base.ParseInternalKey("e.SET.1"),
					),
					newFileMeta(
						2,
						1,
						base.ParseInternalKey("n.SET.1"),
						base.ParseInternalKey("o.SET.1"),
					),
					newFileMeta(
						3,
						1,
						base.ParseInternalKey("p.SET.1"),
						base.ParseInternalKey("q.SET.1"),
					),
				},
				2: {
					newFileMeta(
						4,
						1,
						base.ParseInternalKey("a.SET.1"),
						base.ParseInternalKey("c.SET.1"),
					),
					newFileMeta(
						5,
						1,
						base.ParseInternalKey("d.SET.1"),
						base.ParseInternalKey("w.SET.1"),
					),
				},
			}),
			level:    1,
			depth:    2,
			smallest: []byte("a"),
			largest:  []byte("z"),
			want: []manifest.UserKeyRange{
				{
					Start: []byte("a"),
					End:   []byte("c"),
				},
				{
					Start: []byte("d"),
					End:   []byte("w"),
				},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := calculateInuseKeyRanges(tt.v, cmp, tt.level, tt.depth, tt.smallest, tt.largest); !reflect.DeepEqual(got, tt.want) {
				t.Errorf("calculateInuseKeyRanges() = %v, want %v", got, tt.want)
			}
		})
	}
}
  3504  
  3505  func TestMarkedForCompaction(t *testing.T) {
  3506  	var mem vfs.FS = vfs.NewMem()
  3507  	var d *DB
  3508  	defer func() {
  3509  		if d != nil {
  3510  			require.NoError(t, d.Close())
  3511  		}
  3512  	}()
  3513  
  3514  	var buf bytes.Buffer
  3515  	opts := (&Options{
  3516  		FS:                          mem,
  3517  		DebugCheck:                  DebugCheckLevels,
  3518  		DisableAutomaticCompactions: true,
  3519  		FormatMajorVersion:          internalFormatNewest,
  3520  		EventListener: &EventListener{
  3521  			CompactionEnd: func(info CompactionInfo) {
  3522  				// Fix the job ID and durations for determinism.
  3523  				info.JobID = 100
  3524  				info.Duration = time.Second
  3525  				info.TotalDuration = 2 * time.Second
  3526  				fmt.Fprintln(&buf, info)
  3527  			},
  3528  		},
  3529  	}).WithFSDefaults()
  3530  
  3531  	reset := func() {
  3532  		if d != nil {
  3533  			require.NoError(t, d.Close())
  3534  		}
  3535  		mem = vfs.NewMem()
  3536  		require.NoError(t, mem.MkdirAll("ext", 0755))
  3537  
  3538  		var err error
  3539  		d, err = Open("", opts)
  3540  		require.NoError(t, err)
  3541  	}
  3542  	datadriven.RunTest(t, "testdata/marked_for_compaction", func(t *testing.T, td *datadriven.TestData) string {
  3543  		switch td.Cmd {
  3544  		case "reset":
  3545  			reset()
  3546  			return ""
  3547  
  3548  		case "define":
  3549  			if d != nil {
  3550  				if err := d.Close(); err != nil {
  3551  					return err.Error()
  3552  				}
  3553  			}
  3554  			var err error
  3555  			if d, err = runDBDefineCmd(td, opts); err != nil {
  3556  				return err.Error()
  3557  			}
  3558  			d.mu.Lock()
  3559  			defer d.mu.Unlock()
  3560  			t := time.Now()
  3561  			d.timeNow = func() time.Time {
  3562  				t = t.Add(time.Second)
  3563  				return t
  3564  			}
  3565  			s := d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)
  3566  			return s
  3567  
  3568  		case "mark-for-compaction":
  3569  			d.mu.Lock()
  3570  			defer d.mu.Unlock()
  3571  			vers := d.mu.versions.currentVersion()
  3572  			var fileNum uint64
  3573  			td.ScanArgs(t, "file", &fileNum)
  3574  			for l, lm := range vers.Levels {
  3575  				iter := lm.Iter()
  3576  				for f := iter.First(); f != nil; f = iter.Next() {
  3577  					if f.FileNum != base.FileNum(fileNum) {
  3578  						continue
  3579  					}
  3580  					f.MarkedForCompaction = true
  3581  					vers.Stats.MarkedForCompaction++
  3582  					vers.Levels[l].InvalidateAnnotation(markedForCompactionAnnotator{})
  3583  					return fmt.Sprintf("marked L%d.%s", l, f.FileNum)
  3584  				}
  3585  			}
  3586  			return "not-found"
  3587  
  3588  		case "maybe-compact":
  3589  			d.mu.Lock()
  3590  			defer d.mu.Unlock()
  3591  			d.opts.DisableAutomaticCompactions = false
  3592  			d.maybeScheduleCompaction()
  3593  			for d.mu.compact.compactingCount > 0 {
  3594  				d.mu.compact.cond.Wait()
  3595  			}
  3596  
  3597  			fmt.Fprintln(&buf, d.mu.versions.currentVersion().DebugString(base.DefaultFormatter))
  3598  			s := strings.TrimSpace(buf.String())
  3599  			buf.Reset()
  3600  			opts.DisableAutomaticCompactions = true
  3601  			return s
  3602  
  3603  		default:
  3604  			return fmt.Sprintf("unknown command: %s", td.Cmd)
  3605  		}
  3606  	})
  3607  }
  3608  
// createManifestErrorInjector injects errors (when enabled) into vfs.FS calls
// to create MANIFEST files.
type createManifestErrorInjector struct {
	// enabled gates injection; errors are only returned once it is set.
	enabled atomic.Bool
}

// TODO(jackson): Replace the createManifestErrorInjector with the composition
// of primitives defined in errorfs. This may require additional primitives.

// String implements fmt.Stringer.
func (i *createManifestErrorInjector) String() string { return "MANIFEST-Creates" }

// enable enables error injection for the vfs.FS.
func (i *createManifestErrorInjector) enable() {
	i.enabled.Store(true)
}

// MaybeError implements errorfs.Injector. It returns errorfs.ErrInjected for
// any create of a file whose path contains "MANIFEST", once enabled.
func (i *createManifestErrorInjector) MaybeError(op errorfs.Op) error {
	if !i.enabled.Load() {
		return nil
	}
	// This necessitates having a MaxManifestSize of 1, to reliably induce
	// logAndApply errors.
	if strings.Contains(op.Path, "MANIFEST") && op.Kind == errorfs.OpCreate {
		return errorfs.ErrInjected
	}
	return nil
}

var _ errorfs.Injector = &createManifestErrorInjector{}
  3639  
// TestCompaction_LogAndApplyFails exercises a flush or ingest encountering an
// unrecoverable error during logAndApply.
//
// Regression test for #1669.
func TestCompaction_LogAndApplyFails(t *testing.T) {
	// flushKeys writes the given keys to the DB, flushing the resulting memtable.
	var key = []byte("foo")
	flushErrC := make(chan error)
	flushKeys := func(db *DB) error {
		b := db.NewBatch()
		err := b.Set(key, nil, nil)
		require.NoError(t, err)
		err = b.Commit(nil)
		require.NoError(t, err)
		// An error from a failing flush is returned asynchronously.
		go func() { _ = db.Flush() }()
		// The flush's backgroundErrorFn (below) forwards the error here.
		return <-flushErrC
	}

	// ingestKeys adds the given keys to the DB via an ingestion.
	ingestKeys := func(db *DB) error {
		// Create an SST for ingestion.
		const fName = "ext"
		f, err := db.opts.FS.Create(fName)
		require.NoError(t, err)
		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{})
		require.NoError(t, w.Set(key, nil))
		require.NoError(t, w.Close())
		// Ingest the SST.
		return db.Ingest([]string{fName})
	}

	testCases := []struct {
		name              string
		addFn             func(db *DB) error
		backgroundErrorFn func(*DB, error)
	}{
		{
			name:  "flush",
			addFn: flushKeys,
			backgroundErrorFn: func(db *DB, err error) {
				require.True(t, errors.Is(err, errorfs.ErrInjected))
				flushErrC <- err
				// A flush will attempt to retry in the background. For the purposes of
				// testing this particular scenario, where we would have crashed anyway,
				// drop the memtable on the floor to short circuit the retry loop.
				// NB: we hold db.mu here.
				var cur *flushableEntry
				cur, db.mu.mem.queue = db.mu.mem.queue[0], db.mu.mem.queue[1:]
				cur.readerUnrefLocked(true)
			},
		},
		{
			name:  "ingest",
			addFn: ingestKeys,
		},
	}

	runTest := func(t *testing.T, addFn func(db *DB) error, bgFn func(*DB, error)) {
		// db is declared before Open so the BackgroundError closure below can
		// capture it; it is assigned once Open returns.
		var db *DB
		inj := &createManifestErrorInjector{}
		logger := &fatalCapturingLogger{t: t}
		opts := (&Options{
			FS: errorfs.Wrap(vfs.NewMem(), inj),
			// Rotate the manifest after each write. This is required to trigger a
			// file creation, into which errors can be injected.
			MaxManifestFileSize: 1,
			Logger:              logger,
			EventListener: &EventListener{
				BackgroundError: func(err error) {
					if bgFn != nil {
						bgFn(db, err)
					}
				},
			},
			DisableAutomaticCompactions: true,
		}).WithFSDefaults()

		db, err := Open("", opts)
		require.NoError(t, err)
		defer func() { _ = db.Close() }()

		// Only inject errors after the DB has opened successfully.
		inj.enable()
		err = addFn(db)
		require.True(t, errors.Is(err, errorfs.ErrInjected))

		// Under normal circumstances, such an error in logAndApply would panic and
		// cause the DB to terminate here. Assert that we captured the fatal error.
		require.True(t, errors.Is(logger.err, errorfs.ErrInjected))
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			runTest(t, tc.addFn, tc.backgroundErrorFn)
		})
	}
}
  3736  
// TestSharedObjectDeletePacing tests that we don't throttle shared object
// deletes (see the TargetBytesDeletionRate option).
func TestSharedObjectDeletePacing(t *testing.T) {
	var opts Options
	opts.FS = vfs.NewMem()
	opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{
		"": remote.NewInMem(),
	})
	opts.Experimental.CreateOnShared = remote.CreateOnSharedAll
	// An extreme 1 byte/sec rate: if shared object deletion were paced, the
	// test below would hit the 60s timeout.
	opts.TargetByteDeletionRate = 1

	d, err := Open("", &opts)
	require.NoError(t, err)
	require.NoError(t, d.SetCreatorID(1))

	// randVal returns a fresh random 1KB value.
	randVal := func() []byte {
		res := make([]byte, 1024)
		_, err := crand.Read(res)
		require.NoError(t, err)
		return res
	}

	// We must set up things so that we will have more live bytes than obsolete
	// bytes, otherwise delete pacing will be disabled anyway.
	key := func(i int) string {
		return fmt.Sprintf("k%02d", i)
	}
	const numKeys = 20
	// Compact each key separately so each ends up in its own object.
	for i := 1; i <= numKeys; i++ {
		require.NoError(t, d.Set([]byte(key(i)), randVal(), nil))
		require.NoError(t, d.Compact([]byte(key(i)), []byte(key(i)+"1"), false))
	}

	// NB: err is written by the goroutine and read by the main goroutine only
	// after <-done; the channel close orders those accesses.
	done := make(chan struct{})
	go func() {
		err = d.DeleteRange([]byte(key(5)), []byte(key(9)), nil)
		if err == nil {
			err = d.Compact([]byte(key(5)), []byte(key(9)), false)
		}
		// Wait for objects to be deleted.
		for {
			time.Sleep(10 * time.Millisecond)
			if len(d.objProvider.List()) < numKeys-2 {
				break
			}
		}
		close(done)
	}()

	select {
	case <-time.After(60 * time.Second):
		// Don't close the DB in this case (the goroutine above might panic).
		t.Fatalf("compaction timed out, possibly due to incorrect deletion pacing")
	case <-done:
	}
	require.NoError(t, err)
	d.Close()
}
  3795  
// WriteErrorInjector is an errorfs.Injector that, while enabled, fails every
// file write with errorfs.ErrInjected (see MaybeError). It starts disabled.
type WriteErrorInjector struct {
	// enabled gates injection; it is toggled atomically via enable/disable so
	// it can be flipped concurrently with in-flight filesystem operations.
	enabled atomic.Bool
}
  3799  
  3800  // TODO(jackson): Replace WriteErrorInjector with use of primitives in errorfs,
  3801  // adding new primitives as necessary.
  3802  
  3803  func (i *WriteErrorInjector) String() string { return "FileWrites(ErrInjected)" }
  3804  
// enable enables error injection for the vfs.FS: subsequent file writes fail
// with errorfs.ErrInjected.
func (i *WriteErrorInjector) enable() {
	i.enabled.Store(true)
}
  3809  
// disable disables error injection for the vfs.FS.
func (i *WriteErrorInjector) disable() {
	i.enabled.Store(false)
}
  3814  
  3815  // MaybeError implements errorfs.Injector.
  3816  func (i *WriteErrorInjector) MaybeError(op errorfs.Op) error {
  3817  	if !i.enabled.Load() {
  3818  		return nil
  3819  	}
  3820  	// Fail any future write.
  3821  	if op.Kind == errorfs.OpFileWrite {
  3822  		return errorfs.ErrInjected
  3823  	}
  3824  	return nil
  3825  }
  3826  
  3827  var _ errorfs.Injector = &WriteErrorInjector{}
  3828  
// Cumulative compaction stats shouldn't be updated on compaction error.
func TestCompactionErrorStats(t *testing.T) {
	// protected by d.mu
	var (
		useInjector   bool
		tablesCreated []FileNum
	)

	mem := vfs.NewMem()
	injector := &WriteErrorInjector{}
	opts := (&Options{
		FS:     errorfs.Wrap(mem, injector),
		Levels: make([]LevelOptions, numLevels),
		EventListener: &EventListener{
			TableCreated: func(info TableCreateInfo) {
				t.Log(info)

				if useInjector {
					// We'll write 3 tables during compaction, and we only need
					// the writes to error on the third file write, so only enable
					// the injector after the first two files have been written to.
					tablesCreated = append(tablesCreated, info.FileNum)
					if len(tablesCreated) >= 2 {
						injector.enable()
					}
				}
			},
		},
	}).WithFSDefaults()
	// Tiny target file sizes so the compaction splits its output across
	// multiple tables; the TableCreated hook above relies on that.
	for i := range opts.Levels {
		opts.Levels[i].TargetFileSize = 1
	}
	opts.testingRandomized(t)
	d, err := Open("", opts)
	require.NoError(t, err)

	// ingest builds an external sstable containing the given keys (with nil
	// values) and ingests it into the DB.
	ingest := func(keys ...string) {
		t.Helper()
		f, err := mem.Create("ext")
		require.NoError(t, err)

		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
		})
		for _, k := range keys {
			require.NoError(t, w.Set([]byte(k), nil))
		}
		require.NoError(t, w.Close())
		require.NoError(t, d.Ingest([]string{"ext"}))
	}
	ingest("a", "c")
	// Snapshot will preserve the older "a" key during compaction.
	snap := d.NewSnapshot()
	ingest("a", "b")

	// Trigger a manual compaction, which will encounter an injected error
	// after the second table is created.
	d.mu.Lock()
	useInjector = true
	d.mu.Unlock()

	err = d.Compact([]byte("a"), []byte("d"), false)
	require.Error(t, err, "injected error")

	// Due to the error, stats shouldn't have been updated.
	d.mu.Lock()
	require.Equal(t, 0, int(d.mu.snapshots.cumulativePinnedCount))
	require.Equal(t, 0, int(d.mu.snapshots.cumulativePinnedSize))
	useInjector = false
	d.mu.Unlock()

	injector.disable()

	// The following compaction won't error, but snapshot is open, so snapshot
	// pinned stats should update.
	require.NoError(t, d.Compact([]byte("a"), []byte("d"), false))
	require.NoError(t, snap.Close())

	d.mu.Lock()
	// One entry (the older "a") is pinned by the snapshot. NOTE(review): the
	// expected pinned size of 9 appears to be empirical; confirm it if the
	// sstable encoding ever changes.
	require.Equal(t, 1, int(d.mu.snapshots.cumulativePinnedCount))
	require.Equal(t, 9, int(d.mu.snapshots.cumulativePinnedSize))
	d.mu.Unlock()
	require.NoError(t, d.Close())
}