github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/compaction_test.go (about)

     1  // Copyright 2013 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package bitalostable
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"math"
    11  	"math/rand"
    12  	"path/filepath"
    13  	"reflect"
    14  	"regexp"
    15  	"runtime"
    16  	"sort"
    17  	"strconv"
    18  	"strings"
    19  	"sync"
    20  	"sync/atomic"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/cockroachdb/errors"
    25  	"github.com/cockroachdb/errors/oserror"
    26  	"github.com/stretchr/testify/require"
    27  	"github.com/zuoyebang/bitalostable/internal/base"
    28  	"github.com/zuoyebang/bitalostable/internal/datadriven"
    29  	"github.com/zuoyebang/bitalostable/internal/errorfs"
    30  	"github.com/zuoyebang/bitalostable/internal/keyspan"
    31  	"github.com/zuoyebang/bitalostable/internal/manifest"
    32  	"github.com/zuoyebang/bitalostable/sstable"
    33  	"github.com/zuoyebang/bitalostable/vfs"
    34  )
    35  
    36  func newVersion(opts *Options, files [numLevels][]*fileMetadata) *version {
    37  	return manifest.NewVersion(
    38  		opts.Comparer.Compare,
    39  		opts.Comparer.FormatKey,
    40  		opts.FlushSplitBytes,
    41  		files)
    42  }
    43  
// compactionPickerForTesting is a minimal compactionPicker implementation
// that lets tests force a deterministic compaction choice.
type compactionPickerForTesting struct {
	score         float64 // pickAuto returns nil unless score >= 1
	level         int     // start level of the forced compaction
	baseLevel     int     // Lbase; output level when the start level is 0
	opts          *Options
	vers          *manifest.Version
	maxLevelBytes [7]int64 // per-level target sizes passed to the pick helpers
}
    52  
// Compile-time check that the test picker satisfies compactionPicker.
var _ compactionPicker = &compactionPickerForTesting{}
    54  
// getScores returns all-zero scores; the test picker does not use scoring.
func (p *compactionPickerForTesting) getScores([]compactionInfo) [numLevels]float64 {
	return [numLevels]float64{}
}
    58  
// getBaseLevel returns the base level configured by the test.
func (p *compactionPickerForTesting) getBaseLevel() int {
	return p.baseLevel
}
    62  
// getEstimatedMaxWAmp always reports zero write amplification.
func (p *compactionPickerForTesting) getEstimatedMaxWAmp() float64 {
	return 0
}
    66  
// estimatedCompactionDebt always reports zero debt; l0ExtraSize is ignored.
func (p *compactionPickerForTesting) estimatedCompactionDebt(l0ExtraSize uint64) uint64 {
	return 0
}
    70  
// forceBaseLevel1 is a no-op for the test picker.
func (p *compactionPickerForTesting) forceBaseLevel1() {}
    72  
    73  func (p *compactionPickerForTesting) pickAuto(env compactionEnv) (pc *pickedCompaction) {
    74  	if p.score < 1 {
    75  		return nil
    76  	}
    77  	outputLevel := p.level + 1
    78  	if p.level == 0 {
    79  		outputLevel = p.baseLevel
    80  	}
    81  	iter := p.vers.Levels[p.level].Iter()
    82  	iter.First()
    83  	cInfo := candidateLevelInfo{
    84  		level:       p.level,
    85  		outputLevel: outputLevel,
    86  		file:        iter.Take(),
    87  	}
    88  	if cInfo.level == 0 {
    89  		return pickL0(env, p.opts, p.vers, p.baseLevel, diskAvailBytesInf)
    90  	}
    91  	return pickAutoLPositive(env, p.opts, p.vers, cInfo, p.baseLevel, diskAvailBytesInf, p.maxLevelBytes)
    92  }
    93  
// pickElisionOnlyCompaction never picks anything for the test picker.
func (p *compactionPickerForTesting) pickElisionOnlyCompaction(
	env compactionEnv,
) (pc *pickedCompaction) {
	return nil
}
    99  
// pickRewriteCompaction never picks anything for the test picker.
func (p *compactionPickerForTesting) pickRewriteCompaction(
	env compactionEnv,
) (pc *pickedCompaction) {
	return nil
}
   105  
   106  func (p *compactionPickerForTesting) pickManual(
   107  	env compactionEnv, manual *manualCompaction,
   108  ) (pc *pickedCompaction, retryLater bool) {
   109  	if p == nil {
   110  		return nil, false
   111  	}
   112  	return pickManualHelper(p.opts, manual, p.vers, p.baseLevel, diskAvailBytesInf, p.maxLevelBytes), false
   113  }
   114  
// pickReadTriggeredCompaction never picks anything for the test picker.
func (p *compactionPickerForTesting) pickReadTriggeredCompaction(
	env compactionEnv,
) (pc *pickedCompaction) {
	return nil
}
   120  
// TestPickCompaction verifies that the compaction picker selects the
// expected start-level files, output-level files, and grandparent files
// for a variety of LSM shapes. Each expected result is formatted as
// "<start files> <output files> <grandparent files>".
func TestPickCompaction(t *testing.T) {
	// fileNums renders a level slice as a sorted, comma-separated list of
	// file numbers, e.g. "100,110".
	fileNums := func(files manifest.LevelSlice) string {
		var ss []string
		files.Each(func(meta *fileMetadata) {
			ss = append(ss, strconv.Itoa(int(meta.FileNum)))
		})
		sort.Strings(ss)
		return strings.Join(ss, ",")
	}

	opts := (*Options)(nil).EnsureDefaults()
	// newFileMeta builds file metadata of the given size with point-key
	// bounds [smallest, largest].
	newFileMeta := func(fileNum FileNum, size uint64, smallest, largest base.InternalKey) *fileMetadata {
		m := (&fileMetadata{
			FileNum: fileNum,
			Size:    size,
		}).ExtendPointKeyBounds(opts.Comparer.Compare, smallest, largest)
		return m
	}

	testCases := []struct {
		desc    string
		version *version
		picker  compactionPickerForTesting
		// want is "<start files> <output files> <grandparent files>".
		want string
	}{
		{
			desc: "no compaction",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("j.SET.102"),
					),
				},
			}),
			want: "",
		},

		{
			desc: "1 L0 file",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("j.SET.102"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100  ",
		},

		{
			desc: "2 L0 files (0 overlaps)",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("j.SET.102"),
					),
					newFileMeta(
						110,
						1,
						base.ParseInternalKey("k.SET.111"),
						base.ParseInternalKey("l.SET.112"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100,110  ",
		},

		{
			desc: "2 L0 files, with ikey overlap",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("p.SET.102"),
					),
					newFileMeta(
						110,
						1,
						base.ParseInternalKey("j.SET.111"),
						base.ParseInternalKey("q.SET.112"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100,110  ",
		},

		{
			desc: "2 L0 files, with ukey overlap",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("i.SET.102"),
					),
					newFileMeta(
						110,
						1,
						base.ParseInternalKey("i.SET.111"),
						base.ParseInternalKey("i.SET.112"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100,110  ",
		},

		{
			desc: "1 L0 file, 2 L1 files (0 overlaps)",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("i.SET.102"),
					),
				},
				1: {
					newFileMeta(
						200,
						1,
						base.ParseInternalKey("a.SET.201"),
						base.ParseInternalKey("b.SET.202"),
					),
					newFileMeta(
						210,
						1,
						base.ParseInternalKey("y.SET.211"),
						base.ParseInternalKey("z.SET.212"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100  ",
		},

		{
			desc: "1 L0 file, 2 L1 files (1 overlap), 4 L2 files (3 overlaps)",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						100,
						1,
						base.ParseInternalKey("i.SET.101"),
						base.ParseInternalKey("t.SET.102"),
					),
				},
				1: {
					newFileMeta(
						200,
						1,
						base.ParseInternalKey("a.SET.201"),
						base.ParseInternalKey("e.SET.202"),
					),
					newFileMeta(
						210,
						1,
						base.ParseInternalKey("f.SET.211"),
						base.ParseInternalKey("j.SET.212"),
					),
				},
				2: {
					newFileMeta(
						300,
						1,
						base.ParseInternalKey("a.SET.301"),
						base.ParseInternalKey("b.SET.302"),
					),
					newFileMeta(
						310,
						1,
						base.ParseInternalKey("c.SET.311"),
						base.ParseInternalKey("g.SET.312"),
					),
					newFileMeta(
						320,
						1,
						base.ParseInternalKey("h.SET.321"),
						base.ParseInternalKey("m.SET.322"),
					),
					newFileMeta(
						330,
						1,
						base.ParseInternalKey("n.SET.331"),
						base.ParseInternalKey("z.SET.332"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     0,
				baseLevel: 1,
			},
			want: "100 210 310,320,330",
		},

		{
			desc: "4 L1 files, 2 L2 files, can grow",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						200,
						1,
						base.ParseInternalKey("i1.SET.201"),
						base.ParseInternalKey("i2.SET.202"),
					),
					newFileMeta(
						210,
						1,
						base.ParseInternalKey("j1.SET.211"),
						base.ParseInternalKey("j2.SET.212"),
					),
					newFileMeta(
						220,
						1,
						base.ParseInternalKey("k1.SET.221"),
						base.ParseInternalKey("k2.SET.222"),
					),
					newFileMeta(
						230,
						1,
						base.ParseInternalKey("l1.SET.231"),
						base.ParseInternalKey("l2.SET.232"),
					),
				},
				2: {
					newFileMeta(
						300,
						1,
						base.ParseInternalKey("a0.SET.301"),
						base.ParseInternalKey("l0.SET.302"),
					),
					newFileMeta(
						310,
						1,
						base.ParseInternalKey("l2.SET.311"),
						base.ParseInternalKey("z2.SET.312"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     1,
				baseLevel: 1,
			},
			want: "200,210,220 300 ",
		},

		{
			desc: "4 L1 files, 2 L2 files, can't grow (range)",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						200,
						1,
						base.ParseInternalKey("i1.SET.201"),
						base.ParseInternalKey("i2.SET.202"),
					),
					newFileMeta(
						210,
						1,
						base.ParseInternalKey("j1.SET.211"),
						base.ParseInternalKey("j2.SET.212"),
					),
					newFileMeta(
						220,
						1,
						base.ParseInternalKey("k1.SET.221"),
						base.ParseInternalKey("k2.SET.222"),
					),
					newFileMeta(
						230,
						1,
						base.ParseInternalKey("l1.SET.231"),
						base.ParseInternalKey("l2.SET.232"),
					),
				},
				2: {
					newFileMeta(
						300,
						1,
						base.ParseInternalKey("a0.SET.301"),
						base.ParseInternalKey("j0.SET.302"),
					),
					newFileMeta(
						310,
						1,
						base.ParseInternalKey("j2.SET.311"),
						base.ParseInternalKey("z2.SET.312"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     1,
				baseLevel: 1,
			},
			want: "200 300 ",
		},

		{
			desc: "4 L1 files, 2 L2 files, can't grow (size)",
			version: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						200,
						expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1,
						base.ParseInternalKey("i1.SET.201"),
						base.ParseInternalKey("i2.SET.202"),
					),
					newFileMeta(
						210,
						expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1,
						base.ParseInternalKey("j1.SET.211"),
						base.ParseInternalKey("j2.SET.212"),
					),
					newFileMeta(
						220,
						expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1,
						base.ParseInternalKey("k1.SET.221"),
						base.ParseInternalKey("k2.SET.222"),
					),
					newFileMeta(
						230,
						expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1,
						base.ParseInternalKey("l1.SET.231"),
						base.ParseInternalKey("l2.SET.232"),
					),
				},
				2: {
					newFileMeta(
						300,
						expandedCompactionByteSizeLimit(opts, 2, math.MaxUint64)-1,
						base.ParseInternalKey("a0.SET.301"),
						base.ParseInternalKey("l0.SET.302"),
					),
					newFileMeta(
						310,
						expandedCompactionByteSizeLimit(opts, 2, math.MaxUint64)-1,
						base.ParseInternalKey("l2.SET.311"),
						base.ParseInternalKey("z2.SET.312"),
					),
				},
			}),
			picker: compactionPickerForTesting{
				score:     99,
				level:     1,
				baseLevel: 1,
			},
			want: "200 300 ",
		},
	}

	for _, tc := range testCases {
		vs := &versionSet{
			opts:    opts,
			cmp:     DefaultComparer.Compare,
			cmpName: DefaultComparer.Name,
		}
		vs.versions.Init(nil)
		vs.append(tc.version)
		tc.picker.opts = opts
		tc.picker.vers = tc.version
		vs.picker = &tc.picker
		pc, got := vs.picker.pickAuto(compactionEnv{}), ""
		if pc != nil {
			c := newCompaction(pc, opts)
			// Render the picked compaction as
			// "<start files> <output files> <grandparent files>".
			got0 := fileNums(c.startLevel.files)
			got1 := fileNums(c.outputLevel.files)
			got2 := fileNums(c.grandparents)
			got = got0 + " " + got1 + " " + got2
		}
		if got != tc.want {
			t.Fatalf("%s:\ngot  %q\nwant %q", tc.desc, got, tc.want)
		}
	}
}
   534  
// TestElideTombstone checks compaction.elideTombstone: a point tombstone
// may be elided only when no lower level (below the compaction's output
// level) contains a file whose bounds cover the tombstone's user key.
func TestElideTombstone(t *testing.T) {
	opts := &Options{}
	opts.EnsureDefaults()

	// newFileMeta builds metadata with point-key bounds [smallest, largest].
	newFileMeta := func(smallest, largest base.InternalKey) *fileMetadata {
		m := (&fileMetadata{}).ExtendPointKeyBounds(opts.Comparer.Compare, smallest, largest)
		return m
	}

	type want struct {
		key      string
		expected bool // true if the tombstone on key may be elided
	}

	testCases := []struct {
		desc    string
		level   int
		version *version
		wants   []want
	}{
		{
			desc:    "empty",
			level:   1,
			version: newVersion(opts, [numLevels][]*fileMetadata{}),
			wants: []want{
				{"x", true},
			},
		},
		{
			desc:  "non-empty",
			level: 1,
			version: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						base.ParseInternalKey("c.SET.801"),
						base.ParseInternalKey("g.SET.800"),
					),
					newFileMeta(
						base.ParseInternalKey("x.SET.701"),
						base.ParseInternalKey("y.SET.700"),
					),
				},
				2: {
					newFileMeta(
						base.ParseInternalKey("d.SET.601"),
						base.ParseInternalKey("h.SET.600"),
					),
					newFileMeta(
						base.ParseInternalKey("r.SET.501"),
						base.ParseInternalKey("t.SET.500"),
					),
				},
				3: {
					newFileMeta(
						base.ParseInternalKey("f.SET.401"),
						base.ParseInternalKey("g.SET.400"),
					),
					newFileMeta(
						base.ParseInternalKey("w.SET.301"),
						base.ParseInternalKey("x.SET.300"),
					),
				},
				4: {
					newFileMeta(
						base.ParseInternalKey("f.SET.201"),
						base.ParseInternalKey("m.SET.200"),
					),
					newFileMeta(
						base.ParseInternalKey("t.SET.101"),
						base.ParseInternalKey("t.SET.100"),
					),
				},
			}),
			wants: []want{
				{"b", true},
				{"c", true},
				{"d", true},
				{"e", true},
				{"f", false},
				{"g", false},
				{"h", false},
				{"l", false},
				{"m", false},
				{"n", true},
				{"q", true},
				{"r", true},
				{"s", true},
				{"t", false},
				{"u", true},
				{"v", true},
				{"w", false},
				{"x", false},
				{"y", true},
				{"z", true},
			},
		},
		{
			desc:  "repeated ukey",
			level: 1,
			version: newVersion(opts, [numLevels][]*fileMetadata{
				6: {
					newFileMeta(
						base.ParseInternalKey("i.SET.401"),
						base.ParseInternalKey("i.SET.400"),
					),
					newFileMeta(
						base.ParseInternalKey("i.SET.301"),
						base.ParseInternalKey("k.SET.300"),
					),
					newFileMeta(
						base.ParseInternalKey("k.SET.201"),
						base.ParseInternalKey("m.SET.200"),
					),
					newFileMeta(
						base.ParseInternalKey("m.SET.101"),
						base.ParseInternalKey("m.SET.100"),
					),
				},
			}),
			wants: []want{
				{"h", true},
				{"i", false},
				{"j", false},
				{"k", false},
				{"l", false},
				{"m", false},
				{"n", true},
			},
		},
	}

	for _, tc := range testCases {
		// Build a compaction from tc.level into tc.level+1 spanning [a, z]
		// and compute the in-use key ranges it consults for elision.
		c := compaction{
			cmp:      DefaultComparer.Compare,
			comparer: DefaultComparer,
			version:  tc.version,
			inputs:   []compactionLevel{{level: tc.level}, {level: tc.level + 1}},
			smallest: base.ParseInternalKey("a.SET.0"),
			largest:  base.ParseInternalKey("z.SET.0"),
		}
		c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
		c.setupInuseKeyRanges()
		for _, w := range tc.wants {
			if got := c.elideTombstone([]byte(w.key)); got != w.expected {
				t.Errorf("%s: ukey=%q: got %v, want %v", tc.desc, w.key, got, w.expected)
			}
		}
	}
}
   684  
// TestElideRangeTombstone checks compaction.elideRangeTombstone: a range
// tombstone [key, endKey) may be elided only when no lower level contains a
// file overlapping that range, and never while a flush is in progress.
func TestElideRangeTombstone(t *testing.T) {
	opts := (*Options)(nil).EnsureDefaults()

	// newFileMeta builds metadata with point-key bounds [smallest, largest].
	newFileMeta := func(smallest, largest base.InternalKey) *fileMetadata {
		m := (&fileMetadata{}).ExtendPointKeyBounds(
			opts.Comparer.Compare, smallest, largest,
		)
		return m
	}

	type want struct {
		key      string
		endKey   string
		expected bool // true if the range tombstone may be elided
	}

	testCases := []struct {
		desc     string
		level    int
		version  *version
		wants    []want
		flushing flushableList
	}{
		{
			desc:    "empty",
			level:   1,
			version: newVersion(opts, [numLevels][]*fileMetadata{}),
			wants: []want{
				{"x", "y", true},
			},
		},
		{
			desc:  "non-empty",
			level: 1,
			version: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					newFileMeta(
						base.ParseInternalKey("c.SET.801"),
						base.ParseInternalKey("g.SET.800"),
					),
					newFileMeta(
						base.ParseInternalKey("x.SET.701"),
						base.ParseInternalKey("y.SET.700"),
					),
				},
				2: {
					newFileMeta(
						base.ParseInternalKey("d.SET.601"),
						base.ParseInternalKey("h.SET.600"),
					),
					newFileMeta(
						base.ParseInternalKey("r.SET.501"),
						base.ParseInternalKey("t.SET.500"),
					),
				},
				3: {
					newFileMeta(
						base.ParseInternalKey("f.SET.401"),
						base.ParseInternalKey("g.SET.400"),
					),
					newFileMeta(
						base.ParseInternalKey("w.SET.301"),
						base.ParseInternalKey("x.SET.300"),
					),
				},
				4: {
					newFileMeta(
						base.ParseInternalKey("f.SET.201"),
						base.ParseInternalKey("m.SET.200"),
					),
					newFileMeta(
						base.ParseInternalKey("t.SET.101"),
						base.ParseInternalKey("t.SET.100"),
					),
				},
			}),
			wants: []want{
				{"b", "c", true},
				{"c", "d", true},
				{"d", "e", true},
				{"e", "f", false},
				{"f", "g", false},
				{"g", "h", false},
				{"h", "i", false},
				{"l", "m", false},
				{"m", "n", false},
				{"n", "o", true},
				{"q", "r", true},
				{"r", "s", true},
				{"s", "t", false},
				{"t", "u", false},
				{"u", "v", true},
				{"v", "w", false},
				{"w", "x", false},
				{"x", "y", false},
				{"y", "z", true},
			},
		},
		{
			desc:  "flushing",
			level: -1,
			version: newVersion(opts, [numLevels][]*fileMetadata{
				0: {
					newFileMeta(
						base.ParseInternalKey("h.SET.901"),
						base.ParseInternalKey("j.SET.900"),
					),
				},
				1: {
					newFileMeta(
						base.ParseInternalKey("c.SET.801"),
						base.ParseInternalKey("g.SET.800"),
					),
					newFileMeta(
						base.ParseInternalKey("x.SET.701"),
						base.ParseInternalKey("y.SET.700"),
					),
				},
			}),
			wants: []want{
				{"m", "n", false},
			},
			// Pretend one memtable is being flushed
			flushing: flushableList{nil},
		},
	}

	for _, tc := range testCases {
		c := compaction{
			cmp:      DefaultComparer.Compare,
			comparer: DefaultComparer,
			version:  tc.version,
			inputs:   []compactionLevel{{level: tc.level}, {level: tc.level + 1}},
			smallest: base.ParseInternalKey("a.SET.0"),
			largest:  base.ParseInternalKey("z.SET.0"),
			flushing: tc.flushing,
		}
		c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
		c.setupInuseKeyRanges()
		for _, w := range tc.wants {
			if got := c.elideRangeTombstone([]byte(w.key), []byte(w.endKey)); got != w.expected {
				t.Errorf("%s: keys=%q-%q: got %v, want %v", tc.desc, w.key, w.endKey, got, w.expected)
			}
		}
	}
}
   831  
// TestCompactionTransform runs datadriven tests for the range-key
// compaction transformer: keys invisible to every snapshot are dropped,
// and range tombstones outside the in-use key ranges are elided unless
// elision is disabled.
func TestCompactionTransform(t *testing.T) {
	datadriven.RunTest(t, "testdata/compaction_transform", func(td *datadriven.TestData) string {
		switch td.Cmd {
		case "transform":
			var snapshots []uint64
			var keyRanges []manifest.UserKeyRange
			disableElision := false
			for i := range td.CmdArgs {
				switch td.CmdArgs[i].Key {
				case "snapshots":
					for _, snapshot := range td.CmdArgs[i].Vals {
						s, err := strconv.ParseUint(snapshot, 10, 64)
						if err != nil {
							return err.Error()
						}
						snapshots = append(snapshots, s)
					}
				case "in-use-key-ranges":
					// Each value is written "start-end".
					for _, keyRange := range td.CmdArgs[i].Vals {
						parts := strings.SplitN(keyRange, "-", 2)
						start := []byte(strings.TrimSpace(parts[0]))
						end := []byte(strings.TrimSpace(parts[1]))
						keyRanges = append(keyRanges, manifest.UserKeyRange{
							Start: start,
							End:   end,
						})
					}
				case "disable-elision":
					disableElision = true
				}
			}
			span := keyspan.ParseSpan(td.Input)
			// Input keys must be in descending trailer order; reject
			// malformed test input rather than transforming it.
			for i := range span.Keys {
				if i > 0 {
					if span.Keys[i-1].Trailer < span.Keys[i].Trailer {
						return "span keys not sorted"
					}
				}
			}
			var outSpan keyspan.Span
			c := compaction{
				cmp:                base.DefaultComparer.Compare,
				comparer:           base.DefaultComparer,
				disableSpanElision: disableElision,
				inuseKeyRanges:     keyRanges,
			}
			transformer := rangeKeyCompactionTransform(snapshots, c.elideRangeTombstone)
			if err := transformer.Transform(base.DefaultComparer.Compare, span, &outSpan); err != nil {
				return fmt.Sprintf("error: %s", err)
			}
			return outSpan.String()
		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}
   888  
   889  type cpuPermissionGranter struct {
   890  	granted int
   891  	used    bool
   892  }
   893  
   894  func (t *cpuPermissionGranter) TryGetProcs(count int) int {
   895  	t.granted += count
   896  	t.used = true
   897  	return count
   898  }
   899  
   900  func (t *cpuPermissionGranter) ReturnProcs(count int) {
   901  	t.granted -= count
   902  }
   903  
   904  // Simple test to check if compactions are using the granter, and if exactly the
   905  // used slots are being freed.
   906  func TestCompactionSlots(t *testing.T) {
   907  	mem := vfs.NewMem()
   908  	opts := &Options{
   909  		FS: mem,
   910  	}
   911  	g := &cpuPermissionGranter{}
   912  	opts.Experimental.CPUWorkPermissionGranter = g
   913  	d, err := Open("", opts)
   914  	if err != nil {
   915  		t.Fatalf("Open: %v", err)
   916  	}
   917  	defer d.Close()
   918  
   919  	d.Set([]byte{'a'}, []byte{'a'}, nil)
   920  	err = d.Compact([]byte{'a'}, []byte{'b'}, true)
   921  	if err != nil {
   922  		t.Fatalf("Compact: %v", err)
   923  	}
   924  	require.True(t, g.used)
   925  	require.Equal(t, 0, g.granted)
   926  }
   927  
// TestCompaction is an end-to-end test: it writes keys through a DB with a
// tiny memtable, and after each write asserts the expected memtable
// contents and on-disk sstable contents (each table rendered as its
// concatenated user keys followed by ".").
func TestCompaction(t *testing.T) {
	const memTableSize = 10000
	// Tuned so that 2 values can reside in the memtable before a flush, but a
	// 3rd value will cause a flush. Needs to account for the max skiplist node
	// size.
	const valueSize = 3500

	mem := vfs.NewMem()
	opts := &Options{
		FS:                    mem,
		MemTableSize:          memTableSize,
		DebugCheck:            DebugCheckLevels,
		L0CompactionThreshold: 8,
	}
	opts.testingRandomized()
	d, err := Open("", opts)
	if err != nil {
		t.Fatalf("Open: %v", err)
	}

	// get1 drains an iterator, concatenating its user keys in order.
	get1 := func(iter internalIterator) (ret string) {
		b := &bytes.Buffer{}
		for key, _ := iter.First(); key != nil; key, _ = iter.Next() {
			b.Write(key.UserKey)
		}
		if err := iter.Close(); err != nil {
			t.Fatalf("iterator Close: %v", err)
		}
		return b.String()
	}
	// getAll snapshots the current memtable contents and, per sstable in
	// the current version, a "."-terminated key string; sstable strings are
	// sorted so the result is deterministic across levels.
	getAll := func() (gotMem, gotDisk string, err error) {
		d.mu.Lock()
		defer d.mu.Unlock()

		if d.mu.mem.mutable != nil {
			gotMem = get1(d.mu.mem.mutable.newIter(nil))
		}
		ss := []string(nil)
		v := d.mu.versions.currentVersion()
		for _, levelMetadata := range v.Levels {
			iter := levelMetadata.Iter()
			for meta := iter.First(); meta != nil; meta = iter.Next() {
				f, err := mem.Open(base.MakeFilepath(mem, "", fileTypeTable, meta.FileNum))
				if err != nil {
					return "", "", errors.WithStack(err)
				}
				r, err := sstable.NewReader(f, sstable.ReaderOptions{})
				if err != nil {
					return "", "", errors.WithStack(err)
				}
				defer r.Close()
				iter, err := r.NewIter(nil /* lower */, nil /* upper */)
				if err != nil {
					return "", "", errors.WithStack(err)
				}
				ss = append(ss, get1(iter)+".")
			}
		}
		sort.Strings(ss)
		return gotMem, strings.Join(ss, ""), nil
	}

	value := bytes.Repeat([]byte("x"), valueSize)
	// Each case applies one mutation ("+k" sets k, "-k" deletes k) and
	// states the expected memtable and disk contents afterwards.
	testCases := []struct {
		key, wantMem, wantDisk string
	}{
		{"+A", "A", ""},
		{"+a", "Aa", ""},
		{"+B", "B", "Aa."},
		{"+b", "Bb", "Aa."},
		// The next level-0 table overwrites the B key.
		{"+C", "C", "Aa.Bb."},
		{"+B", "BC", "Aa.Bb."},
		// The next level-0 table deletes the a key.
		{"+D", "D", "Aa.BC.Bb."},
		{"-a", "Da", "Aa.BC.Bb."},
		{"+d", "Dad", "Aa.BC.Bb."},
		{"+E", "E", "Aa.BC.Bb.Dad."},
		{"+e", "Ee", "Aa.BC.Bb.Dad."},
		// The next addition creates the fourth level-0 table, and l0CompactionTrigger == 8,
		// but since the sublevel count is doubled when comparing with l0CompactionTrigger,
		// the addition of the 4th sublevel triggers a non-trivial compaction into one level-1 table.
		// Note that the keys in this one larger table are interleaved from the four smaller ones.
		{"+F", "F", "ABCDEbde."},
	}
	for _, tc := range testCases {
		if key := tc.key[1:]; tc.key[0] == '+' {
			if err := d.Set([]byte(key), value, nil); err != nil {
				t.Errorf("%q: Set: %v", key, err)
				break
			}
		} else {
			if err := d.Delete([]byte(key), nil); err != nil {
				t.Errorf("%q: Delete: %v", key, err)
				break
			}
		}

		// try backs off to allow any writes to the memfs to complete.
		err := try(100*time.Microsecond, 20*time.Second, func() error {
			gotMem, gotDisk, err := getAll()
			if err != nil {
				return err
			}
			if testing.Verbose() {
				fmt.Printf("mem=%s (%s) disk=%s (%s)\n", gotMem, tc.wantMem, gotDisk, tc.wantDisk)
			}

			if gotMem != tc.wantMem {
				return errors.Errorf("mem: got %q, want %q", gotMem, tc.wantMem)
			}
			if gotDisk != tc.wantDisk {
				return errors.Errorf("ldb: got %q, want %q", gotDisk, tc.wantDisk)
			}
			return nil
		})
		if err != nil {
			t.Errorf("%q: %v", tc.key, err)
		}
	}
	if err := d.Close(); err != nil {
		t.Fatalf("db Close: %v", err)
	}
}
  1052  
  1053  func TestValidateVersionEdit(t *testing.T) {
  1054  	const badKey = "malformed-key"
  1055  
  1056  	errValidationFailed := errors.New("validation failed")
  1057  	validateFn := func(key []byte) error {
  1058  		if string(key) == badKey {
  1059  			return errValidationFailed
  1060  		}
  1061  		return nil
  1062  	}
  1063  
  1064  	cmp := DefaultComparer.Compare
  1065  	newFileMeta := func(smallest, largest base.InternalKey) *fileMetadata {
  1066  		m := (&fileMetadata{}).ExtendPointKeyBounds(cmp, smallest, largest)
  1067  		return m
  1068  	}
  1069  
  1070  	testCases := []struct {
  1071  		desc    string
  1072  		ve      *versionEdit
  1073  		vFunc   func([]byte) error
  1074  		wantErr error
  1075  	}{
  1076  		{
  1077  			desc: "single new file; start key",
  1078  			ve: &versionEdit{
  1079  				NewFiles: []manifest.NewFileEntry{
  1080  					{
  1081  						Meta: newFileMeta(
  1082  							manifest.InternalKey{UserKey: []byte(badKey)},
  1083  							manifest.InternalKey{UserKey: []byte("z")},
  1084  						),
  1085  					},
  1086  				},
  1087  			},
  1088  			vFunc:   validateFn,
  1089  			wantErr: errValidationFailed,
  1090  		},
  1091  		{
  1092  			desc: "single new file; end key",
  1093  			ve: &versionEdit{
  1094  				NewFiles: []manifest.NewFileEntry{
  1095  					{
  1096  						Meta: newFileMeta(
  1097  							manifest.InternalKey{UserKey: []byte("a")},
  1098  							manifest.InternalKey{UserKey: []byte(badKey)},
  1099  						),
  1100  					},
  1101  				},
  1102  			},
  1103  			vFunc:   validateFn,
  1104  			wantErr: errValidationFailed,
  1105  		},
  1106  		{
  1107  			desc: "multiple new files",
  1108  			ve: &versionEdit{
  1109  				NewFiles: []manifest.NewFileEntry{
  1110  					{
  1111  						Meta: newFileMeta(
  1112  							manifest.InternalKey{UserKey: []byte("a")},
  1113  							manifest.InternalKey{UserKey: []byte("c")},
  1114  						),
  1115  					},
  1116  					{
  1117  						Meta: newFileMeta(
  1118  							manifest.InternalKey{UserKey: []byte(badKey)},
  1119  							manifest.InternalKey{UserKey: []byte("z")},
  1120  						),
  1121  					},
  1122  				},
  1123  			},
  1124  			vFunc:   validateFn,
  1125  			wantErr: errValidationFailed,
  1126  		},
  1127  		{
  1128  			desc: "single deleted file; start key",
  1129  			ve: &versionEdit{
  1130  				DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{
  1131  					deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta(
  1132  						manifest.InternalKey{UserKey: []byte(badKey)},
  1133  						manifest.InternalKey{UserKey: []byte("z")},
  1134  					),
  1135  				},
  1136  			},
  1137  			vFunc:   validateFn,
  1138  			wantErr: errValidationFailed,
  1139  		},
  1140  		{
  1141  			desc: "single deleted file; end key",
  1142  			ve: &versionEdit{
  1143  				DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{
  1144  					deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta(
  1145  						manifest.InternalKey{UserKey: []byte("a")},
  1146  						manifest.InternalKey{UserKey: []byte(badKey)},
  1147  					),
  1148  				},
  1149  			},
  1150  			vFunc:   validateFn,
  1151  			wantErr: errValidationFailed,
  1152  		},
  1153  		{
  1154  			desc: "multiple deleted files",
  1155  			ve: &versionEdit{
  1156  				DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{
  1157  					deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta(
  1158  						manifest.InternalKey{UserKey: []byte("a")},
  1159  						manifest.InternalKey{UserKey: []byte("c")},
  1160  					),
  1161  					deletedFileEntry{Level: 0, FileNum: 1}: newFileMeta(
  1162  						manifest.InternalKey{UserKey: []byte(badKey)},
  1163  						manifest.InternalKey{UserKey: []byte("z")},
  1164  					),
  1165  				},
  1166  			},
  1167  			vFunc:   validateFn,
  1168  			wantErr: errValidationFailed,
  1169  		},
  1170  		{
  1171  			desc: "no errors",
  1172  			ve: &versionEdit{
  1173  				NewFiles: []manifest.NewFileEntry{
  1174  					{
  1175  						Level: 0,
  1176  						Meta: newFileMeta(
  1177  							manifest.InternalKey{UserKey: []byte("b")},
  1178  							manifest.InternalKey{UserKey: []byte("c")},
  1179  						),
  1180  					},
  1181  					{
  1182  						Level: 0,
  1183  						Meta: newFileMeta(
  1184  							manifest.InternalKey{UserKey: []byte("d")},
  1185  							manifest.InternalKey{UserKey: []byte("g")},
  1186  						),
  1187  					},
  1188  				},
  1189  				DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{
  1190  					deletedFileEntry{Level: 6, FileNum: 0}: newFileMeta(
  1191  						manifest.InternalKey{UserKey: []byte("a")},
  1192  						manifest.InternalKey{UserKey: []byte("d")},
  1193  					),
  1194  					deletedFileEntry{Level: 6, FileNum: 1}: newFileMeta(
  1195  						manifest.InternalKey{UserKey: []byte("x")},
  1196  						manifest.InternalKey{UserKey: []byte("z")},
  1197  					),
  1198  				},
  1199  			},
  1200  			vFunc: validateFn,
  1201  		},
  1202  	}
  1203  
  1204  	for _, tc := range testCases {
  1205  		t.Run(tc.desc, func(t *testing.T) {
  1206  			err := validateVersionEdit(tc.ve, tc.vFunc, base.DefaultFormatter)
  1207  			if tc.wantErr != nil {
  1208  				if !errors.Is(err, tc.wantErr) {
  1209  					t.Fatalf("got: %s; want: %s", err, tc.wantErr)
  1210  				}
  1211  				return
  1212  			}
  1213  			if err != nil {
  1214  				t.Fatalf("got %s; wanted no error", err)
  1215  			}
  1216  		})
  1217  	}
  1218  }
  1219  
// TestManualCompaction is a datadriven test of manual compactions. Each
// entry in testCases (at the bottom) names a testdata file and an inclusive
// range of format major versions; the DB for every run of a testdata file is
// opened at a version chosen at random from that range (the seed is logged
// for reproducibility). Automatic compactions are disabled throughout so
// that only the test's commands drive compaction activity.
func TestManualCompaction(t *testing.T) {
	var mem vfs.FS
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	// Log the seed so a failure caused by a particular random format major
	// version choice can be reproduced.
	seed := time.Now().UnixNano()
	rng := rand.New(rand.NewSource(seed))
	t.Logf("seed: %d", seed)

	// randVersion returns a format major version in [min, max], inclusive.
	randVersion := func(min, max FormatMajorVersion) FormatMajorVersion {
		return FormatMajorVersion(int(min) + rng.Intn(int(max)-int(min)+1))
	}

	// reset closes any open DB and reopens a fresh in-memory DB at a random
	// format major version, with automatic compactions disabled.
	reset := func(minVersion, maxVersion FormatMajorVersion) {
		if d != nil {
			require.NoError(t, d.Close())
		}
		mem = vfs.NewMem()
		// "ext" is used by the build/ingest commands as a staging directory.
		require.NoError(t, mem.MkdirAll("ext", 0755))

		opts := &Options{
			FS:                 mem,
			DebugCheck:         DebugCheckLevels,
			FormatMajorVersion: randVersion(minVersion, maxVersion),
		}
		opts.DisableAutomaticCompactions = true

		var err error
		d, err = Open("", opts)
		require.NoError(t, err)
	}

	// createOngoingCompaction registers a fake in-progress compaction over
	// [start, end] from startLevel to outputLevel, marking all overlapping
	// files as compacting so that manual compactions must wait on it.
	// d.mu must be held when calling.
	createOngoingCompaction := func(start, end []byte, startLevel, outputLevel int) (ongoingCompaction *compaction) {
		ongoingCompaction = &compaction{
			inputs:   []compactionLevel{{level: startLevel}, {level: outputLevel}},
			smallest: InternalKey{UserKey: start},
			largest:  InternalKey{UserKey: end},
		}
		ongoingCompaction.startLevel = &ongoingCompaction.inputs[0]
		ongoingCompaction.outputLevel = &ongoingCompaction.inputs[1]
		// Mark files as compacting.
		curr := d.mu.versions.currentVersion()
		ongoingCompaction.startLevel.files = curr.Overlaps(startLevel, d.cmp, start, end, false)
		ongoingCompaction.outputLevel.files = curr.Overlaps(outputLevel, d.cmp, start, end, false)
		for _, cl := range ongoingCompaction.inputs {
			iter := cl.files.Iter()
			for f := iter.First(); f != nil; f = iter.Next() {
				f.CompactionState = manifest.CompactionStateCompacting
			}
		}
		d.mu.compact.inProgress[ongoingCompaction] = struct{}{}
		d.mu.compact.compactingCount++
		return
	}

	// deleteOngoingCompaction undoes createOngoingCompaction: it clears the
	// compacting state on the input files and deregisters the compaction.
	// d.mu must be held when calling.
	deleteOngoingCompaction := func(ongoingCompaction *compaction) {
		for _, cl := range ongoingCompaction.inputs {
			iter := cl.files.Iter()
			for f := iter.First(); f != nil; f = iter.Next() {
				f.CompactionState = manifest.CompactionStateNotCompacting
			}
		}
		delete(d.mu.compact.inProgress, ongoingCompaction)
		d.mu.compact.compactingCount--
	}

	// runTest executes one testdata file against a freshly reset DB.
	runTest := func(t *testing.T, testData string, minVersion, maxVersion FormatMajorVersion, verbose bool) {
		reset(minVersion, maxVersion)
		var ongoingCompaction *compaction
		datadriven.RunTest(t, testData, func(td *datadriven.TestData) string {
			switch td.Cmd {
			case "reset":
				reset(minVersion, maxVersion)
				return ""

			case "batch":
				b := d.NewIndexedBatch()
				if err := runBatchDefineCmd(td, b); err != nil {
					return err.Error()
				}
				require.NoError(t, b.Commit(nil))
				return ""

			case "build":
				if err := runBuildCmd(td, d, mem); err != nil {
					return err.Error()
				}
				return ""

			case "compact":
				if err := runCompactCmd(td, d); err != nil {
					return err.Error()
				}
				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				if verbose {
					s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)
				}
				d.mu.Unlock()
				// Optionally strip file numbers for output stability across
				// randomized format versions.
				if td.HasArg("hide-file-num") {
					re := regexp.MustCompile(`([0-9]*):\[`)
					s = re.ReplaceAllString(s, "[")
				}
				return s

			case "define":
				// "define" builds a DB with an explicit LSM shape, replacing
				// any DB the previous commands created.
				if d != nil {
					if err := d.Close(); err != nil {
						return err.Error()
					}
				}

				mem = vfs.NewMem()
				opts := &Options{
					FS:                          mem,
					DebugCheck:                  DebugCheckLevels,
					FormatMajorVersion:          randVersion(minVersion, maxVersion),
					DisableAutomaticCompactions: true,
				}

				var err error
				if d, err = runDBDefineCmd(td, opts); err != nil {
					return err.Error()
				}

				s := d.mu.versions.currentVersion().String()
				if verbose {
					s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)
				}
				return s

			case "ingest":
				if err := runIngestCmd(td, d, mem); err != nil {
					return err.Error()
				}
				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				if verbose {
					s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)
				}
				d.mu.Unlock()
				return s

			case "iter":
				// TODO(peter): runDBDefineCmd doesn't properly update the visible
				// sequence number. So we have to use a snapshot with a very large
				// sequence number, otherwise the DB appears empty.
				snap := Snapshot{
					db:     d,
					seqNum: InternalKeySeqNumMax,
				}
				iter := snap.NewIter(nil)
				return runIterCmd(td, iter, true)

			case "async-compact":
				// Run the manual compaction in a goroutine and verify that it
				// blocks (is retried) while the fake ongoing compaction added by
				// "add-ongoing-compaction" is registered, then completes once the
				// ongoing compaction is removed.
				var s string
				ch := make(chan error, 1)
				go func() {
					if err := runCompactCmd(td, d); err != nil {
						ch <- err
						close(ch)
						return
					}
					d.mu.Lock()
					s = d.mu.versions.currentVersion().String()
					d.mu.Unlock()
					close(ch)
				}()

				// manualDone reports (without blocking) whether the goroutine
				// above has finished.
				manualDone := func() bool {
					select {
					case <-ch:
						return true
					default:
						return false
					}
				}

				// Wait until the queued manual compaction has been retried at
				// least once, proving it blocked on the ongoing compaction.
				err := try(100*time.Microsecond, 20*time.Second, func() error {
					if manualDone() {
						return nil
					}

					d.mu.Lock()
					defer d.mu.Unlock()
					if len(d.mu.compact.manual) == 0 {
						return errors.New("no manual compaction queued")
					}
					manual := d.mu.compact.manual[0]
					if manual.retries == 0 {
						return errors.New("manual compaction has not been retried")
					}
					return nil
				})
				if err != nil {
					return err.Error()
				}

				if manualDone() {
					return "manual compaction did not block for ongoing\n" + s
				}

				// Unblock the manual compaction by removing the fake ongoing
				// one, then wait for it to finish.
				d.mu.Lock()
				deleteOngoingCompaction(ongoingCompaction)
				ongoingCompaction = nil
				d.maybeScheduleCompaction()
				d.mu.Unlock()
				if err := <-ch; err != nil {
					return err.Error()
				}
				return "manual compaction blocked until ongoing finished\n" + s

			case "add-ongoing-compaction":
				var startLevel int
				var outputLevel int
				var start string
				var end string
				td.ScanArgs(t, "startLevel", &startLevel)
				td.ScanArgs(t, "outputLevel", &outputLevel)
				td.ScanArgs(t, "start", &start)
				td.ScanArgs(t, "end", &end)
				d.mu.Lock()
				ongoingCompaction = createOngoingCompaction([]byte(start), []byte(end), startLevel, outputLevel)
				d.mu.Unlock()
				return ""

			case "remove-ongoing-compaction":
				d.mu.Lock()
				deleteOngoingCompaction(ongoingCompaction)
				ongoingCompaction = nil
				d.mu.Unlock()
				return ""

			case "set-concurrent-compactions":
				var concurrentCompactions int
				td.ScanArgs(t, "num", &concurrentCompactions)
				d.opts.MaxConcurrentCompactions = func() int {
					return concurrentCompactions
				}
				return ""

			case "wait-pending-table-stats":
				return runTableStatsCmd(td, d)

			case "close-snapshots":
				// Collect the open snapshots under the lock, but close them
				// after releasing it, since Close reacquires d.mu.
				d.mu.Lock()
				var ss []*Snapshot
				l := &d.mu.snapshots
				for i := l.root.next; i != &l.root; i = i.next {
					ss = append(ss, i)
				}
				d.mu.Unlock()
				for i := range ss {
					if err := ss[i].Close(); err != nil {
						return err.Error()
					}
				}
				return ""
			default:
				return fmt.Sprintf("unknown command: %s", td.Cmd)
			}
		})
	}

	// Each testdata file is constrained to the range of format major versions
	// whose compaction output it expects (e.g. SetWithDelete changes output).
	testCases := []struct {
		testData   string
		minVersion FormatMajorVersion
		maxVersion FormatMajorVersion // inclusive
		verbose    bool
	}{
		{
			testData:   "testdata/manual_compaction",
			minVersion: FormatMostCompatible,
			maxVersion: FormatSetWithDelete - 1,
		},
		{
			testData:   "testdata/manual_compaction_set_with_del",
			minVersion: FormatSetWithDelete,
			maxVersion: FormatNewest,
		},
		{
			testData:   "testdata/singledel_manual_compaction",
			minVersion: FormatMostCompatible,
			maxVersion: FormatSetWithDelete - 1,
		},
		{
			testData:   "testdata/singledel_manual_compaction_set_with_del",
			minVersion: FormatSetWithDelete,
			maxVersion: FormatNewest,
		},
		{
			testData:   "testdata/manual_compaction_range_keys",
			minVersion: FormatRangeKeys,
			maxVersion: FormatNewest,
			verbose:    true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.testData, func(t *testing.T) {
			runTest(t, tc.testData, tc.minVersion, tc.maxVersion, tc.verbose)
		})
	}
}
  1530  
  1531  func TestCompactionFindGrandparentLimit(t *testing.T) {
  1532  	cmp := DefaultComparer.Compare
  1533  	var grandparents []*fileMetadata
  1534  
  1535  	var fileNum base.FileNum
  1536  	parseMeta := func(s string) *fileMetadata {
  1537  		parts := strings.Split(s, "-")
  1538  		if len(parts) != 2 {
  1539  			t.Fatalf("malformed table spec: %s", s)
  1540  		}
  1541  		fileNum++
  1542  		m := (&fileMetadata{
  1543  			FileNum: fileNum,
  1544  		}).ExtendPointKeyBounds(
  1545  			cmp,
  1546  			InternalKey{UserKey: []byte(parts[0])},
  1547  			InternalKey{UserKey: []byte(parts[1])},
  1548  		)
  1549  		return m
  1550  	}
  1551  
  1552  	datadriven.RunTest(t, "testdata/compaction_find_grandparent_limit",
  1553  		func(d *datadriven.TestData) string {
  1554  			switch d.Cmd {
  1555  			case "define":
  1556  				grandparents = nil
  1557  				if len(d.Input) == 0 {
  1558  					return ""
  1559  				}
  1560  				for _, data := range strings.Split(d.Input, "\n") {
  1561  					parts := strings.Fields(data)
  1562  					if len(parts) != 2 {
  1563  						return fmt.Sprintf("malformed test:\n%s", d.Input)
  1564  					}
  1565  
  1566  					meta := parseMeta(parts[0])
  1567  					var err error
  1568  					meta.Size, err = strconv.ParseUint(parts[1], 10, 64)
  1569  					if err != nil {
  1570  						return err.Error()
  1571  					}
  1572  					grandparents = append(grandparents, meta)
  1573  				}
  1574  				return ""
  1575  
  1576  			case "compact":
  1577  				c := &compaction{
  1578  					cmp:          cmp,
  1579  					equal:        DefaultComparer.Equal,
  1580  					comparer:     DefaultComparer,
  1581  					grandparents: manifest.NewLevelSliceKeySorted(cmp, grandparents),
  1582  				}
  1583  				if len(d.CmdArgs) != 1 {
  1584  					return fmt.Sprintf("%s expects 1 argument", d.Cmd)
  1585  				}
  1586  				if len(d.CmdArgs[0].Vals) != 1 {
  1587  					return fmt.Sprintf("%s expects 1 value", d.CmdArgs[0].Key)
  1588  				}
  1589  				var err error
  1590  				c.maxOverlapBytes, err = strconv.ParseUint(d.CmdArgs[0].Vals[0], 10, 64)
  1591  				if err != nil {
  1592  					return err.Error()
  1593  				}
  1594  
  1595  				var buf bytes.Buffer
  1596  				var smallest, largest string
  1597  				var grandparentLimit []byte
  1598  				for i, key := range strings.Fields(d.Input) {
  1599  					if i == 0 {
  1600  						smallest = key
  1601  						grandparentLimit = c.findGrandparentLimit([]byte(key))
  1602  					}
  1603  					if grandparentLimit != nil && c.cmp(grandparentLimit, []byte(key)) < 0 {
  1604  						fmt.Fprintf(&buf, "%s-%s\n", smallest, largest)
  1605  						smallest = key
  1606  						grandparentLimit = c.findGrandparentLimit([]byte(key))
  1607  					}
  1608  					largest = key
  1609  				}
  1610  				fmt.Fprintf(&buf, "%s-%s\n", smallest, largest)
  1611  				return buf.String()
  1612  
  1613  			default:
  1614  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  1615  			}
  1616  		})
  1617  }
  1618  
  1619  func TestCompactionFindL0Limit(t *testing.T) {
  1620  	cmp := DefaultComparer.Compare
  1621  
  1622  	fileNumCounter := 1
  1623  	parseMeta := func(s string) (*fileMetadata, error) {
  1624  		fields := strings.Fields(s)
  1625  		parts := strings.Split(fields[0], "-")
  1626  		if len(parts) != 2 {
  1627  			return nil, errors.Errorf("malformed table spec: %s", s)
  1628  		}
  1629  		m := (&fileMetadata{
  1630  			FileNum: base.FileNum(fileNumCounter),
  1631  		}).ExtendPointKeyBounds(
  1632  			cmp,
  1633  			base.ParseInternalKey(strings.TrimSpace(parts[0])),
  1634  			base.ParseInternalKey(strings.TrimSpace(parts[1])),
  1635  		)
  1636  		fileNumCounter++
  1637  		m.SmallestSeqNum = m.Smallest.SeqNum()
  1638  		m.LargestSeqNum = m.Largest.SeqNum()
  1639  
  1640  		for _, field := range fields[1:] {
  1641  			parts := strings.Split(field, "=")
  1642  			switch parts[0] {
  1643  			case "size":
  1644  				size, err := strconv.ParseUint(parts[1], 10, 64)
  1645  				if err != nil {
  1646  					t.Fatal(err)
  1647  				}
  1648  				m.Size = size
  1649  			}
  1650  		}
  1651  		return m, nil
  1652  	}
  1653  
  1654  	var vers *version
  1655  	flushSplitBytes := int64(0)
  1656  
  1657  	datadriven.RunTest(t, "testdata/compaction_find_l0_limit",
  1658  		func(d *datadriven.TestData) string {
  1659  			switch d.Cmd {
  1660  			case "define":
  1661  				fileMetas := [manifest.NumLevels][]*fileMetadata{}
  1662  				baseLevel := manifest.NumLevels - 1
  1663  				level := 0
  1664  				var err error
  1665  				for _, arg := range d.CmdArgs {
  1666  					switch arg.Key {
  1667  					case "flush_split_bytes":
  1668  						flushSplitBytes, err = strconv.ParseInt(arg.Vals[0], 10, 64)
  1669  						if err != nil {
  1670  							t.Fatal(err)
  1671  						}
  1672  					}
  1673  				}
  1674  				for _, data := range strings.Split(d.Input, "\n") {
  1675  					data = strings.TrimSpace(data)
  1676  					switch data {
  1677  					case "L0", "L1", "L2", "L3", "L4", "L5", "L6":
  1678  						level, err = strconv.Atoi(data[1:])
  1679  						if err != nil {
  1680  							return err.Error()
  1681  						}
  1682  					default:
  1683  						meta, err := parseMeta(data)
  1684  						if err != nil {
  1685  							return err.Error()
  1686  						}
  1687  						if level != 0 && level < baseLevel {
  1688  							baseLevel = level
  1689  						}
  1690  						fileMetas[level] = append(fileMetas[level], meta)
  1691  					}
  1692  				}
  1693  
  1694  				vers = manifest.NewVersion(DefaultComparer.Compare, base.DefaultFormatter, flushSplitBytes, fileMetas)
  1695  				flushSplitKeys := vers.L0Sublevels.FlushSplitKeys()
  1696  
  1697  				var buf strings.Builder
  1698  				buf.WriteString(vers.String())
  1699  				buf.WriteString("flush split keys:\n")
  1700  				for _, key := range flushSplitKeys {
  1701  					fmt.Fprintf(&buf, "\t%s\n", base.DefaultFormatter(key))
  1702  				}
  1703  
  1704  				return buf.String()
  1705  
  1706  			case "flush":
  1707  				c := &compaction{
  1708  					cmp:      cmp,
  1709  					equal:    DefaultComparer.Equal,
  1710  					comparer: DefaultComparer,
  1711  					version:  vers,
  1712  					l0Limits: vers.L0Sublevels.FlushSplitKeys(),
  1713  					inputs:   []compactionLevel{{level: -1}, {level: 0}},
  1714  				}
  1715  				c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
  1716  
  1717  				var buf bytes.Buffer
  1718  				var smallest, largest string
  1719  				var l0Limit []byte
  1720  				for i, key := range strings.Fields(d.Input) {
  1721  					if i == 0 {
  1722  						smallest = key
  1723  						l0Limit = c.findL0Limit([]byte(key))
  1724  					}
  1725  					if l0Limit != nil && c.cmp(l0Limit, []byte(key)) < 0 {
  1726  						fmt.Fprintf(&buf, "%s-%s\n", smallest, largest)
  1727  						smallest = key
  1728  						l0Limit = c.findL0Limit([]byte(key))
  1729  					}
  1730  					largest = key
  1731  				}
  1732  				fmt.Fprintf(&buf, "%s-%s\n", smallest, largest)
  1733  				return buf.String()
  1734  
  1735  			default:
  1736  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  1737  			}
  1738  		})
  1739  }
  1740  
  1741  func TestCompactionOutputLevel(t *testing.T) {
  1742  	opts := (*Options)(nil).EnsureDefaults()
  1743  	version := &version{}
  1744  
  1745  	datadriven.RunTest(t, "testdata/compaction_output_level",
  1746  		func(d *datadriven.TestData) (res string) {
  1747  			defer func() {
  1748  				if r := recover(); r != nil {
  1749  					res = fmt.Sprintln(r)
  1750  				}
  1751  			}()
  1752  
  1753  			switch d.Cmd {
  1754  			case "compact":
  1755  				var start, base int
  1756  				d.ScanArgs(t, "start", &start)
  1757  				d.ScanArgs(t, "base", &base)
  1758  				pc := newPickedCompaction(opts, version, start, defaultOutputLevel(start, base), base)
  1759  				c := newCompaction(pc, opts)
  1760  				return fmt.Sprintf("output=%d\nmax-output-file-size=%d\n",
  1761  					c.outputLevel.level, c.maxOutputFileSize)
  1762  
  1763  			default:
  1764  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  1765  			}
  1766  		})
  1767  }
  1768  
  1769  func TestCompactionAtomicUnitBounds(t *testing.T) {
  1770  	cmp := DefaultComparer.Compare
  1771  	var files manifest.LevelSlice
  1772  
  1773  	parseMeta := func(s string) *fileMetadata {
  1774  		parts := strings.Split(s, "-")
  1775  		if len(parts) != 2 {
  1776  			t.Fatalf("malformed table spec: %s", s)
  1777  		}
  1778  		m := (&fileMetadata{}).ExtendPointKeyBounds(
  1779  			cmp,
  1780  			base.ParseInternalKey(parts[0]),
  1781  			base.ParseInternalKey(parts[1]),
  1782  		)
  1783  		return m
  1784  	}
  1785  
  1786  	datadriven.RunTest(t, "testdata/compaction_atomic_unit_bounds",
  1787  		func(d *datadriven.TestData) string {
  1788  			switch d.Cmd {
  1789  			case "define":
  1790  				files = manifest.LevelSlice{}
  1791  				if len(d.Input) == 0 {
  1792  					return ""
  1793  				}
  1794  				var ff []*fileMetadata
  1795  				for _, data := range strings.Split(d.Input, "\n") {
  1796  					meta := parseMeta(data)
  1797  					meta.FileNum = FileNum(len(ff))
  1798  					ff = append(ff, meta)
  1799  				}
  1800  				files = manifest.NewLevelSliceKeySorted(cmp, ff)
  1801  				return ""
  1802  
  1803  			case "atomic-unit-bounds":
  1804  				c := &compaction{
  1805  					cmp:      cmp,
  1806  					equal:    DefaultComparer.Equal,
  1807  					comparer: DefaultComparer,
  1808  					inputs:   []compactionLevel{{files: files}, {}},
  1809  				}
  1810  				c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
  1811  				if len(d.CmdArgs) != 1 {
  1812  					return fmt.Sprintf("%s expects 1 argument", d.Cmd)
  1813  				}
  1814  				index, err := strconv.ParseInt(d.CmdArgs[0].String(), 10, 64)
  1815  				if err != nil {
  1816  					return err.Error()
  1817  				}
  1818  				iter := files.Iter()
  1819  				// Advance iter to `index`.
  1820  				_ = iter.First()
  1821  				for i := int64(0); i < index; i++ {
  1822  					_ = iter.Next()
  1823  				}
  1824  				atomicUnit, _ := expandToAtomicUnit(c.cmp, iter.Take().Slice(), true /* disableIsCompacting */)
  1825  				lower, upper := manifest.KeyRange(c.cmp, atomicUnit.Iter())
  1826  				return fmt.Sprintf("%s-%s\n", lower.UserKey, upper.UserKey)
  1827  
  1828  			default:
  1829  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  1830  			}
  1831  		})
  1832  }
  1833  
  1834  func TestCompactionDeleteOnlyHints(t *testing.T) {
  1835  	parseUint64 := func(s string) uint64 {
  1836  		v, err := strconv.ParseUint(s, 10, 64)
  1837  		require.NoError(t, err)
  1838  		return v
  1839  	}
  1840  	var d *DB
  1841  	defer func() {
  1842  		if d != nil {
  1843  			require.NoError(t, d.Close())
  1844  		}
  1845  	}()
  1846  
  1847  	var compactInfo *CompactionInfo // protected by d.mu
  1848  	reset := func() (*Options, error) {
  1849  		if d != nil {
  1850  			compactInfo = nil
  1851  			if err := d.Close(); err != nil {
  1852  				return nil, err
  1853  			}
  1854  		}
  1855  		opts := &Options{
  1856  			FS:         vfs.NewMem(),
  1857  			DebugCheck: DebugCheckLevels,
  1858  			EventListener: EventListener{
  1859  				CompactionEnd: func(info CompactionInfo) {
  1860  					if compactInfo != nil {
  1861  						return
  1862  					}
  1863  					compactInfo = &info
  1864  				},
  1865  			},
  1866  			FormatMajorVersion: FormatNewest,
  1867  		}
  1868  
  1869  		// Collection of table stats can trigger compactions. As we want full
  1870  		// control over when compactions are run, disable stats by default.
  1871  		opts.private.disableTableStats = true
  1872  
  1873  		return opts, nil
  1874  	}
  1875  
  1876  	compactionString := func() string {
  1877  		for d.mu.compact.compactingCount > 0 {
  1878  			d.mu.compact.cond.Wait()
  1879  		}
  1880  
  1881  		s := "(none)"
  1882  		if compactInfo != nil {
  1883  			// Fix the job ID and durations for determinism.
  1884  			compactInfo.JobID = 100
  1885  			compactInfo.Duration = time.Second
  1886  			compactInfo.TotalDuration = 2 * time.Second
  1887  			s = compactInfo.String()
  1888  			compactInfo = nil
  1889  		}
  1890  		return s
  1891  	}
  1892  
  1893  	var err error
  1894  	var opts *Options
  1895  	datadriven.RunTest(t, "testdata/compaction_delete_only_hints",
  1896  		func(td *datadriven.TestData) string {
  1897  			switch td.Cmd {
  1898  			case "define":
  1899  				opts, err = reset()
  1900  				if err != nil {
  1901  					return err.Error()
  1902  				}
  1903  				d, err = runDBDefineCmd(td, opts)
  1904  				if err != nil {
  1905  					return err.Error()
  1906  				}
  1907  				d.mu.Lock()
  1908  				s := d.mu.versions.currentVersion().String()
  1909  				d.mu.Unlock()
  1910  				return s
  1911  
  1912  			case "force-set-hints":
  1913  				d.mu.Lock()
  1914  				defer d.mu.Unlock()
  1915  				d.mu.compact.deletionHints = d.mu.compact.deletionHints[:0]
  1916  				var buf bytes.Buffer
  1917  				for _, data := range strings.Split(td.Input, "\n") {
  1918  					parts := strings.FieldsFunc(strings.TrimSpace(data),
  1919  						func(r rune) bool { return r == '-' || r == ' ' || r == '.' })
  1920  
  1921  					start, end := []byte(parts[2]), []byte(parts[3])
  1922  
  1923  					var tombstoneFile *fileMetadata
  1924  					tombstoneLevel := int(parseUint64(parts[0][1:]))
  1925  
  1926  					// Set file number to the value provided in the input.
  1927  					tombstoneFile = &fileMetadata{
  1928  						FileNum: base.FileNum(parseUint64(parts[1])),
  1929  					}
  1930  
  1931  					var hintType deleteCompactionHintType
  1932  					switch typ := parts[7]; typ {
  1933  					case "point_key_only":
  1934  						hintType = deleteCompactionHintTypePointKeyOnly
  1935  					case "range_key_only":
  1936  						hintType = deleteCompactionHintTypeRangeKeyOnly
  1937  					case "point_and_range_key":
  1938  						hintType = deleteCompactionHintTypePointAndRangeKey
  1939  					default:
  1940  						return fmt.Sprintf("unknown hint type: %s", typ)
  1941  					}
  1942  
  1943  					h := deleteCompactionHint{
  1944  						hintType:                hintType,
  1945  						start:                   start,
  1946  						end:                     end,
  1947  						fileSmallestSeqNum:      parseUint64(parts[4]),
  1948  						tombstoneLevel:          tombstoneLevel,
  1949  						tombstoneFile:           tombstoneFile,
  1950  						tombstoneSmallestSeqNum: parseUint64(parts[5]),
  1951  						tombstoneLargestSeqNum:  parseUint64(parts[6]),
  1952  					}
  1953  					d.mu.compact.deletionHints = append(d.mu.compact.deletionHints, h)
  1954  					fmt.Fprintln(&buf, h.String())
  1955  				}
  1956  				return buf.String()
  1957  
  1958  			case "get-hints":
  1959  				d.mu.Lock()
  1960  				defer d.mu.Unlock()
  1961  
  1962  				// Force collection of table stats. This requires re-enabling the
  1963  				// collection flag. We also do not want compactions to run as part of
  1964  				// the stats collection job, so we disable it temporarily.
  1965  				d.opts.private.disableTableStats = false
  1966  				d.opts.DisableAutomaticCompactions = true
  1967  				defer func() {
  1968  					d.opts.private.disableTableStats = true
  1969  					d.opts.DisableAutomaticCompactions = false
  1970  				}()
  1971  
  1972  				// NB: collectTableStats attempts to acquire the lock. Temporarily
  1973  				// unlock here to avoid a deadlock.
  1974  				d.mu.Unlock()
  1975  				didRun := d.collectTableStats()
  1976  				d.mu.Lock()
  1977  
  1978  				if !didRun {
  1979  					// If a job was already running, wait for the results.
  1980  					d.waitTableStats()
  1981  				}
  1982  
  1983  				hints := d.mu.compact.deletionHints
  1984  				if len(hints) == 0 {
  1985  					return "(none)"
  1986  				}
  1987  				var buf bytes.Buffer
  1988  				for _, h := range hints {
  1989  					buf.WriteString(h.String() + "\n")
  1990  				}
  1991  				return buf.String()
  1992  
  1993  			case "maybe-compact":
  1994  				d.mu.Lock()
  1995  				d.maybeScheduleCompaction()
  1996  
  1997  				var buf bytes.Buffer
  1998  				fmt.Fprintf(&buf, "Deletion hints:\n")
  1999  				for _, h := range d.mu.compact.deletionHints {
  2000  					fmt.Fprintf(&buf, "  %s\n", h.String())
  2001  				}
  2002  				if len(d.mu.compact.deletionHints) == 0 {
  2003  					fmt.Fprintf(&buf, "  (none)\n")
  2004  				}
  2005  				fmt.Fprintf(&buf, "Compactions:\n")
  2006  				fmt.Fprintf(&buf, "  %s", compactionString())
  2007  				d.mu.Unlock()
  2008  				return buf.String()
  2009  
  2010  			case "compact":
  2011  				if err := runCompactCmd(td, d); err != nil {
  2012  					return err.Error()
  2013  				}
  2014  				d.mu.Lock()
  2015  				compactInfo = nil
  2016  				s := d.mu.versions.currentVersion().String()
  2017  				d.mu.Unlock()
  2018  				return s
  2019  
  2020  			case "close-snapshot":
  2021  				seqNum, err := strconv.ParseUint(strings.TrimSpace(td.Input), 0, 64)
  2022  				if err != nil {
  2023  					return err.Error()
  2024  				}
  2025  				d.mu.Lock()
  2026  				var s *Snapshot
  2027  				l := &d.mu.snapshots
  2028  				for i := l.root.next; i != &l.root; i = i.next {
  2029  					if i.seqNum == seqNum {
  2030  						s = i
  2031  					}
  2032  				}
  2033  				d.mu.Unlock()
  2034  				if s == nil {
  2035  					return "(not found)"
  2036  				} else if err := s.Close(); err != nil {
  2037  					return err.Error()
  2038  				}
  2039  
  2040  				d.mu.Lock()
  2041  				// Closing the snapshot may have triggered a compaction.
  2042  				str := compactionString()
  2043  				d.mu.Unlock()
  2044  				return str
  2045  
  2046  			case "iter":
  2047  				snap := Snapshot{
  2048  					db:     d,
  2049  					seqNum: InternalKeySeqNumMax,
  2050  				}
  2051  				iter := snap.NewIter(nil)
  2052  				return runIterCmd(td, iter, true)
  2053  
  2054  			case "reset":
  2055  				opts, err = reset()
  2056  				if err != nil {
  2057  					return err.Error()
  2058  				}
  2059  				d, err = Open("", opts)
  2060  				if err != nil {
  2061  					return err.Error()
  2062  				}
  2063  				return ""
  2064  
  2065  			case "ingest":
  2066  				if err = runBuildCmd(td, d, d.opts.FS); err != nil {
  2067  					return err.Error()
  2068  				}
  2069  				if err = runIngestCmd(td, d, d.opts.FS); err != nil {
  2070  					return err.Error()
  2071  				}
  2072  				return "OK"
  2073  
  2074  			case "describe-lsm":
  2075  				d.mu.Lock()
  2076  				s := d.mu.versions.currentVersion().String()
  2077  				d.mu.Unlock()
  2078  				return s
  2079  
  2080  			default:
  2081  				return fmt.Sprintf("unknown command: %s", td.Cmd)
  2082  			}
  2083  		})
  2084  }
  2085  
  2086  func TestCompactionTombstones(t *testing.T) {
  2087  	var d *DB
  2088  	defer func() {
  2089  		if d != nil {
  2090  			require.NoError(t, d.Close())
  2091  		}
  2092  	}()
  2093  
  2094  	var compactInfo *CompactionInfo // protected by d.mu
  2095  
  2096  	compactionString := func() string {
  2097  		for d.mu.compact.compactingCount > 0 {
  2098  			d.mu.compact.cond.Wait()
  2099  		}
  2100  
  2101  		s := "(none)"
  2102  		if compactInfo != nil {
  2103  			// Fix the job ID and durations for determinism.
  2104  			compactInfo.JobID = 100
  2105  			compactInfo.Duration = time.Second
  2106  			compactInfo.TotalDuration = 2 * time.Second
  2107  			s = compactInfo.String()
  2108  			compactInfo = nil
  2109  		}
  2110  		return s
  2111  	}
  2112  
  2113  	datadriven.RunTest(t, "testdata/compaction_tombstones",
  2114  		func(td *datadriven.TestData) string {
  2115  			switch td.Cmd {
  2116  			case "define":
  2117  				if d != nil {
  2118  					compactInfo = nil
  2119  					if err := d.Close(); err != nil {
  2120  						return err.Error()
  2121  					}
  2122  				}
  2123  				opts := &Options{
  2124  					FS:         vfs.NewMem(),
  2125  					DebugCheck: DebugCheckLevels,
  2126  					EventListener: EventListener{
  2127  						CompactionEnd: func(info CompactionInfo) {
  2128  							compactInfo = &info
  2129  						},
  2130  					},
  2131  					FormatMajorVersion: FormatNewest,
  2132  				}
  2133  				var err error
  2134  				d, err = runDBDefineCmd(td, opts)
  2135  				if err != nil {
  2136  					return err.Error()
  2137  				}
  2138  				d.mu.Lock()
  2139  				s := d.mu.versions.currentVersion().String()
  2140  				d.mu.Unlock()
  2141  				return s
  2142  
  2143  			case "maybe-compact":
  2144  				d.mu.Lock()
  2145  				d.opts.DisableAutomaticCompactions = false
  2146  				d.maybeScheduleCompaction()
  2147  				s := compactionString()
  2148  				d.mu.Unlock()
  2149  				return s
  2150  
  2151  			case "wait-pending-table-stats":
  2152  				return runTableStatsCmd(td, d)
  2153  
  2154  			case "close-snapshot":
  2155  				seqNum, err := strconv.ParseUint(strings.TrimSpace(td.Input), 0, 64)
  2156  				if err != nil {
  2157  					return err.Error()
  2158  				}
  2159  				d.mu.Lock()
  2160  				var s *Snapshot
  2161  				l := &d.mu.snapshots
  2162  				for i := l.root.next; i != &l.root; i = i.next {
  2163  					if i.seqNum == seqNum {
  2164  						s = i
  2165  					}
  2166  				}
  2167  				d.mu.Unlock()
  2168  				if s == nil {
  2169  					return "(not found)"
  2170  				} else if err := s.Close(); err != nil {
  2171  					return err.Error()
  2172  				}
  2173  
  2174  				d.mu.Lock()
  2175  				// Closing the snapshot may have triggered a compaction.
  2176  				str := compactionString()
  2177  				d.mu.Unlock()
  2178  				return str
  2179  
  2180  			case "version":
  2181  				d.mu.Lock()
  2182  				s := d.mu.versions.currentVersion().String()
  2183  				d.mu.Unlock()
  2184  				return s
  2185  
  2186  			default:
  2187  				return fmt.Sprintf("unknown command: %s", td.Cmd)
  2188  			}
  2189  		})
  2190  }
  2191  
  2192  func TestCompactionReadTriggeredQueue(t *testing.T) {
  2193  
  2194  	// Convert a read compaction to a string which this test
  2195  	// understands.
  2196  	showRC := func(rc *readCompaction) string {
  2197  		return fmt.Sprintf(
  2198  			"L%d: %s-%s %d\n", rc.level, string(rc.start), string(rc.end), rc.fileNum,
  2199  		)
  2200  	}
  2201  
  2202  	var queue *readCompactionQueue
  2203  
  2204  	datadriven.RunTest(t, "testdata/read_compaction_queue",
  2205  		func(td *datadriven.TestData) string {
  2206  			switch td.Cmd {
  2207  			case "create":
  2208  				queue = &readCompactionQueue{}
  2209  				return "(success)"
  2210  			case "add-compaction":
  2211  				for _, line := range strings.Split(td.Input, "\n") {
  2212  					if line == "" {
  2213  						continue
  2214  					}
  2215  					parts := strings.Split(line, " ")
  2216  
  2217  					if len(parts) != 3 {
  2218  						return "error: malformed data for add-compaction. usage: <level>: <start>-<end> <filenum>"
  2219  					}
  2220  					if l, err := strconv.Atoi(parts[0][1:2]); err == nil {
  2221  						keys := strings.Split(parts[1], "-")
  2222  						fileNum, _ := strconv.Atoi(parts[2])
  2223  						rc := readCompaction{
  2224  							level:   l,
  2225  							start:   []byte(keys[0]),
  2226  							end:     []byte(keys[1]),
  2227  							fileNum: base.FileNum(fileNum),
  2228  						}
  2229  						queue.add(&rc, DefaultComparer.Compare)
  2230  					} else {
  2231  						return err.Error()
  2232  					}
  2233  				}
  2234  				return ""
  2235  			case "remove-compaction":
  2236  				rc := queue.remove()
  2237  				if rc == nil {
  2238  					return "(nil)"
  2239  				}
  2240  				return showRC(rc)
  2241  			case "print-size":
  2242  				// Print the size of the queue.
  2243  				return fmt.Sprintf("%d", queue.size)
  2244  			case "print-queue":
  2245  				// Print each element of the queue on a separate line.
  2246  				var sb strings.Builder
  2247  				if queue.size == 0 {
  2248  					sb.WriteString("(empty)")
  2249  				}
  2250  
  2251  				for i := 0; i < queue.size; i++ {
  2252  					rc := queue.at(i)
  2253  					sb.WriteString(showRC(rc))
  2254  				}
  2255  				return sb.String()
  2256  			default:
  2257  				return fmt.Sprintf("unknown command: %s", td.Cmd)
  2258  			}
  2259  		},
  2260  	)
  2261  }
  2262  
  2263  func (qu *readCompactionQueue) at(i int) *readCompaction {
  2264  	if i >= qu.size {
  2265  		return nil
  2266  	}
  2267  
  2268  	return qu.queue[i]
  2269  }
  2270  
  2271  func TestCompactionReadTriggered(t *testing.T) {
  2272  	var d *DB
  2273  	defer func() {
  2274  		if d != nil {
  2275  			require.NoError(t, d.Close())
  2276  		}
  2277  	}()
  2278  
  2279  	var compactInfo *CompactionInfo // protected by d.mu
  2280  
  2281  	compactionString := func() string {
  2282  		for d.mu.compact.compactingCount > 0 {
  2283  			d.mu.compact.cond.Wait()
  2284  		}
  2285  
  2286  		s := "(none)"
  2287  		if compactInfo != nil {
  2288  			// Fix the job ID and durations for determinism.
  2289  			compactInfo.JobID = 100
  2290  			compactInfo.Duration = time.Second
  2291  			compactInfo.TotalDuration = 2 * time.Second
  2292  			s = compactInfo.String()
  2293  			compactInfo = nil
  2294  		}
  2295  		return s
  2296  	}
  2297  
  2298  	datadriven.RunTest(t, "testdata/compaction_read_triggered",
  2299  		func(td *datadriven.TestData) string {
  2300  			switch td.Cmd {
  2301  			case "define":
  2302  				if d != nil {
  2303  					compactInfo = nil
  2304  					if err := d.Close(); err != nil {
  2305  						return err.Error()
  2306  					}
  2307  				}
  2308  				opts := &Options{
  2309  					FS:         vfs.NewMem(),
  2310  					DebugCheck: DebugCheckLevels,
  2311  					EventListener: EventListener{
  2312  						CompactionEnd: func(info CompactionInfo) {
  2313  							compactInfo = &info
  2314  						},
  2315  					},
  2316  				}
  2317  				var err error
  2318  				d, err = runDBDefineCmd(td, opts)
  2319  				if err != nil {
  2320  					return err.Error()
  2321  				}
  2322  				d.mu.Lock()
  2323  				s := d.mu.versions.currentVersion().String()
  2324  				d.mu.Unlock()
  2325  				return s
  2326  
  2327  			case "add-read-compaction":
  2328  				d.mu.Lock()
  2329  				for _, arg := range td.CmdArgs {
  2330  					switch arg.Key {
  2331  					case "flushing":
  2332  						switch arg.Vals[0] {
  2333  						case "true":
  2334  							d.mu.compact.flushing = true
  2335  						default:
  2336  							d.mu.compact.flushing = false
  2337  						}
  2338  					}
  2339  				}
  2340  				for _, line := range strings.Split(td.Input, "\n") {
  2341  					if line == "" {
  2342  						continue
  2343  					}
  2344  					parts := strings.Split(line, " ")
  2345  					if len(parts) != 3 {
  2346  						return "error: malformed data for add-read-compaction. usage: <level>: <start>-<end> <filenum>"
  2347  					}
  2348  					if l, err := strconv.Atoi(parts[0][:1]); err == nil {
  2349  						keys := strings.Split(parts[1], "-")
  2350  						fileNum, _ := strconv.Atoi(parts[2])
  2351  						rc := readCompaction{
  2352  							level:   l,
  2353  							start:   []byte(keys[0]),
  2354  							end:     []byte(keys[1]),
  2355  							fileNum: base.FileNum(fileNum),
  2356  						}
  2357  						d.mu.compact.readCompactions.add(&rc, DefaultComparer.Compare)
  2358  					} else {
  2359  						return err.Error()
  2360  					}
  2361  				}
  2362  				d.mu.Unlock()
  2363  				return ""
  2364  
  2365  			case "show-read-compactions":
  2366  				d.mu.Lock()
  2367  				var sb strings.Builder
  2368  				if d.mu.compact.readCompactions.size == 0 {
  2369  					sb.WriteString("(none)")
  2370  				}
  2371  				for i := 0; i < d.mu.compact.readCompactions.size; i++ {
  2372  					rc := d.mu.compact.readCompactions.at(i)
  2373  					sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end)))
  2374  				}
  2375  				d.mu.Unlock()
  2376  				return sb.String()
  2377  
  2378  			case "maybe-compact":
  2379  				d.mu.Lock()
  2380  				d.opts.DisableAutomaticCompactions = false
  2381  				d.maybeScheduleCompaction()
  2382  				s := compactionString()
  2383  				d.mu.Unlock()
  2384  				return s
  2385  
  2386  			case "version":
  2387  				d.mu.Lock()
  2388  				s := d.mu.versions.currentVersion().String()
  2389  				d.mu.Unlock()
  2390  				return s
  2391  
  2392  			default:
  2393  				return fmt.Sprintf("unknown command: %s", td.Cmd)
  2394  			}
  2395  		})
  2396  }
  2397  
// TestCompactionInuseKeyRanges exercises compaction.setupInuseKeyRanges
// (and therefore calculateInuseKeyRanges) against LSMs described by the
// datadriven "define" command.
func TestCompactionInuseKeyRanges(t *testing.T) {
	cmp := DefaultComparer.Compare
	// parseMeta parses a "smallest-largest" pair of internal keys into a
	// fileMetadata whose seqnums are taken from the parsed keys.
	// NOTE(review): this uses ExtendRangeKeyBounds while the sibling parseMeta
	// helpers in this file use ExtendPointKeyBounds — confirm that is
	// intentional; only the overall Smallest/Largest bounds appear to be
	// consumed by the code under test here.
	parseMeta := func(s string) *fileMetadata {
		parts := strings.Split(s, "-")
		if len(parts) != 2 {
			t.Fatalf("malformed table spec: %s", s)
		}
		m := (&fileMetadata{}).ExtendRangeKeyBounds(
			cmp,
			base.ParseInternalKey(strings.TrimSpace(parts[0])),
			base.ParseInternalKey(strings.TrimSpace(parts[1])),
		)
		m.SmallestSeqNum = m.Smallest.SeqNum()
		m.LargestSeqNum = m.Largest.SeqNum()
		return m
	}

	opts := (*Options)(nil).EnsureDefaults()

	var c *compaction
	datadriven.RunTest(t, "testdata/compaction_inuse_key_ranges", func(td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			// Build a fresh compaction and version from the level/table
			// specs in the input.
			c = &compaction{
				cmp:       DefaultComparer.Compare,
				equal:     DefaultComparer.Equal,
				comparer:  DefaultComparer,
				formatKey: DefaultComparer.FormatKey,
				inputs:    []compactionLevel{{}, {}},
			}
			// startLevel/outputLevel alias into c.inputs.
			c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
			var files [numLevels][]*fileMetadata
			var currentLevel int
			fileNum := FileNum(1)

			for _, data := range strings.Split(td.Input, "\n") {
				switch data {
				case "L0", "L1", "L2", "L3", "L4", "L5", "L6":
					// A bare "L<n>" line selects the level that subsequent
					// table specs are added to.
					level, err := strconv.Atoi(data[1:])
					if err != nil {
						return err.Error()
					}
					currentLevel = level

				default:
					meta := parseMeta(data)
					meta.FileNum = fileNum
					fileNum++
					files[currentLevel] = append(files[currentLevel], meta)
				}
			}
			c.version = newVersion(opts, files)
			return c.version.String()

		case "inuse-key-ranges":
			// Each input line is "<level> <smallest> <largest>"; print the
			// in-use key ranges computed for that output level and span.
			var buf bytes.Buffer
			for _, line := range strings.Split(td.Input, "\n") {
				parts := strings.Fields(line)
				if len(parts) != 3 {
					fmt.Fprintf(&buf, "expected <level> <smallest> <largest>: %q\n", line)
					continue
				}
				level, err := strconv.Atoi(parts[0])
				if err != nil {
					fmt.Fprintf(&buf, "expected <level> <smallest> <largest>: %q: %v\n", line, err)
					continue
				}
				c.outputLevel.level = level
				c.smallest.UserKey = []byte(parts[1])
				c.largest.UserKey = []byte(parts[2])

				// Clear any ranges computed for a previous line before
				// recomputing.
				c.inuseKeyRanges = nil
				c.setupInuseKeyRanges()
				if len(c.inuseKeyRanges) == 0 {
					fmt.Fprintf(&buf, ".\n")
				} else {
					for i, r := range c.inuseKeyRanges {
						if i > 0 {
							fmt.Fprintf(&buf, " ")
						}
						fmt.Fprintf(&buf, "%s-%s", r.Start, r.End)
					}
					fmt.Fprintf(&buf, "\n")
				}
			}
			return buf.String()

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}
  2490  
  2491  func TestCompactionInuseKeyRangesRandomized(t *testing.T) {
  2492  	var (
  2493  		fileNum     = FileNum(0)
  2494  		opts        = (*Options)(nil).EnsureDefaults()
  2495  		seed        = int64(time.Now().UnixNano())
  2496  		rng         = rand.New(rand.NewSource(seed))
  2497  		endKeyspace = 26 * 26
  2498  	)
  2499  	t.Logf("Using rng seed %d.", seed)
  2500  
  2501  	for iter := 0; iter < 100; iter++ {
  2502  		makeUserKey := func(i int) []byte {
  2503  			if i >= endKeyspace {
  2504  				i = endKeyspace - 1
  2505  			}
  2506  			return []byte{byte(i/26 + 'a'), byte(i%26 + 'a')}
  2507  		}
  2508  		makeIK := func(level, i int) InternalKey {
  2509  			return base.MakeInternalKey(
  2510  				makeUserKey(i),
  2511  				uint64(numLevels-level),
  2512  				base.InternalKeyKindSet,
  2513  			)
  2514  		}
  2515  		makeFile := func(level, start, end int) *fileMetadata {
  2516  			fileNum++
  2517  			m := (&fileMetadata{
  2518  				FileNum: fileNum,
  2519  			}).ExtendPointKeyBounds(
  2520  				opts.Comparer.Compare,
  2521  				makeIK(level, start),
  2522  				makeIK(level, end),
  2523  			)
  2524  			m.SmallestSeqNum = m.Smallest.SeqNum()
  2525  			m.LargestSeqNum = m.Largest.SeqNum()
  2526  			return m
  2527  		}
  2528  		overlaps := func(startA, endA, startB, endB []byte) bool {
  2529  			disjoint := opts.Comparer.Compare(endB, startA) < 0 || opts.Comparer.Compare(endA, startB) < 0
  2530  			return !disjoint
  2531  		}
  2532  		var files [numLevels][]*fileMetadata
  2533  		for l := 0; l < numLevels; l++ {
  2534  			for i := 0; i < rand.Intn(10); i++ {
  2535  				s := rng.Intn(endKeyspace)
  2536  				maxWidth := rng.Intn(endKeyspace-s) + 1
  2537  				e := rng.Intn(maxWidth) + s
  2538  				sKey, eKey := makeUserKey(s), makeUserKey(e)
  2539  				// Discard the key range if it overlaps any existing files
  2540  				// within this level.
  2541  				var o bool
  2542  				for _, f := range files[l] {
  2543  					o = o || overlaps(sKey, eKey, f.Smallest.UserKey, f.Largest.UserKey)
  2544  				}
  2545  				if o {
  2546  					continue
  2547  				}
  2548  				files[l] = append(files[l], makeFile(l, s, e))
  2549  			}
  2550  			sort.Slice(files[l], func(i, j int) bool {
  2551  				return opts.Comparer.Compare(files[l][i].Smallest.UserKey, files[l][j].Smallest.UserKey) < 0
  2552  			})
  2553  		}
  2554  		v := newVersion(opts, files)
  2555  		t.Log(v.DebugString(opts.Comparer.FormatKey))
  2556  		for i := 0; i < 1000; i++ {
  2557  			l := rng.Intn(numLevels)
  2558  			s := rng.Intn(endKeyspace)
  2559  			maxWidth := rng.Intn(endKeyspace-s) + 1
  2560  			e := rng.Intn(maxWidth) + s
  2561  			sKey, eKey := makeUserKey(s), makeUserKey(e)
  2562  			keyRanges := calculateInuseKeyRanges(v, opts.Comparer.Compare, l, numLevels-1, sKey, eKey)
  2563  
  2564  			for level := l; level < numLevels; level++ {
  2565  				for _, f := range files[level] {
  2566  					if !overlaps(sKey, eKey, f.Smallest.UserKey, f.Largest.UserKey) {
  2567  						// This file doesn't overlap the queried range. Skip it.
  2568  						continue
  2569  					}
  2570  					// This file does overlap the queried range. The key range
  2571  					// [MAX(f.Smallest, sKey), MIN(f.Largest, eKey)] must be fully
  2572  					// contained by a key range in keyRanges.
  2573  					checkStart, checkEnd := f.Smallest.UserKey, f.Largest.UserKey
  2574  					if opts.Comparer.Compare(checkStart, sKey) < 0 {
  2575  						checkStart = sKey
  2576  					}
  2577  					if opts.Comparer.Compare(checkEnd, eKey) > 0 {
  2578  						checkEnd = eKey
  2579  					}
  2580  					var contained bool
  2581  					for _, kr := range keyRanges {
  2582  						contained = contained ||
  2583  							(opts.Comparer.Compare(checkStart, kr.Start) >= 0 &&
  2584  								opts.Comparer.Compare(checkEnd, kr.End) <= 0)
  2585  					}
  2586  					if !contained {
  2587  						t.Errorf("Seed %d, iter %d: File %s overlaps %q-%q, but is not fully contained in any of the key ranges.",
  2588  							seed, iter, f, sKey, eKey)
  2589  					}
  2590  				}
  2591  			}
  2592  		}
  2593  	}
  2594  }
  2595  
// TestCompactionAllowZeroSeqNum exercises compaction.allowZeroSeqNum against
// datadriven LSM definitions: for each described set of compaction inputs
// (or a flush of the memtable queue), it prints whether sequence numbers may
// be zeroed.
func TestCompactionAllowZeroSeqNum(t *testing.T) {
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	// Matches table specs of the form "L<level>:<smallest>-<largest>".
	metaRE := regexp.MustCompile(`^L([0-9]+):([^-]+)-(.+)$`)
	var fileNum base.FileNum
	// parseMeta parses a table spec into its level and a fileMetadata with a
	// freshly allocated file number and the parsed user-key bounds.
	parseMeta := func(s string) (level int, meta *fileMetadata) {
		match := metaRE.FindStringSubmatch(s)
		if match == nil {
			t.Fatalf("malformed table spec: %s", s)
		}
		level, err := strconv.Atoi(match[1])
		if err != nil {
			t.Fatalf("malformed table spec: %s: %s", s, err)
		}
		fileNum++
		meta = (&fileMetadata{
			FileNum: fileNum,
		}).ExtendPointKeyBounds(
			d.cmp,
			InternalKey{UserKey: []byte(match[2])},
			InternalKey{UserKey: []byte(match[3])},
		)
		return level, meta
	}

	datadriven.RunTest(t, "testdata/compaction_allow_zero_seqnum",
		func(td *datadriven.TestData) string {
			switch td.Cmd {
			case "define":
				// Discard any previous database before building a new one.
				if d != nil {
					if err := d.Close(); err != nil {
						return err.Error()
					}
				}

				var err error
				if d, err = runDBDefineCmd(td, nil /* options */); err != nil {
					return err.Error()
				}

				d.mu.Lock()
				s := d.mu.versions.currentVersion().String()
				d.mu.Unlock()
				return s

			case "allow-zero-seqnum":
				d.mu.Lock()
				// Construct a bare compaction over the current version;
				// startLevel/outputLevel alias into c.inputs.
				c := &compaction{
					cmp:      d.cmp,
					comparer: d.opts.Comparer,
					version:  d.mu.versions.currentVersion(),
					inputs:   []compactionLevel{{}, {}},
				}
				c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
				d.mu.Unlock()

				var buf bytes.Buffer
				for _, line := range strings.Split(td.Input, "\n") {
					parts := strings.Fields(line)
					if len(parts) == 0 {
						continue
					}
					// Reset per-line state; -1 marks the start level as
					// not-yet-assigned.
					c.flushing = nil
					c.startLevel.level = -1

					var startFiles, outputFiles []*fileMetadata

					switch {
					case len(parts) == 1 && parts[0] == "flush":
						// A flush compacts the memtable queue into L0.
						c.outputLevel.level = 0
						d.mu.Lock()
						c.flushing = d.mu.mem.queue
						d.mu.Unlock()

					default:
						for _, p := range parts {
							level, meta := parseMeta(p)
							// The first table's level becomes the start level.
							if c.startLevel.level == -1 {
								c.startLevel.level = level
							}

							// Tables must belong to the start level or the
							// immediately following output level.
							switch level {
							case c.startLevel.level:
								startFiles = append(startFiles, meta)
							case c.startLevel.level + 1:
								outputFiles = append(outputFiles, meta)
							default:
								return fmt.Sprintf("invalid level %d: expected %d or %d",
									level, c.startLevel.level, c.startLevel.level+1)
							}
						}
						c.outputLevel.level = c.startLevel.level + 1
						c.startLevel.files = manifest.NewLevelSliceSpecificOrder(startFiles)
						c.outputLevel.files = manifest.NewLevelSliceKeySorted(c.cmp, outputFiles)
					}

					// Overall key range spanned by the compaction inputs.
					c.smallest, c.largest = manifest.KeyRange(c.cmp,
						c.startLevel.files.Iter(),
						c.outputLevel.files.Iter())

					// Clear any ranges from the previous line before
					// recomputing.
					c.inuseKeyRanges = nil
					c.setupInuseKeyRanges()
					fmt.Fprintf(&buf, "%t\n", c.allowZeroSeqNum())
				}
				return buf.String()

			default:
				return fmt.Sprintf("unknown command: %s", td.Cmd)
			}
		})
}
  2712  
  2713  func TestCompactionErrorOnUserKeyOverlap(t *testing.T) {
  2714  	cmp := DefaultComparer.Compare
  2715  	parseMeta := func(s string) *fileMetadata {
  2716  		parts := strings.Split(s, "-")
  2717  		if len(parts) != 2 {
  2718  			t.Fatalf("malformed table spec: %s", s)
  2719  		}
  2720  		m := (&fileMetadata{}).ExtendPointKeyBounds(
  2721  			cmp,
  2722  			base.ParseInternalKey(strings.TrimSpace(parts[0])),
  2723  			base.ParseInternalKey(strings.TrimSpace(parts[1])),
  2724  		)
  2725  		m.SmallestSeqNum = m.Smallest.SeqNum()
  2726  		m.LargestSeqNum = m.Largest.SeqNum()
  2727  		return m
  2728  	}
  2729  
  2730  	datadriven.RunTest(t, "testdata/compaction_error_on_user_key_overlap",
  2731  		func(d *datadriven.TestData) string {
  2732  			switch d.Cmd {
  2733  			case "error-on-user-key-overlap":
  2734  				c := &compaction{
  2735  					cmp:       DefaultComparer.Compare,
  2736  					comparer:  DefaultComparer,
  2737  					formatKey: DefaultComparer.FormatKey,
  2738  				}
  2739  				var files []manifest.NewFileEntry
  2740  				fileNum := FileNum(1)
  2741  
  2742  				for _, data := range strings.Split(d.Input, "\n") {
  2743  					meta := parseMeta(data)
  2744  					meta.FileNum = fileNum
  2745  					fileNum++
  2746  					files = append(files, manifest.NewFileEntry{Level: 1, Meta: meta})
  2747  				}
  2748  
  2749  				result := "OK"
  2750  				ve := &versionEdit{
  2751  					NewFiles: files,
  2752  				}
  2753  				if err := c.errorOnUserKeyOverlap(ve); err != nil {
  2754  					result = fmt.Sprint(err)
  2755  				}
  2756  				return result
  2757  
  2758  			default:
  2759  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  2760  			}
  2761  		})
  2762  }
  2763  
  2764  // TestCompactionErrorCleanup tests an error encountered during a compaction
  2765  // after some output tables have been created. It ensures that the pending
  2766  // output tables are removed from the filesystem.
  2767  func TestCompactionErrorCleanup(t *testing.T) {
  2768  	// protected by d.mu
  2769  	var (
  2770  		initialSetupDone bool
  2771  		tablesCreated    []FileNum
  2772  	)
  2773  
  2774  	mem := vfs.NewMem()
  2775  	ii := errorfs.OnIndex(math.MaxInt32) // start disabled
  2776  	opts := &Options{
  2777  		FS:     errorfs.Wrap(mem, ii),
  2778  		Levels: make([]LevelOptions, numLevels),
  2779  		EventListener: EventListener{
  2780  			TableCreated: func(info TableCreateInfo) {
  2781  				t.Log(info)
  2782  
  2783  				// If the initial setup is over, record tables created and
  2784  				// inject an error immediately after the second table is
  2785  				// created.
  2786  				if initialSetupDone {
  2787  					tablesCreated = append(tablesCreated, info.FileNum)
  2788  					if len(tablesCreated) >= 2 {
  2789  						ii.SetIndex(0)
  2790  					}
  2791  				}
  2792  			},
  2793  		},
  2794  	}
  2795  	for i := range opts.Levels {
  2796  		opts.Levels[i].TargetFileSize = 1
  2797  	}
  2798  	opts.testingRandomized()
  2799  	d, err := Open("", opts)
  2800  	require.NoError(t, err)
  2801  
  2802  	ingest := func(keys ...string) {
  2803  		t.Helper()
  2804  		f, err := mem.Create("ext")
  2805  		require.NoError(t, err)
  2806  
  2807  		w := sstable.NewWriter(f, sstable.WriterOptions{
  2808  			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
  2809  		})
  2810  		for _, k := range keys {
  2811  			require.NoError(t, w.Set([]byte(k), nil))
  2812  		}
  2813  		require.NoError(t, w.Close())
  2814  		require.NoError(t, d.Ingest([]string{"ext"}))
  2815  	}
  2816  	ingest("a", "c")
  2817  	ingest("b")
  2818  
  2819  	// Trigger a manual compaction, which will encounter an injected error
  2820  	// after the second table is created.
  2821  	d.mu.Lock()
  2822  	initialSetupDone = true
  2823  	d.mu.Unlock()
  2824  	err = d.Compact([]byte("a"), []byte("d"), false)
  2825  	require.Error(t, err, "injected error")
  2826  
  2827  	d.mu.Lock()
  2828  	if len(tablesCreated) < 2 {
  2829  		t.Fatalf("expected 2 output tables created by compaction: found %d", len(tablesCreated))
  2830  	}
  2831  	d.mu.Unlock()
  2832  
  2833  	require.NoError(t, d.Close())
  2834  	for _, fileNum := range tablesCreated {
  2835  		filename := fmt.Sprintf("%s.sst", fileNum)
  2836  		if _, err = mem.Stat(filename); err == nil || !oserror.IsNotExist(err) {
  2837  			t.Errorf("expected %q to not exist: %s", filename, err)
  2838  		}
  2839  	}
  2840  }
  2841  
  2842  func TestCompactionCheckOrdering(t *testing.T) {
  2843  	cmp := DefaultComparer.Compare
  2844  	parseMeta := func(s string) *fileMetadata {
  2845  		parts := strings.Split(s, "-")
  2846  		if len(parts) != 2 {
  2847  			t.Fatalf("malformed table spec: %s", s)
  2848  		}
  2849  		m := (&fileMetadata{}).ExtendPointKeyBounds(
  2850  			cmp,
  2851  			base.ParseInternalKey(strings.TrimSpace(parts[0])),
  2852  			base.ParseInternalKey(strings.TrimSpace(parts[1])),
  2853  		)
  2854  		m.SmallestSeqNum = m.Smallest.SeqNum()
  2855  		m.LargestSeqNum = m.Largest.SeqNum()
  2856  		return m
  2857  	}
  2858  
  2859  	datadriven.RunTest(t, "testdata/compaction_check_ordering",
  2860  		func(d *datadriven.TestData) string {
  2861  			switch d.Cmd {
  2862  			case "check-ordering":
  2863  				c := &compaction{
  2864  					cmp:       DefaultComparer.Compare,
  2865  					comparer:  DefaultComparer,
  2866  					formatKey: DefaultComparer.FormatKey,
  2867  					logger:    panicLogger{},
  2868  					inputs:    []compactionLevel{{level: -1}, {level: -1}},
  2869  				}
  2870  				c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1]
  2871  				var startFiles, outputFiles []*fileMetadata
  2872  				var sublevels []manifest.LevelSlice
  2873  				var files *[]*fileMetadata
  2874  				var sublevel []*fileMetadata
  2875  				var sublevelNum int
  2876  				var parsingSublevel bool
  2877  				fileNum := FileNum(1)
  2878  
  2879  				switchSublevel := func() {
  2880  					if sublevel != nil {
  2881  						sublevels = append(
  2882  							sublevels, manifest.NewLevelSliceSpecificOrder(sublevel),
  2883  						)
  2884  						sublevel = nil
  2885  					}
  2886  					parsingSublevel = false
  2887  				}
  2888  
  2889  				for _, data := range strings.Split(d.Input, "\n") {
  2890  					if data[0] == 'L' && len(data) == 4 {
  2891  						// Format L0.{sublevel}.
  2892  						switchSublevel()
  2893  						level, err := strconv.Atoi(data[1:2])
  2894  						if err != nil {
  2895  							return err.Error()
  2896  						}
  2897  						sublevelNum, err = strconv.Atoi(data[3:])
  2898  						if err != nil {
  2899  							return err.Error()
  2900  						}
  2901  						if c.startLevel.level == -1 {
  2902  							c.startLevel.level = level
  2903  							files = &startFiles
  2904  						}
  2905  						parsingSublevel = true
  2906  					} else if data[0] == 'L' {
  2907  						switchSublevel()
  2908  						level, err := strconv.Atoi(data[1:])
  2909  						if err != nil {
  2910  							return err.Error()
  2911  						}
  2912  						if c.startLevel.level == -1 {
  2913  							c.startLevel.level = level
  2914  							files = &startFiles
  2915  						} else if c.outputLevel.level == -1 {
  2916  							if c.startLevel.level >= level {
  2917  								return fmt.Sprintf("startLevel=%d >= outputLevel=%d\n", c.startLevel.level, level)
  2918  							}
  2919  							c.outputLevel.level = level
  2920  							files = &outputFiles
  2921  						} else {
  2922  							return "outputLevel already set\n"
  2923  						}
  2924  					} else {
  2925  						meta := parseMeta(data)
  2926  						meta.FileNum = fileNum
  2927  						fileNum++
  2928  						*files = append(*files, meta)
  2929  						if parsingSublevel {
  2930  							meta.SubLevel = sublevelNum
  2931  							sublevel = append(sublevel, meta)
  2932  						}
  2933  					}
  2934  				}
  2935  
  2936  				switchSublevel()
  2937  				c.startLevel.files = manifest.NewLevelSliceSpecificOrder(startFiles)
  2938  				c.outputLevel.files = manifest.NewLevelSliceSpecificOrder(outputFiles)
  2939  				if c.outputLevel.level == -1 {
  2940  					c.outputLevel.level = 0
  2941  				}
  2942  				if c.startLevel.level == 0 {
  2943  					// We don't change the input files for the compaction beyond this point.
  2944  					c.l0SublevelInfo = generateSublevelInfo(c.cmp, c.startLevel.files)
  2945  				}
  2946  
  2947  				newIters := func(
  2948  					_ *manifest.FileMetadata, _ *IterOptions, _ internalIterOpts,
  2949  				) (internalIterator, keyspan.FragmentIterator, error) {
  2950  					return &errorIter{}, nil, nil
  2951  				}
  2952  				result := "OK"
  2953  				_, err := c.newInputIter(newIters, nil, nil)
  2954  				if err != nil {
  2955  					result = fmt.Sprint(err)
  2956  				}
  2957  				return result
  2958  
  2959  			default:
  2960  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  2961  			}
  2962  		})
  2963  }
  2964  
// mockSplitter is a test-only compactionOutputSplitter whose
// shouldSplitBefore answer is a canned, configurable value.
type mockSplitter struct {
	// shouldSplitVal is returned unconditionally by shouldSplitBefore.
	shouldSplitVal compactionSplitSuggestion
}
  2968  
// shouldSplitBefore returns the canned suggestion; key and tw are ignored.
func (m *mockSplitter) shouldSplitBefore(
	key *InternalKey, tw *sstable.Writer,
) compactionSplitSuggestion {
	return m.shouldSplitVal
}
  2974  
// onNewOutput is a no-op; mockSplitter never imposes a split key.
func (m *mockSplitter) onNewOutput(key *InternalKey) []byte {
	return nil
}
  2978  
// TestCompactionOutputSplitters is a datadriven test of the
// compactionOutputSplitter implementations (splitterGroup,
// userKeyChangeSplitter, and the mockSplitter above). It maintains three
// splitter "slots" (main, child0, child1) and drives shouldSplitBefore /
// onNewOutput through commands in testdata/compaction_output_splitters.
func TestCompactionOutputSplitters(t *testing.T) {
	var main, child0, child1 compactionOutputSplitter
	// prevUserKey mirrors the compaction loop's notion of the previously
	// written user key; it feeds userKeyChangeSplitter.unsafePrevUserKey.
	var prevUserKey []byte
	// pickSplitter maps a command argument to one of the three slots.
	pickSplitter := func(input string) *compactionOutputSplitter {
		switch input {
		case "main":
			return &main
		case "child0":
			return &child0
		case "child1":
			return &child1
		default:
			t.Fatalf("invalid splitter slot: %s", input)
			return nil
		}
	}

	datadriven.RunTest(t, "testdata/compaction_output_splitters",
		func(d *datadriven.TestData) string {
			switch d.Cmd {
			case "reset":
				// Clear all three slots.
				main = nil
				child0 = nil
				child1 = nil
			case "init":
				// init <slot> <kind>: construct a splitter into the slot.
				if len(d.CmdArgs) < 2 {
					return "expected at least 2 args"
				}
				splitterToInit := pickSplitter(d.CmdArgs[0].Key)
				switch d.CmdArgs[1].Key {
				case "array":
					// Compose the two child slots into a splitterGroup.
					*splitterToInit = &splitterGroup{
						cmp:       base.DefaultComparer.Compare,
						splitters: []compactionOutputSplitter{child0, child1},
					}
				case "mock":
					*splitterToInit = &mockSplitter{}
				case "userkey":
					// Wrap child0; prevUserKey is supplied via the closure.
					*splitterToInit = &userKeyChangeSplitter{
						cmp: base.DefaultComparer.Compare,
						unsafePrevUserKey: func() []byte {
							return prevUserKey
						},
						splitter: child0,
					}
				}
				(*splitterToInit).onNewOutput(nil)
			case "set-should-split":
				// set-should-split <slot> <val>: configure a mockSplitter's
				// canned suggestion. Panics (via type assertion) if the slot
				// does not hold a mockSplitter.
				if len(d.CmdArgs) < 2 {
					return "expected at least 2 args"
				}
				splitterToSet := (*pickSplitter(d.CmdArgs[0].Key)).(*mockSplitter)
				var val compactionSplitSuggestion
				switch d.CmdArgs[1].Key {
				case "split-now":
					val = splitNow
				case "no-split":
					val = noSplit
				default:
					t.Fatalf("unexpected value for should-split: %s", d.CmdArgs[1].Key)
				}
				splitterToSet.shouldSplitVal = val
			case "should-split-before":
				// should-split-before <key>: query the main splitter. On a
				// split, simulate starting a new output file; otherwise
				// record the key as the previous user key.
				if len(d.CmdArgs) < 1 {
					return "expected at least 1 arg"
				}
				key := base.ParseInternalKey(d.CmdArgs[0].Key)
				shouldSplit := main.shouldSplitBefore(&key, nil)
				if shouldSplit == splitNow {
					main.onNewOutput(&key)
					prevUserKey = nil
				} else {
					prevUserKey = key.UserKey
				}
				return shouldSplit.String()
			default:
				return fmt.Sprintf("unknown command: %s", d.Cmd)
			}
			return "ok"
		})
}
  3060  
// TestFlushInvariant deliberately corrupts internal DB state in two ways that
// violate flush invariants, then verifies the outcome: with the WAL enabled
// the flush must fail with errFlushInvariant (surfaced via the
// BackgroundError listener), while with the WAL disabled the flush completes
// successfully.
func TestFlushInvariant(t *testing.T) {
	for _, disableWAL := range []bool{false, true} {
		t.Run(fmt.Sprintf("disableWAL=%t", disableWAL), func(t *testing.T) {
			for i := 0; i < 2; i++ {
				t.Run("", func(t *testing.T) {
					// Buffered (and drained via select below) so the listener
					// never blocks; only the first error is captured.
					errCh := make(chan error, 1)
					defer close(errCh)
					d, err := Open("", testingRandomized(&Options{
						DisableWAL: disableWAL,
						FS:         vfs.NewMem(),
						EventListener: EventListener{
							BackgroundError: func(err error) {
								select {
								case errCh <- err:
								default:
								}
							},
						},
						DebugCheck: DebugCheckLevels,
					}))
					require.NoError(t, err)

					require.NoError(t, d.Set([]byte("hello"), nil, NoSync))

					// Contort the DB into a state where it does something invalid.
					d.mu.Lock()
					switch i {
					case 0:
						// Force the next log number to be 0.
						d.mu.versions.nextFileNum = 0
					case 1:
						// Force the flushing memtable to have a log number equal to the new
						// log's number.
						d.mu.mem.queue[len(d.mu.mem.queue)-1].logNum = d.mu.versions.nextFileNum
					}
					d.mu.Unlock()

					flushCh, err := d.AsyncFlush()
					require.NoError(t, err)

					// Whichever channel fires first decides the verdict:
					// a background error is expected only when the WAL is
					// enabled; a clean flush only when it is disabled.
					select {
					case err := <-errCh:
						if disableWAL {
							t.Fatalf("expected success, but found %v", err)
						} else if !errors.Is(err, errFlushInvariant) {
							t.Fatalf("expected %q, but found %v", errFlushInvariant, err)
						}
					case <-flushCh:
						if !disableWAL {
							t.Fatalf("expected error but found success")
						}
					}

					require.NoError(t, d.Close())
				})
			}
		})
	}
}
  3120  
// TestCompactFlushQueuedMemTableAndFlushMetrics verifies that a manual
// compaction forces a flush of a queued (immutable) memtable, and that the
// flush write-throughput metrics are eventually populated with plausible
// values.
func TestCompactFlushQueuedMemTableAndFlushMetrics(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("test is flaky on windows")
	}

	// Verify that manual compaction forces a flush of a queued memtable.

	mem := vfs.NewMem()
	d, err := Open("", testingRandomized(&Options{
		FS: mem,
	}))
	require.NoError(t, err)

	// Add the key "a" to the memtable, then fill up the memtable with the key
	// prefix "b". The compaction will only overlap with the queued memtable,
	// not the mutable memtable.
	// NB: The initial memtable size is 256KB, which is filled up with random
	// values which typically don't compress well. The test also appends the
	// random value to the "b" key to limit overwriting of the same key, which
	// would get collapsed at flush time since there are no open snapshots.
	value := make([]byte, 50)
	rand.Read(value)
	require.NoError(t, d.Set([]byte("a"), value, nil))
	// Keep writing until a second memtable appears in the queue, i.e. the
	// first one has filled and been rotated out as immutable.
	for {
		rand.Read(value)
		require.NoError(t, d.Set(append([]byte("b"), value...), value, nil))
		d.mu.Lock()
		done := len(d.mu.mem.queue) == 2
		d.mu.Unlock()
		if done {
			break
		}
	}

	// Compacting over "a" must flush the queued memtable, leaving only the
	// mutable one.
	require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false))
	d.mu.Lock()
	require.Equal(t, 1, len(d.mu.mem.queue))
	d.mu.Unlock()
	// Flush metrics are updated after and non-atomically with the memtable
	// being removed from the queue.
	func() {
		begin := time.Now()
		// Poll (with a 2s deadline) until the flush bytes show up in the
		// interval metrics.
		for {
			metrics := d.InternalIntervalMetrics()
			require.NotNil(t, metrics)
			if int64(50<<10) < metrics.Flush.WriteThroughput.Bytes {
				// The writes (during which the flush is idle) and the flush work
				// should not be so fast as to be unrealistic. If these turn out to be
				// flaky we could instead inject a clock.
				tinyInterval := int64(50 * time.Microsecond)
				require.Less(t, tinyInterval, int64(metrics.Flush.WriteThroughput.WorkDuration))
				require.Less(t, tinyInterval, int64(metrics.Flush.WriteThroughput.IdleDuration))
				return
			}
			if time.Since(begin) > 2*time.Second {
				t.Fatal()
			}
			time.Sleep(time.Millisecond)
		}
	}()
	require.NoError(t, d.Close())
}
  3183  
  3184  func TestCompactFlushQueuedLargeBatch(t *testing.T) {
  3185  	// Verify that compaction forces a flush of a queued large batch.
  3186  
  3187  	mem := vfs.NewMem()
  3188  	d, err := Open("", testingRandomized(&Options{
  3189  		FS: mem,
  3190  	}))
  3191  	require.NoError(t, err)
  3192  
  3193  	// The default large batch threshold is slightly less than 1/2 of the
  3194  	// memtable size which makes triggering a problem with flushing queued large
  3195  	// batches irritating. Manually adjust the threshold to 1/8 of the memtable
  3196  	// size in order to more easily create a situation where a large batch is
  3197  	// queued but not automatically flushed.
  3198  	d.mu.Lock()
  3199  	d.largeBatchThreshold = d.opts.MemTableSize / 8
  3200  	require.Equal(t, 1, len(d.mu.mem.queue))
  3201  	d.mu.Unlock()
  3202  
  3203  	// Set a record with a large value. This will be transformed into a large
  3204  	// batch and placed in the flushable queue.
  3205  	require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("v"), d.largeBatchThreshold), nil))
  3206  	d.mu.Lock()
  3207  	require.Greater(t, len(d.mu.mem.queue), 1)
  3208  	d.mu.Unlock()
  3209  
  3210  	require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false))
  3211  	d.mu.Lock()
  3212  	require.Equal(t, 1, len(d.mu.mem.queue))
  3213  	d.mu.Unlock()
  3214  
  3215  	require.NoError(t, d.Close())
  3216  }
  3217  
// Regression test for #747. Test a problematic series of "cleaner" operations
// that could previously lead to DB.disableFileDeletions blocking forever even
// though no cleaning was in progress.
func TestCleanerCond(t *testing.T) {
	d, err := Open("", testingRandomized(&Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	// Repeat the race-prone interleaving several times to improve the odds of
	// reproducing the original hang.
	for i := 0; i < 10; i++ {
		// The main goroutine takes the cleaning turn first.
		d.mu.Lock()
		require.True(t, d.acquireCleaningTurn(true))
		d.mu.Unlock()

		var wg sync.WaitGroup
		wg.Add(2)

		// Contender 1: attempts to acquire (and, if successful, immediately
		// release) the cleaning turn while it is held above.
		go func() {
			defer wg.Done()
			d.mu.Lock()
			if d.acquireCleaningTurn(true) {
				d.releaseCleaningTurn()
			}
			d.mu.Unlock()
		}()

		// Yield to give the contender a chance to start waiting.
		runtime.Gosched()

		// Contender 2: toggles file deletions; disableFileDeletions was the
		// call that could previously block forever.
		go func() {
			defer wg.Done()
			d.mu.Lock()
			d.disableFileDeletions()
			d.enableFileDeletions()
			d.mu.Unlock()
		}()

		runtime.Gosched()

		// Release the turn; both contenders must now be able to finish.
		d.mu.Lock()
		d.releaseCleaningTurn()
		d.mu.Unlock()

		wg.Wait()
	}

	require.NoError(t, d.Close())
}
  3265  
// TestFlushError verifies that Flush eventually succeeds, and the DB stays
// usable, even though the first few attempts to create the output sstable
// fail with injected filesystem errors.
func TestFlushError(t *testing.T) {
	// Error the first three times we try to write a sstable.
	errorOps := int32(3)
	fs := errorfs.Wrap(vfs.NewMem(), errorfs.InjectorFunc(func(op errorfs.Op, path string) error {
		// Fail sstable creation while the countdown is still positive.
		if op == errorfs.OpCreate && filepath.Ext(path) == ".sst" && atomic.AddInt32(&errorOps, -1) >= 0 {
			return errorfs.ErrInjected
		}
		return nil
	}))
	d, err := Open("", testingRandomized(&Options{
		FS: fs,
		EventListener: EventListener{
			// Injected background errors are expected here; just log them.
			BackgroundError: func(err error) {
				t.Log(err)
			},
		},
	}))
	require.NoError(t, err)
	require.NoError(t, d.Set([]byte("a"), []byte("foo"), NoSync))
	require.NoError(t, d.Flush())
	require.NoError(t, d.Close())
}
  3288  
// TestAdjustGrandparentOverlapBytesForFlush verifies that
// adjustGrandparentOverlapBytesForFlush raises maxOverlapBytes for small
// flushes, so a flush is not split into an excessive number of output files.
func TestAdjustGrandparentOverlapBytesForFlush(t *testing.T) {
	// 500MB in Lbase: 100 files of 5MB each.
	var lbaseFiles []*manifest.FileMetadata
	const lbaseSize = 5 << 20
	for i := 0; i < 100; i++ {
		lbaseFiles =
			append(lbaseFiles, &manifest.FileMetadata{Size: lbaseSize, FileNum: FileNum(i)})
	}
	const maxOutputFileSize = 2 << 20
	// 20MB max overlap, so flush split into 25 files.
	const maxOverlapBytes = 20 << 20
	ls := manifest.NewLevelSliceSpecificOrder(lbaseFiles)
	testCases := []struct {
		flushingBytes        uint64 // size of the data being flushed
		adjustedOverlapBytes uint64 // expected post-adjustment maxOverlapBytes
	}{
		// Flushes large enough that 25 files is acceptable.
		{flushingBytes: 128 << 20, adjustedOverlapBytes: 20971520},
		{flushingBytes: 64 << 20, adjustedOverlapBytes: 20971520},
		// Small increase in adjustedOverlapBytes.
		{flushingBytes: 32 << 20, adjustedOverlapBytes: 32768000},
		// Large increase in adjustedOverlapBytes, to limit to 4 files.
		{flushingBytes: 1 << 20, adjustedOverlapBytes: 131072000},
	}
	for _, tc := range testCases {
		t.Run("", func(t *testing.T) {
			c := compaction{
				grandparents:      ls,
				maxOverlapBytes:   maxOverlapBytes,
				maxOutputFileSize: maxOutputFileSize,
			}
			adjustGrandparentOverlapBytesForFlush(&c, tc.flushingBytes)
			require.Equal(t, tc.adjustedOverlapBytes, c.maxOverlapBytes)
		})
	}
}
  3325  
  3326  func TestCompactionInvalidBounds(t *testing.T) {
  3327  	db, err := Open("", testingRandomized(&Options{
  3328  		FS: vfs.NewMem(),
  3329  	}))
  3330  	require.NoError(t, err)
  3331  	defer db.Close()
  3332  	require.NoError(t, db.Compact([]byte("a"), []byte("b"), false))
  3333  	require.Error(t, db.Compact([]byte("a"), []byte("a"), false))
  3334  	require.Error(t, db.Compact([]byte("b"), []byte("a"), false))
  3335  }
  3336  
// Test_calculateInuseKeyRanges verifies that calculateInuseKeyRanges collapses
// the point-key bounds of files in the given level range into a sorted list of
// disjoint in-use user-key ranges, restricted to [smallest, largest].
func Test_calculateInuseKeyRanges(t *testing.T) {
	opts := (*Options)(nil).EnsureDefaults()
	cmp := base.DefaultComparer.Compare
	tests := []struct {
		name     string
		v        *version // LSM shape the ranges are computed over
		level    int      // first level to consider
		depth    int      // how deep past level the calculation looks
		smallest []byte   // lower bound of the queried user-key span
		largest  []byte   // upper bound of the queried user-key span
		want     []manifest.UserKeyRange
	}{
		{
			name: "No files in next level",
			v: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					{
						FileNum:  1,
						Size:     1,
						Smallest: base.ParseInternalKey("a.SET.2"),
						Largest:  base.ParseInternalKey("c.SET.2"),
					},
					{
						FileNum:  2,
						Size:     1,
						Smallest: base.ParseInternalKey("d.SET.2"),
						Largest:  base.ParseInternalKey("e.SET.2"),
					},
				},
			}),
			level:    1,
			depth:    2,
			smallest: []byte("a"),
			largest:  []byte("e"),
			want: []manifest.UserKeyRange{
				{
					Start: []byte("a"),
					End:   []byte("c"),
				},
				{
					Start: []byte("d"),
					End:   []byte("e"),
				},
			},
		},
		{
			name: "No overlapping key ranges",
			v: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					{
						FileNum:  1,
						Size:     1,
						Smallest: base.ParseInternalKey("a.SET.1"),
						Largest:  base.ParseInternalKey("c.SET.1"),
					},
					{
						FileNum:  2,
						Size:     1,
						Smallest: base.ParseInternalKey("l.SET.1"),
						Largest:  base.ParseInternalKey("p.SET.1"),
					},
				},
				2: {
					{
						FileNum:  3,
						Size:     1,
						Smallest: base.ParseInternalKey("d.SET.1"),
						Largest:  base.ParseInternalKey("i.SET.1"),
					},
					{
						FileNum:  4,
						Size:     1,
						Smallest: base.ParseInternalKey("s.SET.1"),
						Largest:  base.ParseInternalKey("w.SET.1"),
					},
				},
			}),
			level:    1,
			depth:    2,
			smallest: []byte("a"),
			largest:  []byte("z"),
			want: []manifest.UserKeyRange{
				{
					Start: []byte("a"),
					End:   []byte("c"),
				},
				{
					Start: []byte("d"),
					End:   []byte("i"),
				},
				{
					Start: []byte("l"),
					End:   []byte("p"),
				},
				{
					Start: []byte("s"),
					End:   []byte("w"),
				},
			},
		},
		{
			name: "First few non-overlapping, followed by overlapping",
			v: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					{
						FileNum:  1,
						Size:     1,
						Smallest: base.ParseInternalKey("a.SET.1"),
						Largest:  base.ParseInternalKey("c.SET.1"),
					},
					{
						FileNum:  2,
						Size:     1,
						Smallest: base.ParseInternalKey("d.SET.1"),
						Largest:  base.ParseInternalKey("e.SET.1"),
					},
					{
						FileNum:  3,
						Size:     1,
						Smallest: base.ParseInternalKey("n.SET.1"),
						Largest:  base.ParseInternalKey("o.SET.1"),
					},
					{
						FileNum:  4,
						Size:     1,
						Smallest: base.ParseInternalKey("p.SET.1"),
						Largest:  base.ParseInternalKey("q.SET.1"),
					},
				},
				2: {
					{
						FileNum:  5,
						Size:     1,
						Smallest: base.ParseInternalKey("m.SET.1"),
						Largest:  base.ParseInternalKey("q.SET.1"),
					},
					{
						FileNum:  6,
						Size:     1,
						Smallest: base.ParseInternalKey("s.SET.1"),
						Largest:  base.ParseInternalKey("w.SET.1"),
					},
				},
			}),
			level:    1,
			depth:    2,
			smallest: []byte("a"),
			largest:  []byte("z"),
			want: []manifest.UserKeyRange{
				{
					Start: []byte("a"),
					End:   []byte("c"),
				},
				{
					Start: []byte("d"),
					End:   []byte("e"),
				},
				{
					Start: []byte("m"),
					End:   []byte("q"),
				},
				{
					Start: []byte("s"),
					End:   []byte("w"),
				},
			},
		},
		{
			name: "All overlapping",
			v: newVersion(opts, [numLevels][]*fileMetadata{
				1: {
					{
						FileNum:  1,
						Size:     1,
						Smallest: base.ParseInternalKey("d.SET.1"),
						Largest:  base.ParseInternalKey("e.SET.1"),
					},
					{
						FileNum:  2,
						Size:     1,
						Smallest: base.ParseInternalKey("n.SET.1"),
						Largest:  base.ParseInternalKey("o.SET.1"),
					},
					{
						FileNum:  3,
						Size:     1,
						Smallest: base.ParseInternalKey("p.SET.1"),
						Largest:  base.ParseInternalKey("q.SET.1"),
					},
				},
				2: {
					{
						FileNum:  4,
						Size:     1,
						Smallest: base.ParseInternalKey("a.SET.1"),
						Largest:  base.ParseInternalKey("c.SET.1"),
					},
					{
						FileNum:  5,
						Size:     1,
						Smallest: base.ParseInternalKey("d.SET.1"),
						Largest:  base.ParseInternalKey("w.SET.1"),
					},
				},
			}),
			level:    1,
			depth:    2,
			smallest: []byte("a"),
			largest:  []byte("z"),
			want: []manifest.UserKeyRange{
				{
					Start: []byte("a"),
					End:   []byte("c"),
				},
				{
					Start: []byte("d"),
					End:   []byte("w"),
				},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Compare the full slice structurally; order and exact bounds matter.
			if got := calculateInuseKeyRanges(tt.v, cmp, tt.level, tt.depth, tt.smallest, tt.largest); !reflect.DeepEqual(got, tt.want) {
				t.Errorf("calculateInuseKeyRanges() = %v, want %v", got, tt.want)
			}
		})
	}
}
  3566  
  3567  func TestMarkedForCompaction(t *testing.T) {
  3568  	var mem vfs.FS = vfs.NewMem()
  3569  	var d *DB
  3570  	defer func() {
  3571  		if d != nil {
  3572  			require.NoError(t, d.Close())
  3573  		}
  3574  	}()
  3575  
  3576  	var buf bytes.Buffer
  3577  	opts := &Options{
  3578  		FS:                          mem,
  3579  		DebugCheck:                  DebugCheckLevels,
  3580  		DisableAutomaticCompactions: true,
  3581  		FormatMajorVersion:          FormatNewest,
  3582  		EventListener: EventListener{
  3583  			CompactionEnd: func(info CompactionInfo) {
  3584  				// Fix the job ID and durations for determinism.
  3585  				info.JobID = 100
  3586  				info.Duration = time.Second
  3587  				info.TotalDuration = 2 * time.Second
  3588  				fmt.Fprintln(&buf, info)
  3589  			},
  3590  		},
  3591  	}
  3592  
  3593  	reset := func() {
  3594  		if d != nil {
  3595  			require.NoError(t, d.Close())
  3596  		}
  3597  		mem = vfs.NewMem()
  3598  		require.NoError(t, mem.MkdirAll("ext", 0755))
  3599  
  3600  		var err error
  3601  		d, err = Open("", opts)
  3602  		require.NoError(t, err)
  3603  	}
  3604  	datadriven.RunTest(t, "testdata/marked_for_compaction", func(td *datadriven.TestData) string {
  3605  		switch td.Cmd {
  3606  		case "reset":
  3607  			reset()
  3608  			return ""
  3609  
  3610  		case "define":
  3611  			if d != nil {
  3612  				if err := d.Close(); err != nil {
  3613  					return err.Error()
  3614  				}
  3615  			}
  3616  			var err error
  3617  			if d, err = runDBDefineCmd(td, opts); err != nil {
  3618  				return err.Error()
  3619  			}
  3620  			d.mu.Lock()
  3621  			defer d.mu.Unlock()
  3622  			t := time.Now()
  3623  			d.timeNow = func() time.Time {
  3624  				t = t.Add(time.Second)
  3625  				return t
  3626  			}
  3627  			s := d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)
  3628  			return s
  3629  
  3630  		case "mark-for-compaction":
  3631  			d.mu.Lock()
  3632  			defer d.mu.Unlock()
  3633  			vers := d.mu.versions.currentVersion()
  3634  			var fileNum uint64
  3635  			td.ScanArgs(t, "file", &fileNum)
  3636  			for l, lm := range vers.Levels {
  3637  				iter := lm.Iter()
  3638  				for f := iter.First(); f != nil; f = iter.Next() {
  3639  					if f.FileNum != base.FileNum(fileNum) {
  3640  						continue
  3641  					}
  3642  					f.MarkedForCompaction = true
  3643  					vers.Stats.MarkedForCompaction++
  3644  					vers.Levels[l].InvalidateAnnotation(markedForCompactionAnnotator{})
  3645  					return fmt.Sprintf("marked L%d.%s", l, f.FileNum)
  3646  				}
  3647  			}
  3648  			return "not-found"
  3649  
  3650  		case "maybe-compact":
  3651  			d.mu.Lock()
  3652  			defer d.mu.Unlock()
  3653  			d.opts.DisableAutomaticCompactions = false
  3654  			d.maybeScheduleCompaction()
  3655  			for d.mu.compact.compactingCount > 0 {
  3656  				d.mu.compact.cond.Wait()
  3657  			}
  3658  
  3659  			fmt.Fprintln(&buf, d.mu.versions.currentVersion().DebugString(base.DefaultFormatter))
  3660  			s := strings.TrimSpace(buf.String())
  3661  			buf.Reset()
  3662  			opts.DisableAutomaticCompactions = true
  3663  			return s
  3664  
  3665  		default:
  3666  			return fmt.Sprintf("unknown command: %s", td.Cmd)
  3667  		}
  3668  	})
  3669  }
  3670  
// createManifestErrorInjector injects errors (when enabled) into vfs.FS calls
// to create MANIFEST files.
type createManifestErrorInjector struct {
	// enabled is accessed atomically; non-zero means injection is active.
	enabled uint32 // atomic
}
  3676  
// enable enables error injection for the vfs.FS. It is safe for concurrent
// use with MaybeError, as both access the flag atomically.
func (i *createManifestErrorInjector) enable() {
	atomic.StoreUint32(&i.enabled, 1)
}
  3681  
  3682  // MaybeError implements errorfs.Injector.
  3683  func (i *createManifestErrorInjector) MaybeError(op errorfs.Op, path string) error {
  3684  	if atomic.LoadUint32(&i.enabled) == 0 {
  3685  		return nil
  3686  	}
  3687  	// This necessitates having a MaxManifestSize of 1, to reliably induce
  3688  	// logAndApply errors.
  3689  	if strings.Contains(path, "MANIFEST") && op == errorfs.OpCreate {
  3690  		return errorfs.ErrInjected
  3691  	}
  3692  	return nil
  3693  }
  3694  
  3695  var _ errorfs.Injector = &createManifestErrorInjector{}