github.com/cockroachdb/pebble@v1.1.2/open_test.go (about)

     1  // Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"path/filepath"
    14  	"reflect"
    15  	"runtime/debug"
    16  	"sort"
    17  	"strconv"
    18  	"strings"
    19  	"sync/atomic"
    20  	"syscall"
    21  	"testing"
    22  
    23  	"github.com/cockroachdb/datadriven"
    24  	"github.com/cockroachdb/errors"
    25  	"github.com/cockroachdb/pebble/internal/base"
    26  	"github.com/cockroachdb/pebble/internal/cache"
    27  	"github.com/cockroachdb/pebble/internal/manifest"
    28  	"github.com/cockroachdb/pebble/objstorage"
    29  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    30  	"github.com/cockroachdb/pebble/objstorage/remote"
    31  	"github.com/cockroachdb/pebble/vfs"
    32  	"github.com/cockroachdb/pebble/vfs/atomicfs"
    33  	"github.com/cockroachdb/pebble/vfs/errorfs"
    34  	"github.com/cockroachdb/redact"
    35  	"github.com/kr/pretty"
    36  	"github.com/stretchr/testify/require"
    37  )
    38  
    39  func TestOpenSharedTableCache(t *testing.T) {
    40  	c := cache.New(cacheDefaultSize)
    41  	tc := NewTableCache(c, 16, 100)
    42  	defer tc.Unref()
    43  	defer c.Unref()
    44  
    45  	d0, err := Open("", testingRandomized(t, &Options{
    46  		FS:         vfs.NewMem(),
    47  		Cache:      c,
    48  		TableCache: tc,
    49  	}))
    50  	if err != nil {
    51  		t.Errorf("d0 Open: %s", err.Error())
    52  	}
    53  	defer d0.Close()
    54  
    55  	d1, err := Open("", testingRandomized(t, &Options{
    56  		FS:         vfs.NewMem(),
    57  		Cache:      c,
    58  		TableCache: tc,
    59  	}))
    60  	if err != nil {
    61  		t.Errorf("d1 Open: %s", err.Error())
    62  	}
    63  	defer d1.Close()
    64  
    65  	// Make sure that the Open function is using the passed in table cache
    66  	// when the TableCache option is set.
    67  	require.Equalf(
    68  		t, d0.tableCache.tableCache, d1.tableCache.tableCache,
    69  		"expected tableCache for both d0 and d1 to be the same",
    70  	)
    71  }
    72  
    73  func TestErrorIfExists(t *testing.T) {
    74  	opts := testingRandomized(t, &Options{
    75  		FS:            vfs.NewMem(),
    76  		ErrorIfExists: true,
    77  	})
    78  	defer ensureFilesClosed(t, opts)()
    79  
    80  	d0, err := Open("", opts)
    81  	require.NoError(t, err)
    82  	require.NoError(t, d0.Close())
    83  
    84  	if _, err := Open("", opts); !errors.Is(err, ErrDBAlreadyExists) {
    85  		t.Fatalf("expected db-already-exists error, got %v", err)
    86  	}
    87  
    88  	opts.ErrorIfExists = false
    89  	d1, err := Open("", opts)
    90  	require.NoError(t, err)
    91  	require.NoError(t, d1.Close())
    92  }
    93  
    94  func TestErrorIfNotExists(t *testing.T) {
    95  	opts := testingRandomized(t, &Options{
    96  		FS:               vfs.NewMem(),
    97  		ErrorIfNotExists: true,
    98  	})
    99  	defer ensureFilesClosed(t, opts)()
   100  
   101  	_, err := Open("", opts)
   102  	if !errors.Is(err, ErrDBDoesNotExist) {
   103  		t.Fatalf("expected db-does-not-exist error, got %v", err)
   104  	}
   105  
   106  	// Create the DB and try again.
   107  	opts.ErrorIfNotExists = false
   108  	d0, err := Open("", opts)
   109  	require.NoError(t, err)
   110  	require.NoError(t, d0.Close())
   111  
   112  	opts.ErrorIfNotExists = true
   113  	d1, err := Open("", opts)
   114  	require.NoError(t, err)
   115  	require.NoError(t, d1.Close())
   116  }
   117  
   118  func TestErrorIfNotPristine(t *testing.T) {
   119  	opts := testingRandomized(t, &Options{
   120  		FS:                 vfs.NewMem(),
   121  		ErrorIfNotPristine: true,
   122  	})
   123  	defer ensureFilesClosed(t, opts)()
   124  
   125  	d0, err := Open("", opts)
   126  	require.NoError(t, err)
   127  	require.NoError(t, d0.Close())
   128  
   129  	// Store is pristine; ok to open.
   130  	d1, err := Open("", opts)
   131  	require.NoError(t, err)
   132  	require.NoError(t, d1.Set([]byte("foo"), []byte("bar"), Sync))
   133  	require.NoError(t, d1.Close())
   134  
   135  	if _, err := Open("", opts); !errors.Is(err, ErrDBNotPristine) {
   136  		t.Fatalf("expected db-not-pristine error, got %v", err)
   137  	}
   138  
   139  	// Run compaction and make sure we're still not allowed to open.
   140  	opts.ErrorIfNotPristine = false
   141  	d2, err := Open("", opts)
   142  	require.NoError(t, err)
   143  	require.NoError(t, d2.Compact([]byte("a"), []byte("z"), false /* parallelize */))
   144  	require.NoError(t, d2.Close())
   145  
   146  	opts.ErrorIfNotPristine = true
   147  	if _, err := Open("", opts); !errors.Is(err, ErrDBNotPristine) {
   148  		t.Fatalf("expected db-already-exists error, got %v", err)
   149  	}
   150  }
   151  
   152  func TestOpenAlreadyLocked(t *testing.T) {
   153  	runTest := func(t *testing.T, dirname string, fs vfs.FS) {
   154  		opts := testingRandomized(t, &Options{FS: fs})
   155  		var err error
   156  		opts.Lock, err = LockDirectory(dirname, fs)
   157  		require.NoError(t, err)
   158  
   159  		d, err := Open(dirname, opts)
   160  		require.NoError(t, err)
   161  		require.NoError(t, d.Set([]byte("foo"), []byte("bar"), Sync))
   162  
   163  		// Try to open the same database reusing the Options containing the same
   164  		// Lock. It should error when it observes that it's already referenced.
   165  		_, err = Open(dirname, opts)
   166  		require.Error(t, err)
   167  
   168  		// Close the database.
   169  		require.NoError(t, d.Close())
   170  
   171  		// Now Opening should succeed again.
   172  		d, err = Open(dirname, opts)
   173  		require.NoError(t, err)
   174  		require.NoError(t, d.Close())
   175  
   176  		require.NoError(t, opts.Lock.Close())
   177  		// There should be no more remaining references.
   178  		require.Equal(t, int32(0), opts.Lock.refs.Load())
   179  	}
   180  	t.Run("memfs", func(t *testing.T) {
   181  		runTest(t, "", vfs.NewMem())
   182  	})
   183  	t.Run("disk", func(t *testing.T) {
   184  		runTest(t, t.TempDir(), vfs.Default)
   185  	})
   186  }
   187  
   188  func TestNewDBFilenames(t *testing.T) {
   189  	versions := map[FormatMajorVersion][]string{
   190  		FormatMostCompatible: {
   191  			"000002.log",
   192  			"CURRENT",
   193  			"LOCK",
   194  			"MANIFEST-000001",
   195  			"OPTIONS-000003",
   196  		},
   197  		internalFormatNewest: {
   198  			"000002.log",
   199  			"CURRENT",
   200  			"LOCK",
   201  			"MANIFEST-000001",
   202  			"OPTIONS-000003",
   203  			"marker.format-version.000015.016",
   204  			"marker.manifest.000001.MANIFEST-000001",
   205  		},
   206  	}
   207  
   208  	for formatVers, want := range versions {
   209  		t.Run(fmt.Sprintf("vers=%s", formatVers), func(t *testing.T) {
   210  			mem := vfs.NewMem()
   211  			fooBar := mem.PathJoin("foo", "bar")
   212  			d, err := Open(fooBar, &Options{
   213  				FS:                 mem,
   214  				FormatMajorVersion: formatVers,
   215  			})
   216  			if err != nil {
   217  				t.Fatalf("Open: %v", err)
   218  			}
   219  			if err := d.Close(); err != nil {
   220  				t.Fatalf("Close: %v", err)
   221  			}
   222  			got, err := mem.List(fooBar)
   223  			if err != nil {
   224  				t.Fatalf("List: %v", err)
   225  			}
   226  			sort.Strings(got)
   227  			if !reflect.DeepEqual(got, want) {
   228  				t.Errorf("\ngot  %v\nwant %v", got, want)
   229  			}
   230  		})
   231  	}
   232  }
   233  
   234  func testOpenCloseOpenClose(t *testing.T, fs vfs.FS, root string) {
   235  	opts := testingRandomized(t, &Options{FS: fs})
   236  
   237  	for _, startFromEmpty := range []bool{false, true} {
   238  		for _, walDirname := range []string{"", "wal"} {
   239  			for _, length := range []int{-1, 0, 1, 1000, 10000, 100000} {
   240  				dirname := "sharedDatabase" + walDirname
   241  				if startFromEmpty {
   242  					dirname = "startFromEmpty" + walDirname + strconv.Itoa(length)
   243  				}
   244  				dirname = fs.PathJoin(root, dirname)
   245  				if walDirname == "" {
   246  					opts.WALDir = ""
   247  				} else {
   248  					opts.WALDir = fs.PathJoin(dirname, walDirname)
   249  				}
   250  
   251  				got, xxx := []byte(nil), ""
   252  				if length >= 0 {
   253  					xxx = strings.Repeat("x", length)
   254  				}
   255  
   256  				d0, err := Open(dirname, opts)
   257  				if err != nil {
   258  					t.Fatalf("sfe=%t, length=%d: Open #0: %v",
   259  						startFromEmpty, length, err)
   260  					continue
   261  				}
   262  				if length >= 0 {
   263  					err = d0.Set([]byte("key"), []byte(xxx), nil)
   264  					if err != nil {
   265  						t.Errorf("sfe=%t, length=%d: Set: %v",
   266  							startFromEmpty, length, err)
   267  						continue
   268  					}
   269  				}
   270  				err = d0.Close()
   271  				if err != nil {
   272  					t.Errorf("sfe=%t, length=%d: Close #0: %v",
   273  						startFromEmpty, length, err)
   274  					continue
   275  				}
   276  
   277  				d1, err := Open(dirname, opts)
   278  				if err != nil {
   279  					t.Errorf("sfe=%t, length=%d: Open #1: %v",
   280  						startFromEmpty, length, err)
   281  					continue
   282  				}
   283  				if length >= 0 {
   284  					var closer io.Closer
   285  					got, closer, err = d1.Get([]byte("key"))
   286  					if err != nil {
   287  						t.Errorf("sfe=%t, length=%d: Get: %v",
   288  							startFromEmpty, length, err)
   289  						continue
   290  					}
   291  					got = append([]byte(nil), got...)
   292  					closer.Close()
   293  				}
   294  				err = d1.Close()
   295  				if err != nil {
   296  					t.Errorf("sfe=%t, length=%d: Close #1: %v",
   297  						startFromEmpty, length, err)
   298  					continue
   299  				}
   300  
   301  				if length >= 0 && string(got) != xxx {
   302  					t.Errorf("sfe=%t, length=%d: got value differs from set value",
   303  						startFromEmpty, length)
   304  					continue
   305  				}
   306  
   307  				{
   308  					got, err := opts.FS.List(dirname)
   309  					if err != nil {
   310  						t.Fatalf("List: %v", err)
   311  					}
   312  					var optionsCount int
   313  					for _, s := range got {
   314  						if t, _, ok := base.ParseFilename(opts.FS, s); ok && t == fileTypeOptions {
   315  							optionsCount++
   316  						}
   317  					}
   318  					if optionsCount != 1 {
   319  						t.Fatalf("expected 1 OPTIONS file, but found %d", optionsCount)
   320  					}
   321  				}
   322  			}
   323  		}
   324  	}
   325  }
   326  
   327  func TestOpenCloseOpenClose(t *testing.T) {
   328  	for _, fstype := range []string{"disk", "mem"} {
   329  		t.Run(fstype, func(t *testing.T) {
   330  			var fs vfs.FS
   331  			var dir string
   332  			switch fstype {
   333  			case "disk":
   334  				var err error
   335  				dir, err = os.MkdirTemp("", "open-close")
   336  				require.NoError(t, err)
   337  				defer func() {
   338  					_ = os.RemoveAll(dir)
   339  				}()
   340  				fs = vfs.Default
   341  			case "mem":
   342  				dir = ""
   343  				fs = vfs.NewMem()
   344  			}
   345  			testOpenCloseOpenClose(t, fs, dir)
   346  		})
   347  	}
   348  }
   349  
   350  func TestOpenOptionsCheck(t *testing.T) {
   351  	mem := vfs.NewMem()
   352  	opts := &Options{FS: mem}
   353  
   354  	d, err := Open("", opts)
   355  	require.NoError(t, err)
   356  	require.NoError(t, d.Close())
   357  
   358  	opts = &Options{
   359  		Comparer: &Comparer{Name: "foo"},
   360  		FS:       mem,
   361  	}
   362  	_, err = Open("", opts)
   363  	require.Regexp(t, `comparer name from file.*!=.*`, err)
   364  
   365  	opts = &Options{
   366  		Merger: &Merger{Name: "bar"},
   367  		FS:     mem,
   368  	}
   369  	_, err = Open("", opts)
   370  	require.Regexp(t, `merger name from file.*!=.*`, err)
   371  }
   372  
   373  func TestOpenCrashWritingOptions(t *testing.T) {
   374  	memFS := vfs.NewMem()
   375  
   376  	d, err := Open("", &Options{FS: memFS})
   377  	require.NoError(t, err)
   378  	require.NoError(t, d.Close())
   379  
   380  	// Open the database again, this time with a mocked filesystem that
   381  	// will only succeed in partially writing the OPTIONS file.
   382  	fs := optionsTornWriteFS{FS: memFS}
   383  	_, err = Open("", &Options{FS: fs})
   384  	require.Error(t, err)
   385  
   386  	// Re-opening the database must succeed.
   387  	d, err = Open("", &Options{FS: memFS})
   388  	require.NoError(t, err)
   389  	require.NoError(t, d.Close())
   390  }
   391  
   392  type optionsTornWriteFS struct {
   393  	vfs.FS
   394  }
   395  
   396  func (fs optionsTornWriteFS) Create(name string) (vfs.File, error) {
   397  	file, err := fs.FS.Create(name)
   398  	if file != nil {
   399  		file = optionsTornWriteFile{File: file}
   400  	}
   401  	return file, err
   402  }
   403  
   404  type optionsTornWriteFile struct {
   405  	vfs.File
   406  }
   407  
   408  func (f optionsTornWriteFile) Write(b []byte) (int, error) {
   409  	// Look for the OPTIONS-XXXXXX file's `comparer=` field.
   410  	comparerKey := []byte("comparer=")
   411  	i := bytes.Index(b, comparerKey)
   412  	if i == -1 {
   413  		return f.File.Write(b)
   414  	}
   415  	// Write only the contents through `comparer=` and return an error.
   416  	n, _ := f.File.Write(b[:i+len(comparerKey)])
   417  	return n, syscall.EIO
   418  }
   419  
   420  func TestOpenReadOnly(t *testing.T) {
   421  	mem := vfs.NewMem()
   422  
   423  	{
   424  		// Opening a non-existent DB in read-only mode should result in no mutable
   425  		// filesystem operations.
   426  		var memLog base.InMemLogger
   427  		_, err := Open("non-existent", testingRandomized(t, &Options{
   428  			FS:       vfs.WithLogging(mem, memLog.Infof),
   429  			ReadOnly: true,
   430  			WALDir:   "non-existent-waldir",
   431  		}))
   432  		if err == nil {
   433  			t.Fatalf("expected error, but found success")
   434  		}
   435  		const expected = `open-dir: non-existent`
   436  		if trimmed := strings.TrimSpace(memLog.String()); expected != trimmed {
   437  			t.Fatalf("expected %q, but found %q", expected, trimmed)
   438  		}
   439  	}
   440  
   441  	{
   442  		// Opening a DB with a non-existent WAL dir in read-only mode should result
   443  		// in no mutable filesystem operations other than the LOCK.
   444  		var memLog base.InMemLogger
   445  		_, err := Open("", testingRandomized(t, &Options{
   446  			FS:       vfs.WithLogging(mem, memLog.Infof),
   447  			ReadOnly: true,
   448  			WALDir:   "non-existent-waldir",
   449  		}))
   450  		if err == nil {
   451  			t.Fatalf("expected error, but found success")
   452  		}
   453  		const expected = "open-dir: \nopen-dir: non-existent-waldir\nclose:"
   454  		if trimmed := strings.TrimSpace(memLog.String()); expected != trimmed {
   455  			t.Fatalf("expected %q, but found %q", expected, trimmed)
   456  		}
   457  	}
   458  
   459  	var contents []string
   460  	{
   461  		// Create a new DB and populate it with a small amount of data.
   462  		d, err := Open("", testingRandomized(t, &Options{
   463  			FS: mem,
   464  		}))
   465  		require.NoError(t, err)
   466  		require.NoError(t, d.Set([]byte("test"), nil, nil))
   467  		require.NoError(t, d.Close())
   468  		contents, err = mem.List("")
   469  		require.NoError(t, err)
   470  		sort.Strings(contents)
   471  	}
   472  
   473  	{
   474  		// Re-open the DB read-only. The directory contents should be unchanged.
   475  		d, err := Open("", testingRandomized(t, &Options{
   476  			FS:       mem,
   477  			ReadOnly: true,
   478  		}))
   479  		require.NoError(t, err)
   480  
   481  		// Verify various write operations fail in read-only mode.
   482  		require.EqualValues(t, ErrReadOnly, d.Compact(nil, []byte("\xff"), false))
   483  		require.EqualValues(t, ErrReadOnly, d.Flush())
   484  		require.EqualValues(t, ErrReadOnly, func() error { _, err := d.AsyncFlush(); return err }())
   485  
   486  		require.EqualValues(t, ErrReadOnly, d.Delete(nil, nil))
   487  		require.EqualValues(t, ErrReadOnly, d.DeleteRange(nil, nil, nil))
   488  		require.EqualValues(t, ErrReadOnly, d.Ingest(nil))
   489  		require.EqualValues(t, ErrReadOnly, d.LogData(nil, nil))
   490  		require.EqualValues(t, ErrReadOnly, d.Merge(nil, nil, nil))
   491  		require.EqualValues(t, ErrReadOnly, d.Set(nil, nil, nil))
   492  
   493  		// Verify we can still read in read-only mode.
   494  		require.NoError(t, func() error {
   495  			_, closer, err := d.Get([]byte("test"))
   496  			if closer != nil {
   497  				closer.Close()
   498  			}
   499  			return err
   500  		}())
   501  
   502  		checkIter := func(iter *Iterator, err error) {
   503  			t.Helper()
   504  
   505  			var keys []string
   506  			for valid := iter.First(); valid; valid = iter.Next() {
   507  				keys = append(keys, string(iter.Key()))
   508  			}
   509  			require.NoError(t, iter.Close())
   510  			expectedKeys := []string{"test"}
   511  			if diff := pretty.Diff(keys, expectedKeys); diff != nil {
   512  				t.Fatalf("%s\n%s", strings.Join(diff, "\n"), keys)
   513  			}
   514  		}
   515  
   516  		checkIter(d.NewIter(nil))
   517  
   518  		b := d.NewIndexedBatch()
   519  		checkIter(b.NewIter(nil))
   520  		require.EqualValues(t, ErrReadOnly, b.Commit(nil))
   521  		require.EqualValues(t, ErrReadOnly, d.Apply(b, nil))
   522  
   523  		s := d.NewSnapshot()
   524  		checkIter(s.NewIter(nil))
   525  		require.NoError(t, s.Close())
   526  
   527  		require.NoError(t, d.Close())
   528  
   529  		newContents, err := mem.List("")
   530  		require.NoError(t, err)
   531  
   532  		sort.Strings(newContents)
   533  		if diff := pretty.Diff(contents, newContents); diff != nil {
   534  			t.Fatalf("%s", strings.Join(diff, "\n"))
   535  		}
   536  	}
   537  }
   538  
   539  func TestOpenWALReplay(t *testing.T) {
   540  	largeValue := []byte(strings.Repeat("a", 100<<10))
   541  	hugeValue := []byte(strings.Repeat("b", 10<<20))
   542  	checkIter := func(iter *Iterator, err error) {
   543  		t.Helper()
   544  
   545  		var keys []string
   546  		for valid := iter.First(); valid; valid = iter.Next() {
   547  			keys = append(keys, string(iter.Key()))
   548  		}
   549  		require.NoError(t, iter.Close())
   550  		expectedKeys := []string{"1", "2", "3", "4", "5"}
   551  		if diff := pretty.Diff(keys, expectedKeys); diff != nil {
   552  			t.Fatalf("%s\n%s", strings.Join(diff, "\n"), keys)
   553  		}
   554  	}
   555  
   556  	for _, readOnly := range []bool{false, true} {
   557  		t.Run(fmt.Sprintf("read-only=%t", readOnly), func(t *testing.T) {
   558  			// Create a new DB and populate it with some data.
   559  			const dir = ""
   560  			mem := vfs.NewMem()
   561  			d, err := Open(dir, testingRandomized(t, &Options{
   562  				FS:           mem,
   563  				MemTableSize: 32 << 20,
   564  			}))
   565  			require.NoError(t, err)
   566  			// All these values will fit in a single memtable, so on closing the db there
   567  			// will be no sst and all the data is in a single WAL.
   568  			require.NoError(t, d.Set([]byte("1"), largeValue, nil))
   569  			require.NoError(t, d.Set([]byte("2"), largeValue, nil))
   570  			require.NoError(t, d.Set([]byte("3"), largeValue, nil))
   571  			require.NoError(t, d.Set([]byte("4"), hugeValue, nil))
   572  			require.NoError(t, d.Set([]byte("5"), largeValue, nil))
   573  			checkIter(d.NewIter(nil))
   574  			require.NoError(t, d.Close())
   575  			files, err := mem.List(dir)
   576  			require.NoError(t, err)
   577  			sort.Strings(files)
   578  			logCount, sstCount := 0, 0
   579  			for _, fname := range files {
   580  				if strings.HasSuffix(fname, ".sst") {
   581  					sstCount++
   582  				}
   583  				if strings.HasSuffix(fname, ".log") {
   584  					logCount++
   585  				}
   586  			}
   587  			require.Equal(t, 0, sstCount)
   588  			// The memtable size starts at 256KB and doubles up to 32MB so we expect 5
   589  			// logs (one for each doubling).
   590  			require.Equal(t, 7, logCount)
   591  
   592  			// Re-open the DB with a smaller memtable. Values for 1, 2 will fit in the first memtable;
   593  			// value for 3 will go in the next memtable; value for 4 will be in a flushable batch
   594  			// which will cause the previous memtable to be flushed; value for 5 will go in the next
   595  			// memtable
   596  			d, err = Open(dir, testingRandomized(t, &Options{
   597  				FS:           mem,
   598  				MemTableSize: 300 << 10,
   599  				ReadOnly:     readOnly,
   600  			}))
   601  			require.NoError(t, err)
   602  
   603  			if readOnly {
   604  				m := d.Metrics()
   605  				require.Equal(t, int64(logCount), m.WAL.Files)
   606  				d.mu.Lock()
   607  				require.NotNil(t, d.mu.mem.mutable)
   608  				d.mu.Unlock()
   609  			}
   610  			checkIter(d.NewIter(nil))
   611  			require.NoError(t, d.Close())
   612  		})
   613  	}
   614  }
   615  
   616  // Reproduction for https://github.com/cockroachdb/pebble/issues/2234.
   617  func TestWALReplaySequenceNumBug(t *testing.T) {
   618  	mem := vfs.NewMem()
   619  	d, err := Open("", testingRandomized(t, &Options{
   620  		FS: mem,
   621  	}))
   622  	require.NoError(t, err)
   623  
   624  	d.mu.Lock()
   625  	// Disable any flushes.
   626  	d.mu.compact.flushing = true
   627  	d.mu.Unlock()
   628  
   629  	require.NoError(t, d.Set([]byte("1"), nil, nil))
   630  	require.NoError(t, d.Set([]byte("2"), nil, nil))
   631  
   632  	// Write a large batch. This should go to a separate memtable.
   633  	largeValue := []byte(strings.Repeat("a", int(d.largeBatchThreshold)))
   634  	require.NoError(t, d.Set([]byte("1"), largeValue, nil))
   635  
   636  	// This write should go the mutable memtable after the large batch in the
   637  	// memtable queue.
   638  	d.Set([]byte("1"), nil, nil)
   639  
   640  	d.mu.Lock()
   641  	d.mu.compact.flushing = false
   642  	d.mu.Unlock()
   643  
   644  	// Make sure none of the flushables have been flushed.
   645  	require.Equal(t, 3, len(d.mu.mem.queue))
   646  
   647  	// Close the db. This doesn't cause a flush of the memtables, so they'll
   648  	// have to be replayed when the db is reopened.
   649  	require.NoError(t, d.Close())
   650  
   651  	files, err := mem.List("")
   652  	require.NoError(t, err)
   653  	sort.Strings(files)
   654  	sstCount := 0
   655  	for _, fname := range files {
   656  		if strings.HasSuffix(fname, ".sst") {
   657  			sstCount++
   658  		}
   659  	}
   660  	require.Equal(t, 0, sstCount)
   661  
   662  	// Reopen db in read only mode to force read only wal replay.
   663  	d, err = Open("", &Options{
   664  		FS:       mem,
   665  		ReadOnly: true,
   666  	})
   667  	require.NoError(t, err)
   668  	val, c, _ := d.Get([]byte("1"))
   669  	require.Equal(t, []byte{}, val)
   670  	c.Close()
   671  	require.NoError(t, d.Close())
   672  }
   673  
   674  // Similar to TestOpenWALReplay, except we test replay behavior after a
   675  // memtable has been flushed. We test all 3 reasons for flushing: forced, size,
   676  // and large-batch.
   677  func TestOpenWALReplay2(t *testing.T) {
   678  	for _, readOnly := range []bool{false, true} {
   679  		t.Run(fmt.Sprintf("read-only=%t", readOnly), func(t *testing.T) {
   680  			for _, reason := range []string{"forced", "size", "large-batch"} {
   681  				t.Run(reason, func(t *testing.T) {
   682  					mem := vfs.NewMem()
   683  					d, err := Open("", testingRandomized(t, &Options{
   684  						FS:           mem,
   685  						MemTableSize: 256 << 10,
   686  					}))
   687  					require.NoError(t, err)
   688  
   689  					switch reason {
   690  					case "forced":
   691  						require.NoError(t, d.Set([]byte("1"), nil, nil))
   692  						require.NoError(t, d.Flush())
   693  						require.NoError(t, d.Set([]byte("2"), nil, nil))
   694  					case "size":
   695  						largeValue := []byte(strings.Repeat("a", 100<<10))
   696  						require.NoError(t, d.Set([]byte("1"), largeValue, nil))
   697  						require.NoError(t, d.Set([]byte("2"), largeValue, nil))
   698  						require.NoError(t, d.Set([]byte("3"), largeValue, nil))
   699  					case "large-batch":
   700  						largeValue := []byte(strings.Repeat("a", int(d.largeBatchThreshold)))
   701  						require.NoError(t, d.Set([]byte("1"), nil, nil))
   702  						require.NoError(t, d.Set([]byte("2"), largeValue, nil))
   703  						require.NoError(t, d.Set([]byte("3"), nil, nil))
   704  					}
   705  					require.NoError(t, d.Close())
   706  
   707  					files, err := mem.List("")
   708  					require.NoError(t, err)
   709  					sort.Strings(files)
   710  					sstCount := 0
   711  					for _, fname := range files {
   712  						if strings.HasSuffix(fname, ".sst") {
   713  							sstCount++
   714  						}
   715  					}
   716  					require.Equal(t, 1, sstCount)
   717  
   718  					// Re-open the DB with a smaller memtable. Values for 1, 2 will fit in the first memtable;
   719  					// value for 3 will go in the next memtable; value for 4 will be in a flushable batch
   720  					// which will cause the previous memtable to be flushed; value for 5 will go in the next
   721  					// memtable
   722  					d, err = Open("", testingRandomized(t, &Options{
   723  						FS:           mem,
   724  						MemTableSize: 300 << 10,
   725  						ReadOnly:     readOnly,
   726  					}))
   727  					require.NoError(t, err)
   728  					require.NoError(t, d.Close())
   729  				})
   730  			}
   731  		})
   732  	}
   733  }
   734  
   735  // TestTwoWALReplayCorrupt tests WAL-replay behavior when the first of the two
   736  // WALs is corrupted with an sstable checksum error. Replay must stop at the
   737  // first WAL because otherwise we may violate point-in-time recovery
   738  // semantics. See #864.
   739  func TestTwoWALReplayCorrupt(t *testing.T) {
   740  	// Use the real filesystem so that we can seek and overwrite WAL data
   741  	// easily.
   742  	dir, err := os.MkdirTemp("", "wal-replay")
   743  	require.NoError(t, err)
   744  	defer os.RemoveAll(dir)
   745  
   746  	d, err := Open(dir, testingRandomized(t, &Options{
   747  		MemTableStopWritesThreshold: 4,
   748  		MemTableSize:                2048,
   749  	}))
   750  	require.NoError(t, err)
   751  	d.mu.Lock()
   752  	d.mu.compact.flushing = true
   753  	d.mu.Unlock()
   754  	require.NoError(t, d.Set([]byte("1"), []byte(strings.Repeat("a", 1024)), nil))
   755  	require.NoError(t, d.Set([]byte("2"), nil, nil))
   756  	d.mu.Lock()
   757  	d.mu.compact.flushing = false
   758  	d.mu.Unlock()
   759  	require.NoError(t, d.Close())
   760  
   761  	// We should have two WALs.
   762  	var logs []string
   763  	ls, err := vfs.Default.List(dir)
   764  	require.NoError(t, err)
   765  	for _, name := range ls {
   766  		if filepath.Ext(name) == ".log" {
   767  			logs = append(logs, name)
   768  		}
   769  	}
   770  	sort.Strings(logs)
   771  	if len(logs) < 2 {
   772  		t.Fatalf("expected at least two log files, found %d", len(logs))
   773  	}
   774  
   775  	// Corrupt the (n-1)th WAL by zeroing four bytes, 100 bytes from the end
   776  	// of the file.
   777  	f, err := os.OpenFile(filepath.Join(dir, logs[len(logs)-2]), os.O_RDWR, os.ModePerm)
   778  	require.NoError(t, err)
   779  	off, err := f.Seek(-100, 2)
   780  	require.NoError(t, err)
   781  	_, err = f.Write([]byte{0, 0, 0, 0})
   782  	require.NoError(t, err)
   783  	require.NoError(t, f.Close())
   784  	t.Logf("zeored four bytes in %s at offset %d\n", logs[len(logs)-2], off)
   785  
   786  	// Re-opening the database should detect and report the corruption.
   787  	_, err = Open(dir, nil)
   788  	require.Error(t, err, "pebble: corruption")
   789  }
   790  
   791  // TestTwoWALReplayCorrupt tests WAL-replay behavior when the first of the two
   792  // WALs is corrupted with an sstable checksum error and the OPTIONS file does
   793  // not enable the private strict_wal_tail option, indicating that the WAL was
   794  // produced by a database that did not guarantee clean WAL tails. See #864.
   795  func TestTwoWALReplayPermissive(t *testing.T) {
   796  	// Use the real filesystem so that we can seek and overwrite WAL data
   797  	// easily.
   798  	dir, err := os.MkdirTemp("", "wal-replay")
   799  	require.NoError(t, err)
   800  	defer os.RemoveAll(dir)
   801  
   802  	opts := &Options{
   803  		MemTableStopWritesThreshold: 4,
   804  		MemTableSize:                2048,
   805  	}
   806  	opts.testingRandomized(t)
   807  	opts.EnsureDefaults()
   808  	d, err := Open(dir, opts)
   809  	require.NoError(t, err)
   810  	d.mu.Lock()
   811  	d.mu.compact.flushing = true
   812  	d.mu.Unlock()
   813  	require.NoError(t, d.Set([]byte("1"), []byte(strings.Repeat("a", 1024)), nil))
   814  	require.NoError(t, d.Set([]byte("2"), nil, nil))
   815  	d.mu.Lock()
   816  	d.mu.compact.flushing = false
   817  	d.mu.Unlock()
   818  	require.NoError(t, d.Close())
   819  
   820  	// We should have two WALs.
   821  	var logs []string
   822  	var optionFilename string
   823  	ls, err := vfs.Default.List(dir)
   824  	require.NoError(t, err)
   825  	for _, name := range ls {
   826  		if filepath.Ext(name) == ".log" {
   827  			logs = append(logs, name)
   828  		}
   829  		if strings.HasPrefix(filepath.Base(name), "OPTIONS") {
   830  			optionFilename = name
   831  		}
   832  	}
   833  	sort.Strings(logs)
   834  	if len(logs) < 2 {
   835  		t.Fatalf("expected at least two log files, found %d", len(logs))
   836  	}
   837  
   838  	// Corrupt the (n-1)th WAL by zeroing four bytes, 100 bytes from the end
   839  	// of the file.
   840  	f, err := os.OpenFile(filepath.Join(dir, logs[len(logs)-2]), os.O_RDWR, os.ModePerm)
   841  	require.NoError(t, err)
   842  	off, err := f.Seek(-100, 2)
   843  	require.NoError(t, err)
   844  	_, err = f.Write([]byte{0, 0, 0, 0})
   845  	require.NoError(t, err)
   846  	require.NoError(t, f.Close())
   847  	t.Logf("zeored four bytes in %s at offset %d\n", logs[len(logs)-2], off)
   848  
   849  	// Remove the OPTIONS file containing the strict_wal_tail option.
   850  	require.NoError(t, vfs.Default.Remove(filepath.Join(dir, optionFilename)))
   851  
   852  	// Re-opening the database should not report the corruption.
   853  	d, err = Open(dir, nil)
   854  	require.NoError(t, err)
   855  	require.NoError(t, d.Close())
   856  }
   857  
   858  // TestCrashOpenCrashAfterWALCreation tests a database that exits
   859  // ungracefully, begins recovery, creates the new WAL but promptly exits
   860  // ungracefully again.
   861  //
   862  // This sequence has the potential to be problematic with the strict_wal_tail
   863  // behavior because the first crash's WAL has an unclean tail. By the time the
   864  // new WAL is created, the current manifest's MinUnflushedLogNum must be
   865  // higher than the previous WAL.
   866  func TestCrashOpenCrashAfterWALCreation(t *testing.T) {
   867  	fs := vfs.NewStrictMem()
   868  
   869  	getLogs := func() (logs []string) {
   870  		ls, err := fs.List("")
   871  		require.NoError(t, err)
   872  		for _, name := range ls {
   873  			if filepath.Ext(name) == ".log" {
   874  				logs = append(logs, name)
   875  			}
   876  		}
   877  		return logs
   878  	}
   879  
   880  	{
   881  		d, err := Open("", testingRandomized(t, &Options{FS: fs}))
   882  		require.NoError(t, err)
   883  		require.NoError(t, d.Set([]byte("abc"), nil, Sync))
   884  
   885  		// Ignore syncs during close to simulate a crash. This will leave the WAL
   886  		// without an EOF trailer. It won't be an 'unclean tail' yet since the
   887  		// log file was not recycled, but we'll fix that down below.
   888  		fs.SetIgnoreSyncs(true)
   889  		require.NoError(t, d.Close())
   890  		fs.ResetToSyncedState()
   891  		fs.SetIgnoreSyncs(false)
   892  	}
   893  
   894  	// There should be one WAL.
   895  	logs := getLogs()
   896  	if len(logs) != 1 {
   897  		t.Fatalf("expected one log file, found %d", len(logs))
   898  	}
   899  
   900  	// The one WAL file doesn't have an EOF trailer, but since it wasn't
   901  	// recycled it won't have garbage at the end. Rewrite it so that it has
   902  	// the same contents it currently has, followed by garbage.
   903  	{
   904  		f, err := fs.Open(logs[0])
   905  		require.NoError(t, err)
   906  		b, err := io.ReadAll(f)
   907  		require.NoError(t, err)
   908  		require.NoError(t, f.Close())
   909  		f, err = fs.Create(logs[0])
   910  		require.NoError(t, err)
   911  		_, err = f.Write(b)
   912  		require.NoError(t, err)
   913  		_, err = f.Write([]byte{0xde, 0xad, 0xbe, 0xef})
   914  		require.NoError(t, err)
   915  		require.NoError(t, f.Sync())
   916  		require.NoError(t, f.Close())
   917  		dir, err := fs.OpenDir("")
   918  		require.NoError(t, err)
   919  		require.NoError(t, dir.Sync())
   920  		require.NoError(t, dir.Close())
   921  	}
   922  
   923  	// Open the database again (with syncs respected again). Wrap the
   924  	// filesystem with an errorfs that will turn off syncs after a new .log
   925  	// file is created and after a subsequent directory sync occurs. This
   926  	// simulates a crash after the new log file is created and synced.
   927  	{
   928  		var walCreated, dirSynced atomic.Bool
   929  		d, err := Open("", &Options{
   930  			FS: errorfs.Wrap(fs, errorfs.InjectorFunc(func(op errorfs.Op, path string) error {
   931  				if dirSynced.Load() {
   932  					fs.SetIgnoreSyncs(true)
   933  				}
   934  				if op == errorfs.OpCreate && filepath.Ext(path) == ".log" {
   935  					walCreated.Store(true)
   936  				}
   937  				// Record when there's a sync of the data directory after the
   938  				// WAL was created. The data directory will have an empty
   939  				// path because that's what we passed into Open.
   940  				if op == errorfs.OpFileSync && path == "" && walCreated.Load() {
   941  					dirSynced.Store(true)
   942  				}
   943  				return nil
   944  			})),
   945  		})
   946  		require.NoError(t, err)
   947  		require.NoError(t, d.Close())
   948  	}
   949  
   950  	fs.ResetToSyncedState()
   951  	fs.SetIgnoreSyncs(false)
   952  
   953  	if n := len(getLogs()); n != 2 {
   954  		t.Fatalf("expected two logs, found %d\n", n)
   955  	}
   956  
   957  	// Finally, open the database with syncs enabled.
   958  	d, err := Open("", testingRandomized(t, &Options{FS: fs}))
   959  	require.NoError(t, err)
   960  	require.NoError(t, d.Close())
   961  }
   962  
   963  // TestOpenWALReplayReadOnlySeqNums tests opening a database:
   964  //   - in read-only mode
   965  //   - with multiple unflushed log files that must replayed
   966  //   - a MANIFEST that sets the last sequence number to a number greater than
   967  //     the unflushed log files
   968  //
   969  // See cockroachdb/cockroach#48660.
   970  func TestOpenWALReplayReadOnlySeqNums(t *testing.T) {
   971  	const root = ""
   972  	mem := vfs.NewMem()
   973  
   974  	copyFiles := func(srcDir, dstDir string) {
   975  		files, err := mem.List(srcDir)
   976  		require.NoError(t, err)
   977  		for _, f := range files {
   978  			require.NoError(t, vfs.Copy(mem, mem.PathJoin(srcDir, f), mem.PathJoin(dstDir, f)))
   979  		}
   980  	}
   981  
   982  	// Create a new database under `/original` with a couple sstables.
   983  	dir := mem.PathJoin(root, "original")
   984  	d, err := Open(dir, testingRandomized(t, &Options{FS: mem}))
   985  	require.NoError(t, err)
   986  	require.NoError(t, d.Set([]byte("a"), nil, nil))
   987  	require.NoError(t, d.Flush())
   988  	require.NoError(t, d.Set([]byte("a"), nil, nil))
   989  	require.NoError(t, d.Flush())
   990  
   991  	// Prevent flushes so that multiple unflushed log files build up.
   992  	d.mu.Lock()
   993  	d.mu.compact.flushing = true
   994  	d.mu.Unlock()
   995  
   996  	require.NoError(t, d.Set([]byte("b"), nil, nil))
   997  	d.AsyncFlush()
   998  	require.NoError(t, d.Set([]byte("c"), nil, nil))
   999  	d.AsyncFlush()
  1000  	require.NoError(t, d.Set([]byte("e"), nil, nil))
  1001  
  1002  	// Manually compact some of the key space so that the latest `logSeqNum` is
  1003  	// written to the MANIFEST. This produces a MANIFEST where the `logSeqNum`
  1004  	// is greater than the sequence numbers contained in the
  1005  	// `minUnflushedLogNum` log file
  1006  	require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false))
  1007  	d.mu.Lock()
  1008  	for d.mu.compact.compactingCount > 0 {
  1009  		d.mu.compact.cond.Wait()
  1010  	}
  1011  	d.mu.Unlock()
  1012  
  1013  	d.TestOnlyWaitForCleaning()
  1014  	// While the MANIFEST is still in this state, copy all the files in the
  1015  	// database to a new directory.
  1016  	replayDir := mem.PathJoin(root, "replay")
  1017  	require.NoError(t, mem.MkdirAll(replayDir, os.ModePerm))
  1018  	copyFiles(dir, replayDir)
  1019  
  1020  	d.mu.Lock()
  1021  	d.mu.compact.flushing = false
  1022  	d.mu.Unlock()
  1023  	require.NoError(t, d.Close())
  1024  
  1025  	// Open the copy of the database in read-only mode. Since we copied all
  1026  	// the files before the flushes were allowed to complete, there should be
  1027  	// multiple unflushed log files that need to replay. Since the manual
  1028  	// compaction completed, the `logSeqNum` read from the manifest should be
  1029  	// greater than the unflushed log files' sequence numbers.
  1030  	d, err = Open(replayDir, testingRandomized(t, &Options{
  1031  		FS:       mem,
  1032  		ReadOnly: true,
  1033  	}))
  1034  	require.NoError(t, err)
  1035  	require.NoError(t, d.Close())
  1036  }
  1037  
  1038  func TestOpenWALReplayMemtableGrowth(t *testing.T) {
  1039  	mem := vfs.NewMem()
  1040  	const memTableSize = 64 * 1024 * 1024
  1041  	opts := &Options{
  1042  		MemTableSize: memTableSize,
  1043  		FS:           mem,
  1044  	}
  1045  	opts.testingRandomized(t)
  1046  	func() {
  1047  		db, err := Open("", opts)
  1048  		require.NoError(t, err)
  1049  		defer db.Close()
  1050  		b := db.NewBatch()
  1051  		defer b.Close()
  1052  		key := make([]byte, 8)
  1053  		val := make([]byte, 16*1024*1024)
  1054  		b.Set(key, val, nil)
  1055  		require.NoError(t, db.Apply(b, Sync))
  1056  	}()
  1057  	db, err := Open("", opts)
  1058  	require.NoError(t, err)
  1059  	db.Close()
  1060  }
  1061  
  1062  func TestGetVersion(t *testing.T) {
  1063  	mem := vfs.NewMem()
  1064  	opts := &Options{
  1065  		FS: mem,
  1066  	}
  1067  	opts.testingRandomized(t)
  1068  
  1069  	// Case 1: No options file.
  1070  	version, err := GetVersion("", mem)
  1071  	require.NoError(t, err)
  1072  	require.Empty(t, version)
  1073  
  1074  	// Case 2: Pebble created file.
  1075  	db, err := Open("", opts)
  1076  	require.NoError(t, err)
  1077  	require.NoError(t, db.Close())
  1078  	version, err = GetVersion("", mem)
  1079  	require.NoError(t, err)
  1080  	require.Equal(t, "0.1", version)
  1081  
  1082  	// Case 3: Manually created OPTIONS file with a higher number.
  1083  	highestOptionsNum := FileNum(0)
  1084  	ls, err := mem.List("")
  1085  	require.NoError(t, err)
  1086  	for _, filename := range ls {
  1087  		ft, fn, ok := base.ParseFilename(mem, filename)
  1088  		if !ok {
  1089  			continue
  1090  		}
  1091  		switch ft {
  1092  		case fileTypeOptions:
  1093  			if fn.FileNum() > highestOptionsNum {
  1094  				highestOptionsNum = fn.FileNum()
  1095  			}
  1096  		}
  1097  	}
  1098  	f, _ := mem.Create(fmt.Sprintf("OPTIONS-%d", highestOptionsNum+1))
  1099  	_, err = f.Write([]byte("[Version]\n  pebble_version=0.2\n"))
  1100  	require.NoError(t, err)
  1101  	err = f.Close()
  1102  	require.NoError(t, err)
  1103  	version, err = GetVersion("", mem)
  1104  	require.NoError(t, err)
  1105  	require.Equal(t, "0.2", version)
  1106  
  1107  	// Case 4: Manually created OPTIONS file with a RocksDB number.
  1108  	f, _ = mem.Create(fmt.Sprintf("OPTIONS-%d", highestOptionsNum+2))
  1109  	_, err = f.Write([]byte("[Version]\n  rocksdb_version=6.2.1\n"))
  1110  	require.NoError(t, err)
  1111  	err = f.Close()
  1112  	require.NoError(t, err)
  1113  	version, err = GetVersion("", mem)
  1114  	require.NoError(t, err)
  1115  	require.Equal(t, "rocksdb v6.2.1", version)
  1116  }
  1117  
  1118  func TestRocksDBNoFlushManifest(t *testing.T) {
  1119  	mem := vfs.NewMem()
  1120  	// Have the comparer and merger names match what's in the testdata
  1121  	// directory.
  1122  	comparer := *DefaultComparer
  1123  	merger := *DefaultMerger
  1124  	comparer.Name = "cockroach_comparator"
  1125  	merger.Name = "cockroach_merge_operator"
  1126  	opts := &Options{
  1127  		FS:       mem,
  1128  		Comparer: &comparer,
  1129  		Merger:   &merger,
  1130  	}
  1131  
  1132  	// rocksdb-ingest-only is a RocksDB-generated db directory that has not had
  1133  	// a single flush yet, only ingestion operations. The manifest contains
  1134  	// a next-log-num but no log-num entry. Ensure that pebble can read these
  1135  	// directories without an issue.
  1136  	_, err := vfs.Clone(vfs.Default, mem, "testdata/rocksdb-ingest-only", "testdata")
  1137  	require.NoError(t, err)
  1138  
  1139  	db, err := Open("testdata", opts)
  1140  	require.NoError(t, err)
  1141  	defer db.Close()
  1142  
  1143  	val, closer, err := db.Get([]byte("ajulxeiombjiyw\x00\x00\x00\x00\x00\x00\x00\x01\x12\x09"))
  1144  	require.NoError(t, err)
  1145  	require.NotEmpty(t, val)
  1146  	require.NoError(t, closer.Close())
  1147  }
  1148  
  1149  func TestOpen_ErrorIfUnknownFormatVersion(t *testing.T) {
  1150  	fs := vfs.NewMem()
  1151  	d, err := Open("", &Options{
  1152  		FS:                 fs,
  1153  		FormatMajorVersion: FormatVersioned,
  1154  	})
  1155  	require.NoError(t, err)
  1156  	require.NoError(t, d.Close())
  1157  
  1158  	// Move the marker to a version that does not exist.
  1159  	m, _, err := atomicfs.LocateMarker(fs, "", formatVersionMarkerName)
  1160  	require.NoError(t, err)
  1161  	require.NoError(t, m.Move("999999"))
  1162  	require.NoError(t, m.Close())
  1163  
  1164  	_, err = Open("", &Options{
  1165  		FS:                 fs,
  1166  		FormatMajorVersion: FormatVersioned,
  1167  	})
  1168  	require.Error(t, err)
  1169  	require.EqualError(t, err, `pebble: database "" written in format major version 999999`)
  1170  }
  1171  
  1172  // ensureFilesClosed updates the provided Options to wrap the filesystem. It
  1173  // returns a closure that when invoked fails the test if any files opened by the
  1174  // filesystem are not closed.
  1175  //
  1176  // This function is intended to be used in tests with defer.
  1177  //
  1178  //	opts := &Options{FS: vfs.NewMem()}
  1179  //	defer ensureFilesClosed(t, opts)()
  1180  //	/* test code */
  1181  func ensureFilesClosed(t *testing.T, o *Options) func() {
  1182  	fs := &closeTrackingFS{
  1183  		FS:    o.FS,
  1184  		files: map[*closeTrackingFile]struct{}{},
  1185  	}
  1186  	o.FS = fs
  1187  	return func() {
  1188  		// fs.files should be empty if all the files were closed.
  1189  		for f := range fs.files {
  1190  			t.Errorf("An open file was never closed. Opened at:\n%s", f.stack)
  1191  		}
  1192  	}
  1193  }
  1194  
  1195  type closeTrackingFS struct {
  1196  	vfs.FS
  1197  	files map[*closeTrackingFile]struct{}
  1198  }
  1199  
  1200  func (fs *closeTrackingFS) wrap(file vfs.File, err error) (vfs.File, error) {
  1201  	if err != nil {
  1202  		return nil, err
  1203  	}
  1204  	f := &closeTrackingFile{
  1205  		File:  file,
  1206  		fs:    fs,
  1207  		stack: debug.Stack(),
  1208  	}
  1209  	fs.files[f] = struct{}{}
  1210  	return f, err
  1211  }
  1212  
  1213  func (fs *closeTrackingFS) Create(name string) (vfs.File, error) {
  1214  	return fs.wrap(fs.FS.Create(name))
  1215  }
  1216  
  1217  func (fs *closeTrackingFS) Open(name string, opts ...vfs.OpenOption) (vfs.File, error) {
  1218  	return fs.wrap(fs.FS.Open(name))
  1219  }
  1220  
  1221  func (fs *closeTrackingFS) OpenDir(name string) (vfs.File, error) {
  1222  	return fs.wrap(fs.FS.OpenDir(name))
  1223  }
  1224  
  1225  func (fs *closeTrackingFS) ReuseForWrite(oldname, newname string) (vfs.File, error) {
  1226  	return fs.wrap(fs.FS.ReuseForWrite(oldname, newname))
  1227  }
  1228  
  1229  type closeTrackingFile struct {
  1230  	vfs.File
  1231  	fs    *closeTrackingFS
  1232  	stack []byte
  1233  }
  1234  
  1235  func (f *closeTrackingFile) Close() error {
  1236  	delete(f.fs.files, f)
  1237  	return f.File.Close()
  1238  }
  1239  
  1240  func TestCheckConsistency(t *testing.T) {
  1241  	const dir = "./test"
  1242  	mem := vfs.NewMem()
  1243  	mem.MkdirAll(dir, 0755)
  1244  
  1245  	provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, dir))
  1246  	require.NoError(t, err)
  1247  	defer provider.Close()
  1248  
  1249  	cmp := base.DefaultComparer.Compare
  1250  	fmtKey := base.DefaultComparer.FormatKey
  1251  	parseMeta := func(s string) (*manifest.FileMetadata, error) {
  1252  		if len(s) == 0 {
  1253  			return nil, nil
  1254  		}
  1255  		parts := strings.Split(s, ":")
  1256  		if len(parts) != 2 {
  1257  			return nil, errors.Errorf("malformed table spec: %q", s)
  1258  		}
  1259  		fileNum, err := strconv.Atoi(strings.TrimSpace(parts[0]))
  1260  		if err != nil {
  1261  			return nil, err
  1262  		}
  1263  		size, err := strconv.Atoi(strings.TrimSpace(parts[1]))
  1264  		if err != nil {
  1265  			return nil, err
  1266  		}
  1267  		m := &manifest.FileMetadata{
  1268  			FileNum: base.FileNum(fileNum),
  1269  			Size:    uint64(size),
  1270  		}
  1271  		m.InitPhysicalBacking()
  1272  		return m, nil
  1273  	}
  1274  
  1275  	datadriven.RunTest(t, "testdata/version_check_consistency",
  1276  		func(t *testing.T, d *datadriven.TestData) string {
  1277  			switch d.Cmd {
  1278  			case "check-consistency":
  1279  				var filesByLevel [manifest.NumLevels][]*manifest.FileMetadata
  1280  				var files *[]*manifest.FileMetadata
  1281  
  1282  				for _, data := range strings.Split(d.Input, "\n") {
  1283  					switch data {
  1284  					case "L0", "L1", "L2", "L3", "L4", "L5", "L6":
  1285  						level, err := strconv.Atoi(data[1:])
  1286  						if err != nil {
  1287  							return err.Error()
  1288  						}
  1289  						files = &filesByLevel[level]
  1290  
  1291  					default:
  1292  						m, err := parseMeta(data)
  1293  						if err != nil {
  1294  							return err.Error()
  1295  						}
  1296  						if m != nil {
  1297  							*files = append(*files, m)
  1298  						}
  1299  					}
  1300  				}
  1301  
  1302  				redactErr := false
  1303  				for _, arg := range d.CmdArgs {
  1304  					switch v := arg.String(); v {
  1305  					case "redact":
  1306  						redactErr = true
  1307  					default:
  1308  						return fmt.Sprintf("unknown argument: %q", v)
  1309  					}
  1310  				}
  1311  
  1312  				v := manifest.NewVersion(cmp, fmtKey, 0, filesByLevel)
  1313  				err := checkConsistency(v, dir, provider)
  1314  				if err != nil {
  1315  					if redactErr {
  1316  						redacted := redact.Sprint(err).Redact()
  1317  						return string(redacted)
  1318  					}
  1319  					return err.Error()
  1320  				}
  1321  				return "OK"
  1322  
  1323  			case "build":
  1324  				for _, data := range strings.Split(d.Input, "\n") {
  1325  					m, err := parseMeta(data)
  1326  					if err != nil {
  1327  						return err.Error()
  1328  					}
  1329  					path := base.MakeFilepath(mem, dir, base.FileTypeTable, m.FileBacking.DiskFileNum)
  1330  					_ = mem.Remove(path)
  1331  					f, err := mem.Create(path)
  1332  					if err != nil {
  1333  						return err.Error()
  1334  					}
  1335  					_, err = f.Write(make([]byte, m.Size))
  1336  					if err != nil {
  1337  						return err.Error()
  1338  					}
  1339  					f.Close()
  1340  				}
  1341  				return ""
  1342  
  1343  			default:
  1344  				return fmt.Sprintf("unknown command: %s", d.Cmd)
  1345  			}
  1346  		})
  1347  }
  1348  
  1349  func TestOpenRatchetsNextFileNum(t *testing.T) {
  1350  	mem := vfs.NewMem()
  1351  	memShared := remote.NewInMem()
  1352  
  1353  	opts := &Options{FS: mem}
  1354  	opts.Experimental.CreateOnShared = remote.CreateOnSharedAll
  1355  	opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{
  1356  		"": memShared,
  1357  	})
  1358  	d, err := Open("", opts)
  1359  	require.NoError(t, err)
  1360  	d.SetCreatorID(1)
  1361  
  1362  	require.NoError(t, d.Set([]byte("foo"), []byte("value"), nil))
  1363  	require.NoError(t, d.Set([]byte("bar"), []byte("value"), nil))
  1364  	require.NoError(t, d.Flush())
  1365  	require.NoError(t, d.Compact([]byte("a"), []byte("z"), false))
  1366  
  1367  	// Create a shared file with the newest file num and then close the db.
  1368  	d.mu.Lock()
  1369  	nextFileNum := d.mu.versions.getNextFileNum()
  1370  	w, _, err := d.objProvider.Create(context.TODO(), fileTypeTable, nextFileNum.DiskFileNum(), objstorage.CreateOptions{PreferSharedStorage: true})
  1371  	require.NoError(t, err)
  1372  	require.NoError(t, w.Write([]byte("foobar")))
  1373  	require.NoError(t, w.Finish())
  1374  	require.NoError(t, d.objProvider.Sync())
  1375  	d.mu.Unlock()
  1376  
  1377  	// Write one key and then close the db. This write will stay in the memtable,
  1378  	// forcing the reopen to do a compaction on open.
  1379  	require.NoError(t, d.Set([]byte("foo1"), []byte("value"), nil))
  1380  	require.NoError(t, d.Close())
  1381  
  1382  	// Reopen db. Compactions should happen without error.
  1383  	d, err = Open("", opts)
  1384  	require.NoError(t, err)
  1385  	require.NoError(t, d.Set([]byte("foo2"), []byte("value"), nil))
  1386  	require.NoError(t, d.Set([]byte("bar2"), []byte("value"), nil))
  1387  	require.NoError(t, d.Flush())
  1388  	require.NoError(t, d.Compact([]byte("a"), []byte("z"), false))
  1389  
  1390  }