github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/open_test.go (about)

     1  // Copyright 2012 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package bitalostable
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"os"
    13  	"path/filepath"
    14  	"reflect"
    15  	"runtime/debug"
    16  	"sort"
    17  	"strconv"
    18  	"strings"
    19  	"sync/atomic"
    20  	"syscall"
    21  	"testing"
    22  
    23  	"github.com/cockroachdb/errors/oserror"
    24  	"github.com/kr/pretty"
    25  	"github.com/stretchr/testify/require"
    26  	"github.com/zuoyebang/bitalostable/internal/base"
    27  	"github.com/zuoyebang/bitalostable/internal/cache"
    28  	"github.com/zuoyebang/bitalostable/internal/errorfs"
    29  	"github.com/zuoyebang/bitalostable/vfs"
    30  	"github.com/zuoyebang/bitalostable/vfs/atomicfs"
    31  )
    32  
    33  func TestOpenSharedTableCache(t *testing.T) {
    34  	c := cache.New(cacheDefaultSize)
    35  	tc := NewTableCache(c, 16, 100)
    36  	defer tc.Unref()
    37  	defer c.Unref()
    38  
    39  	d0, err := Open("", testingRandomized(&Options{
    40  		FS:         vfs.NewMem(),
    41  		Cache:      c,
    42  		TableCache: tc,
    43  	}))
    44  	if err != nil {
    45  		t.Errorf("d0 Open: %s", err.Error())
    46  	}
    47  	defer d0.Close()
    48  
    49  	d1, err := Open("", testingRandomized(&Options{
    50  		FS:         vfs.NewMem(),
    51  		Cache:      c,
    52  		TableCache: tc,
    53  	}))
    54  	if err != nil {
    55  		t.Errorf("d1 Open: %s", err.Error())
    56  	}
    57  	defer d1.Close()
    58  
    59  	// Make sure that the Open function is using the passed in table cache
    60  	// when the TableCache option is set.
    61  	require.Equalf(
    62  		t, d0.tableCache.tableCache, d1.tableCache.tableCache,
    63  		"expected tableCache for both d0 and d1 to be the same",
    64  	)
    65  }
    66  
    67  func TestErrorIfExists(t *testing.T) {
    68  	for _, b := range [...]bool{false, true} {
    69  		t.Run(fmt.Sprintf("%t", b), func(t *testing.T) {
    70  			mem := vfs.NewMem()
    71  			d0, err := Open("", testingRandomized(&Options{
    72  				FS: mem,
    73  			}))
    74  			if err != nil {
    75  				t.Errorf("b=%v: d0 Open: %v", b, err)
    76  				return
    77  			}
    78  			if err := d0.Close(); err != nil {
    79  				t.Errorf("b=%v: d0 Close: %v", b, err)
    80  				return
    81  			}
    82  
    83  			opts := testingRandomized(&Options{
    84  				FS:            mem,
    85  				ErrorIfExists: b,
    86  			})
    87  			defer ensureFilesClosed(t, opts)()
    88  			d1, err := Open("", opts)
    89  			if d1 != nil {
    90  				defer d1.Close()
    91  			}
    92  			if got := err != nil; got != b {
    93  				t.Errorf("b=%v: d1 Open: err is %v, got (err != nil) is %v, want %v", b, err, got, b)
    94  				return
    95  			}
    96  		})
    97  	}
    98  }
    99  
   100  func TestErrorIfNotExists(t *testing.T) {
   101  	t.Run("does-not-exist", func(t *testing.T) {
   102  		opts := testingRandomized(&Options{
   103  			FS:               vfs.NewMem(),
   104  			ErrorIfNotExists: true,
   105  		})
   106  		defer ensureFilesClosed(t, opts)()
   107  
   108  		_, err := Open("", opts)
   109  		if err == nil {
   110  			t.Fatalf("expected error, but found success")
   111  		} else if !strings.HasSuffix(err.Error(), oserror.ErrNotExist.Error()) {
   112  			t.Fatalf("expected not exists, but found %q", err)
   113  		}
   114  	})
   115  
   116  	t.Run("does-exist", func(t *testing.T) {
   117  		opts := testingRandomized(&Options{
   118  			FS:               vfs.NewMem(),
   119  			ErrorIfNotExists: false,
   120  		})
   121  		defer ensureFilesClosed(t, opts)()
   122  
   123  		// Create the DB and try again.
   124  		d, err := Open("", opts)
   125  		require.NoError(t, err)
   126  		require.NoError(t, d.Close())
   127  
   128  		opts.ErrorIfNotExists = true
   129  		// The DB exists, so the setting of ErrorIfNotExists is a no-op.
   130  		d, err = Open("", opts)
   131  		require.NoError(t, err)
   132  		require.NoError(t, d.Close())
   133  	})
   134  }
   135  
   136  func TestNewDBFilenames(t *testing.T) {
   137  	versions := map[FormatMajorVersion][]string{
   138  		FormatMostCompatible: {
   139  			"000002.log",
   140  			"CURRENT",
   141  			"LOCK",
   142  			"MANIFEST-000001",
   143  			"OPTIONS-000003",
   144  		},
   145  		FormatNewest: {
   146  			"000002.log",
   147  			"CURRENT",
   148  			"LOCK",
   149  			"MANIFEST-000001",
   150  			"OPTIONS-000003",
   151  			"marker.format-version.000010.011",
   152  			"marker.manifest.000001.MANIFEST-000001",
   153  		},
   154  	}
   155  
   156  	for formatVers, want := range versions {
   157  		t.Run(fmt.Sprintf("vers=%s", formatVers), func(t *testing.T) {
   158  			mem := vfs.NewMem()
   159  			fooBar := mem.PathJoin("foo", "bar")
   160  			d, err := Open(fooBar, &Options{
   161  				FS:                 mem,
   162  				FormatMajorVersion: formatVers,
   163  			})
   164  			if err != nil {
   165  				t.Fatalf("Open: %v", err)
   166  			}
   167  			if err := d.Close(); err != nil {
   168  				t.Fatalf("Close: %v", err)
   169  			}
   170  			got, err := mem.List(fooBar)
   171  			if err != nil {
   172  				t.Fatalf("List: %v", err)
   173  			}
   174  			sort.Strings(got)
   175  			if !reflect.DeepEqual(got, want) {
   176  				t.Errorf("\ngot  %v\nwant %v", got, want)
   177  			}
   178  		})
   179  	}
   180  }
   181  
   182  func testOpenCloseOpenClose(t *testing.T, fs vfs.FS, root string) {
   183  	opts := testingRandomized(&Options{FS: fs})
   184  
   185  	for _, startFromEmpty := range []bool{false, true} {
   186  		for _, walDirname := range []string{"", "wal"} {
   187  			for _, length := range []int{-1, 0, 1, 1000, 10000, 100000} {
   188  				dirname := "sharedDatabase" + walDirname
   189  				if startFromEmpty {
   190  					dirname = "startFromEmpty" + walDirname + strconv.Itoa(length)
   191  				}
   192  				dirname = fs.PathJoin(root, dirname)
   193  				if walDirname == "" {
   194  					opts.WALDir = ""
   195  				} else {
   196  					opts.WALDir = fs.PathJoin(dirname, walDirname)
   197  				}
   198  
   199  				got, xxx := []byte(nil), ""
   200  				if length >= 0 {
   201  					xxx = strings.Repeat("x", length)
   202  				}
   203  
   204  				d0, err := Open(dirname, opts)
   205  				if err != nil {
   206  					t.Fatalf("sfe=%t, length=%d: Open #0: %v",
   207  						startFromEmpty, length, err)
   208  					continue
   209  				}
   210  				if length >= 0 {
   211  					err = d0.Set([]byte("key"), []byte(xxx), nil)
   212  					if err != nil {
   213  						t.Errorf("sfe=%t, length=%d: Set: %v",
   214  							startFromEmpty, length, err)
   215  						continue
   216  					}
   217  				}
   218  				err = d0.Close()
   219  				if err != nil {
   220  					t.Errorf("sfe=%t, length=%d: Close #0: %v",
   221  						startFromEmpty, length, err)
   222  					continue
   223  				}
   224  
   225  				d1, err := Open(dirname, opts)
   226  				if err != nil {
   227  					t.Errorf("sfe=%t, length=%d: Open #1: %v",
   228  						startFromEmpty, length, err)
   229  					continue
   230  				}
   231  				if length >= 0 {
   232  					var closer io.Closer
   233  					got, closer, err = d1.Get([]byte("key"))
   234  					if err != nil {
   235  						t.Errorf("sfe=%t, length=%d: Get: %v",
   236  							startFromEmpty, length, err)
   237  						continue
   238  					}
   239  					got = append([]byte(nil), got...)
   240  					closer.Close()
   241  				}
   242  				err = d1.Close()
   243  				if err != nil {
   244  					t.Errorf("sfe=%t, length=%d: Close #1: %v",
   245  						startFromEmpty, length, err)
   246  					continue
   247  				}
   248  
   249  				if length >= 0 && string(got) != xxx {
   250  					t.Errorf("sfe=%t, length=%d: got value differs from set value",
   251  						startFromEmpty, length)
   252  					continue
   253  				}
   254  
   255  				{
   256  					got, err := opts.FS.List(dirname)
   257  					if err != nil {
   258  						t.Fatalf("List: %v", err)
   259  					}
   260  					var optionsCount int
   261  					for _, s := range got {
   262  						if t, _, ok := base.ParseFilename(opts.FS, s); ok && t == fileTypeOptions {
   263  							optionsCount++
   264  						}
   265  					}
   266  					if optionsCount != 1 {
   267  						t.Fatalf("expected 1 OPTIONS file, but found %d", optionsCount)
   268  					}
   269  				}
   270  			}
   271  		}
   272  	}
   273  }
   274  
   275  func TestOpenCloseOpenClose(t *testing.T) {
   276  	for _, fstype := range []string{"disk", "mem"} {
   277  		t.Run(fstype, func(t *testing.T) {
   278  			var fs vfs.FS
   279  			var dir string
   280  			switch fstype {
   281  			case "disk":
   282  				var err error
   283  				dir, err = ioutil.TempDir("", "open-close")
   284  				require.NoError(t, err)
   285  				defer func() {
   286  					_ = os.RemoveAll(dir)
   287  				}()
   288  				fs = vfs.Default
   289  			case "mem":
   290  				dir = ""
   291  				fs = vfs.NewMem()
   292  			}
   293  			testOpenCloseOpenClose(t, fs, dir)
   294  		})
   295  	}
   296  }
   297  
   298  func TestOpenOptionsCheck(t *testing.T) {
   299  	mem := vfs.NewMem()
   300  	opts := &Options{FS: mem}
   301  
   302  	d, err := Open("", opts)
   303  	require.NoError(t, err)
   304  	require.NoError(t, d.Close())
   305  
   306  	opts = &Options{
   307  		Comparer: &Comparer{Name: "foo"},
   308  		FS:       mem,
   309  	}
   310  	_, err = Open("", opts)
   311  	require.Regexp(t, `comparer name from file.*!=.*`, err)
   312  
   313  	opts = &Options{
   314  		Merger: &Merger{Name: "bar"},
   315  		FS:     mem,
   316  	}
   317  	_, err = Open("", opts)
   318  	require.Regexp(t, `merger name from file.*!=.*`, err)
   319  }
   320  
   321  func TestOpenCrashWritingOptions(t *testing.T) {
   322  	memFS := vfs.NewMem()
   323  
   324  	d, err := Open("", &Options{FS: memFS})
   325  	require.NoError(t, err)
   326  	require.NoError(t, d.Close())
   327  
   328  	// Open the database again, this time with a mocked filesystem that
   329  	// will only succeed in partially writing the OPTIONS file.
   330  	fs := optionsTornWriteFS{FS: memFS}
   331  	_, err = Open("", &Options{FS: fs})
   332  	require.Error(t, err)
   333  
   334  	// Re-opening the database must succeed.
   335  	d, err = Open("", &Options{FS: memFS})
   336  	require.NoError(t, err)
   337  	require.NoError(t, d.Close())
   338  }
   339  
   340  type optionsTornWriteFS struct {
   341  	vfs.FS
   342  }
   343  
   344  func (fs optionsTornWriteFS) Create(name string) (vfs.File, error) {
   345  	file, err := fs.FS.Create(name)
   346  	if file != nil {
   347  		file = optionsTornWriteFile{File: file}
   348  	}
   349  	return file, err
   350  }
   351  
   352  type optionsTornWriteFile struct {
   353  	vfs.File
   354  }
   355  
   356  func (f optionsTornWriteFile) Write(b []byte) (int, error) {
   357  	// Look for the OPTIONS-XXXXXX file's `comparer=` field.
   358  	comparerKey := []byte("comparer=")
   359  	i := bytes.Index(b, comparerKey)
   360  	if i == -1 {
   361  		return f.File.Write(b)
   362  	}
   363  	// Write only the contents through `comparer=` and return an error.
   364  	n, _ := f.File.Write(b[:i+len(comparerKey)])
   365  	return n, syscall.EIO
   366  }
   367  
// TestOpenReadOnly exercises Options.ReadOnly in four steps, all sharing one
// in-memory filesystem:
//  1. opening a non-existent DB read-only must fail and log only the expected
//     filesystem operations,
//  2. opening with a non-existent WAL directory read-only must fail and log
//     only the expected filesystem operations,
//  3. a DB is created normally and populated with a single key,
//  4. that DB is re-opened read-only: write operations must return
//     ErrReadOnly, reads and iteration must still work, and the directory
//     listing must be unchanged after Close.
func TestOpenReadOnly(t *testing.T) {
	mem := vfs.NewMem()

	{
		// Opening a non-existent DB in read-only mode should result in no mutable
		// filesystem operations.
		var buf syncedBuffer
		_, err := Open("non-existent", testingRandomized(&Options{
			FS:       loggingFS{mem, &buf},
			ReadOnly: true,
			WALDir:   "non-existent-waldir",
		}))
		if err == nil {
			t.Fatalf("expected error, but found success")
		}
		// The only operation expected in the log is the attempt to open the
		// DB directory.
		const expected = `open-dir: non-existent`
		if trimmed := strings.TrimSpace(buf.String()); expected != trimmed {
			t.Fatalf("expected %q, but found %q", expected, trimmed)
		}
	}

	{
		// Opening a DB with a non-existent WAL dir in read-only mode should result
		// in no mutable filesystem operations other than the LOCK.
		// NOTE(review): the expected log below contains only open-dir and
		// close entries — confirm whether the LOCK mention is still accurate.
		var buf syncedBuffer
		_, err := Open("", testingRandomized(&Options{
			FS:       loggingFS{mem, &buf},
			ReadOnly: true,
			WALDir:   "non-existent-waldir",
		}))
		if err == nil {
			t.Fatalf("expected error, but found success")
		}
		const expected = "open-dir: \nopen-dir: non-existent-waldir\nclose:"
		if trimmed := strings.TrimSpace(buf.String()); expected != trimmed {
			t.Fatalf("expected %q, but found %q", expected, trimmed)
		}
	}

	// contents holds the sorted directory listing captured after the DB is
	// created; it is compared against the listing after the read-only
	// session.
	var contents []string
	{
		// Create a new DB and populate it with a small amount of data.
		d, err := Open("", testingRandomized(&Options{
			FS: mem,
		}))
		require.NoError(t, err)
		require.NoError(t, d.Set([]byte("test"), nil, nil))
		require.NoError(t, d.Close())
		contents, err = mem.List("")
		require.NoError(t, err)
		sort.Strings(contents)
	}

	{
		// Re-open the DB read-only. The directory contents should be unchanged.
		d, err := Open("", testingRandomized(&Options{
			FS:       mem,
			ReadOnly: true,
		}))
		require.NoError(t, err)

		// Verify various write operations fail in read-only mode.
		require.EqualValues(t, ErrReadOnly, d.Compact(nil, []byte("\xff"), false))
		require.EqualValues(t, ErrReadOnly, d.Flush())
		require.EqualValues(t, ErrReadOnly, func() error { _, err := d.AsyncFlush(); return err }())

		require.EqualValues(t, ErrReadOnly, d.Delete(nil, nil))
		require.EqualValues(t, ErrReadOnly, d.DeleteRange(nil, nil, nil))
		require.EqualValues(t, ErrReadOnly, d.Ingest(nil))
		require.EqualValues(t, ErrReadOnly, d.LogData(nil, nil))
		require.EqualValues(t, ErrReadOnly, d.Merge(nil, nil, nil))
		require.EqualValues(t, ErrReadOnly, d.Set(nil, nil, nil))

		// Verify we can still read in read-only mode.
		require.NoError(t, func() error {
			_, closer, err := d.Get([]byte("test"))
			if closer != nil {
				closer.Close()
			}
			return err
		}())

		// checkIter walks iter from First to the end, closes it, and fails
		// the test unless the keys seen are exactly ["test"].
		checkIter := func(iter *Iterator) {
			t.Helper()

			var keys []string
			for valid := iter.First(); valid; valid = iter.Next() {
				keys = append(keys, string(iter.Key()))
			}
			require.NoError(t, iter.Close())
			expectedKeys := []string{"test"}
			if diff := pretty.Diff(keys, expectedKeys); diff != nil {
				t.Fatalf("%s\n%s", strings.Join(diff, "\n"), keys)
			}
		}

		// Iterate directly over the DB.
		checkIter(d.NewIter(nil))

		// Iterate through an indexed batch; committing or applying the batch
		// must be rejected with ErrReadOnly.
		b := d.NewIndexedBatch()
		checkIter(b.NewIter(nil))
		require.EqualValues(t, ErrReadOnly, b.Commit(nil))
		require.EqualValues(t, ErrReadOnly, d.Apply(b, nil))

		// Iterate through a snapshot.
		s := d.NewSnapshot()
		checkIter(s.NewIter(nil))
		require.NoError(t, s.Close())

		require.NoError(t, d.Close())

		// The read-only session must not have changed the set of files.
		newContents, err := mem.List("")
		require.NoError(t, err)

		sort.Strings(newContents)
		if diff := pretty.Diff(contents, newContents); diff != nil {
			t.Fatalf("%s", strings.Join(diff, "\n"))
		}
	}
}
   486  
   487  func TestOpenWALReplay(t *testing.T) {
   488  	largeValue := []byte(strings.Repeat("a", 100<<10))
   489  	hugeValue := []byte(strings.Repeat("b", 10<<20))
   490  	checkIter := func(iter *Iterator) {
   491  		t.Helper()
   492  
   493  		var keys []string
   494  		for valid := iter.First(); valid; valid = iter.Next() {
   495  			keys = append(keys, string(iter.Key()))
   496  		}
   497  		require.NoError(t, iter.Close())
   498  		expectedKeys := []string{"1", "2", "3", "4", "5"}
   499  		if diff := pretty.Diff(keys, expectedKeys); diff != nil {
   500  			t.Fatalf("%s\n%s", strings.Join(diff, "\n"), keys)
   501  		}
   502  	}
   503  
   504  	for _, readOnly := range []bool{false, true} {
   505  		t.Run(fmt.Sprintf("read-only=%t", readOnly), func(t *testing.T) {
   506  			// Create a new DB and populate it with some data.
   507  			const dir = ""
   508  			mem := vfs.NewMem()
   509  			d, err := Open(dir, testingRandomized(&Options{
   510  				FS:           mem,
   511  				MemTableSize: 32 << 20,
   512  			}))
   513  			require.NoError(t, err)
   514  			// All these values will fit in a single memtable, so on closing the db there
   515  			// will be no sst and all the data is in a single WAL.
   516  			require.NoError(t, d.Set([]byte("1"), largeValue, nil))
   517  			require.NoError(t, d.Set([]byte("2"), largeValue, nil))
   518  			require.NoError(t, d.Set([]byte("3"), largeValue, nil))
   519  			require.NoError(t, d.Set([]byte("4"), hugeValue, nil))
   520  			require.NoError(t, d.Set([]byte("5"), largeValue, nil))
   521  			checkIter(d.NewIter(nil))
   522  			require.NoError(t, d.Close())
   523  			files, err := mem.List(dir)
   524  			require.NoError(t, err)
   525  			sort.Strings(files)
   526  			logCount, sstCount := 0, 0
   527  			for _, fname := range files {
   528  				if strings.HasSuffix(fname, ".sst") {
   529  					sstCount++
   530  				}
   531  				if strings.HasSuffix(fname, ".log") {
   532  					logCount++
   533  				}
   534  			}
   535  			require.Equal(t, 0, sstCount)
   536  			// The memtable size starts at 256KB and doubles up to 32MB so we expect 5
   537  			// logs (one for each doubling).
   538  			require.Equal(t, 7, logCount)
   539  
   540  			// Re-open the DB with a smaller memtable. Values for 1, 2 will fit in the first memtable;
   541  			// value for 3 will go in the next memtable; value for 4 will be in a flushable batch
   542  			// which will cause the previous memtable to be flushed; value for 5 will go in the next
   543  			// memtable
   544  			d, err = Open(dir, testingRandomized(&Options{
   545  				FS:           mem,
   546  				MemTableSize: 300 << 10,
   547  				ReadOnly:     readOnly,
   548  			}))
   549  			require.NoError(t, err)
   550  
   551  			if readOnly {
   552  				m := d.Metrics()
   553  				require.Equal(t, int64(logCount), m.WAL.Files)
   554  				d.mu.Lock()
   555  				require.NotNil(t, d.mu.mem.mutable)
   556  				d.mu.Unlock()
   557  			}
   558  			checkIter(d.NewIter(nil))
   559  			require.NoError(t, d.Close())
   560  		})
   561  	}
   562  }
   563  
   564  // Similar to TestOpenWALReplay, except we test replay behavior after a
   565  // memtable has been flushed. We test all 3 reasons for flushing: forced, size,
   566  // and large-batch.
   567  func TestOpenWALReplay2(t *testing.T) {
   568  	for _, readOnly := range []bool{false, true} {
   569  		t.Run(fmt.Sprintf("read-only=%t", readOnly), func(t *testing.T) {
   570  			for _, reason := range []string{"forced", "size", "large-batch"} {
   571  				t.Run(reason, func(t *testing.T) {
   572  					mem := vfs.NewMem()
   573  					d, err := Open("", testingRandomized(&Options{
   574  						FS:           mem,
   575  						MemTableSize: 256 << 10,
   576  					}))
   577  					require.NoError(t, err)
   578  
   579  					switch reason {
   580  					case "forced":
   581  						require.NoError(t, d.Set([]byte("1"), nil, nil))
   582  						require.NoError(t, d.Flush())
   583  						require.NoError(t, d.Set([]byte("2"), nil, nil))
   584  					case "size":
   585  						largeValue := []byte(strings.Repeat("a", 100<<10))
   586  						require.NoError(t, d.Set([]byte("1"), largeValue, nil))
   587  						require.NoError(t, d.Set([]byte("2"), largeValue, nil))
   588  						require.NoError(t, d.Set([]byte("3"), largeValue, nil))
   589  					case "large-batch":
   590  						largeValue := []byte(strings.Repeat("a", d.largeBatchThreshold))
   591  						require.NoError(t, d.Set([]byte("1"), nil, nil))
   592  						require.NoError(t, d.Set([]byte("2"), largeValue, nil))
   593  						require.NoError(t, d.Set([]byte("3"), nil, nil))
   594  					}
   595  					require.NoError(t, d.Close())
   596  
   597  					files, err := mem.List("")
   598  					require.NoError(t, err)
   599  					sort.Strings(files)
   600  					sstCount := 0
   601  					for _, fname := range files {
   602  						if strings.HasSuffix(fname, ".sst") {
   603  							sstCount++
   604  						}
   605  					}
   606  					require.Equal(t, 1, sstCount)
   607  
   608  					// Re-open the DB with a smaller memtable. Values for 1, 2 will fit in the first memtable;
   609  					// value for 3 will go in the next memtable; value for 4 will be in a flushable batch
   610  					// which will cause the previous memtable to be flushed; value for 5 will go in the next
   611  					// memtable
   612  					d, err = Open("", testingRandomized(&Options{
   613  						FS:           mem,
   614  						MemTableSize: 300 << 10,
   615  						ReadOnly:     readOnly,
   616  					}))
   617  					require.NoError(t, err)
   618  					require.NoError(t, d.Close())
   619  				})
   620  			}
   621  		})
   622  	}
   623  }
   624  
   625  // TestTwoWALReplayCorrupt tests WAL-replay behavior when the first of the two
   626  // WALs is corrupted with an sstable checksum error. Replay must stop at the
   627  // first WAL because otherwise we may violate point-in-time recovery
   628  // semantics. See #864.
   629  func TestTwoWALReplayCorrupt(t *testing.T) {
   630  	// Use the real filesystem so that we can seek and overwrite WAL data
   631  	// easily.
   632  	dir, err := ioutil.TempDir("", "wal-replay")
   633  	require.NoError(t, err)
   634  	defer os.RemoveAll(dir)
   635  
   636  	d, err := Open(dir, testingRandomized(&Options{
   637  		MemTableStopWritesThreshold: 4,
   638  		MemTableSize:                2048,
   639  	}))
   640  	require.NoError(t, err)
   641  	d.mu.Lock()
   642  	d.mu.compact.flushing = true
   643  	d.mu.Unlock()
   644  	require.NoError(t, d.Set([]byte("1"), []byte(strings.Repeat("a", 1024)), nil))
   645  	require.NoError(t, d.Set([]byte("2"), nil, nil))
   646  	d.mu.Lock()
   647  	d.mu.compact.flushing = false
   648  	d.mu.Unlock()
   649  	require.NoError(t, d.Close())
   650  
   651  	// We should have two WALs.
   652  	var logs []string
   653  	ls, err := vfs.Default.List(dir)
   654  	require.NoError(t, err)
   655  	for _, name := range ls {
   656  		if filepath.Ext(name) == ".log" {
   657  			logs = append(logs, name)
   658  		}
   659  	}
   660  	sort.Strings(logs)
   661  	if len(logs) < 2 {
   662  		t.Fatalf("expected at least two log files, found %d", len(logs))
   663  	}
   664  
   665  	// Corrupt the (n-1)th WAL by zeroing four bytes, 100 bytes from the end
   666  	// of the file.
   667  	f, err := os.OpenFile(filepath.Join(dir, logs[len(logs)-2]), os.O_RDWR, os.ModePerm)
   668  	require.NoError(t, err)
   669  	off, err := f.Seek(-100, 2)
   670  	require.NoError(t, err)
   671  	_, err = f.Write([]byte{0, 0, 0, 0})
   672  	require.NoError(t, err)
   673  	require.NoError(t, f.Close())
   674  	t.Logf("zeored four bytes in %s at offset %d\n", logs[len(logs)-2], off)
   675  
   676  	// Re-opening the database should detect and report the corruption.
   677  	_, err = Open(dir, nil)
   678  	require.Error(t, err, "bitalostable: corruption")
   679  }
   680  
   681  // TestTwoWALReplayCorrupt tests WAL-replay behavior when the first of the two
   682  // WALs is corrupted with an sstable checksum error and the OPTIONS file does
   683  // not enable the private strict_wal_tail option, indicating that the WAL was
   684  // produced by a database that did not guarantee clean WAL tails. See #864.
   685  func TestTwoWALReplayPermissive(t *testing.T) {
   686  	// Use the real filesystem so that we can seek and overwrite WAL data
   687  	// easily.
   688  	dir, err := ioutil.TempDir("", "wal-replay")
   689  	require.NoError(t, err)
   690  	defer os.RemoveAll(dir)
   691  
   692  	opts := &Options{
   693  		MemTableStopWritesThreshold: 4,
   694  		MemTableSize:                2048,
   695  	}
   696  	opts.testingRandomized()
   697  	opts.EnsureDefaults()
   698  	d, err := Open(dir, opts)
   699  	require.NoError(t, err)
   700  	d.mu.Lock()
   701  	d.mu.compact.flushing = true
   702  	d.mu.Unlock()
   703  	require.NoError(t, d.Set([]byte("1"), []byte(strings.Repeat("a", 1024)), nil))
   704  	require.NoError(t, d.Set([]byte("2"), nil, nil))
   705  	d.mu.Lock()
   706  	d.mu.compact.flushing = false
   707  	d.mu.Unlock()
   708  	require.NoError(t, d.Close())
   709  
   710  	// We should have two WALs.
   711  	var logs []string
   712  	var optionFilename string
   713  	ls, err := vfs.Default.List(dir)
   714  	require.NoError(t, err)
   715  	for _, name := range ls {
   716  		if filepath.Ext(name) == ".log" {
   717  			logs = append(logs, name)
   718  		}
   719  		if strings.HasPrefix(filepath.Base(name), "OPTIONS") {
   720  			optionFilename = name
   721  		}
   722  	}
   723  	sort.Strings(logs)
   724  	if len(logs) < 2 {
   725  		t.Fatalf("expected at least two log files, found %d", len(logs))
   726  	}
   727  
   728  	// Corrupt the (n-1)th WAL by zeroing four bytes, 100 bytes from the end
   729  	// of the file.
   730  	f, err := os.OpenFile(filepath.Join(dir, logs[len(logs)-2]), os.O_RDWR, os.ModePerm)
   731  	require.NoError(t, err)
   732  	off, err := f.Seek(-100, 2)
   733  	require.NoError(t, err)
   734  	_, err = f.Write([]byte{0, 0, 0, 0})
   735  	require.NoError(t, err)
   736  	require.NoError(t, f.Close())
   737  	t.Logf("zeored four bytes in %s at offset %d\n", logs[len(logs)-2], off)
   738  
   739  	// Remove the OPTIONS file containing the strict_wal_tail option.
   740  	require.NoError(t, vfs.Default.Remove(filepath.Join(dir, optionFilename)))
   741  
   742  	// Re-opening the database should not report the corruption.
   743  	d, err = Open(dir, nil)
   744  	require.NoError(t, err)
   745  	require.NoError(t, d.Close())
   746  }
   747  
// TestCrashOpenCrashAfterWALCreation tests a database that exits
// ungracefully, begins recovery, creates the new WAL but promptly exits
// ungracefully again.
//
// This sequence has the potential to be problematic with the strict_wal_tail
// behavior because the first crash's WAL has an unclean tail. By the time the
// new WAL is created, the current manifest's MinUnflushedLogNum must be
// higher than the previous WAL.
func TestCrashOpenCrashAfterWALCreation(t *testing.T) {
	fs := vfs.NewStrictMem()

	// getLogs returns the names of all entries in the DB directory that have
	// a ".log" extension.
	getLogs := func() (logs []string) {
		ls, err := fs.List("")
		require.NoError(t, err)
		for _, name := range ls {
			if filepath.Ext(name) == ".log" {
				logs = append(logs, name)
			}
		}
		return logs
	}

	{
		d, err := Open("", testingRandomized(&Options{FS: fs}))
		require.NoError(t, err)
		require.NoError(t, d.Set([]byte("abc"), nil, Sync))

		// Ignore syncs during close to simulate a crash. This will leave the WAL
		// without an EOF trailer. It won't be an 'unclean tail' yet since the
		// log file was not recycled, but we'll fix that down below.
		fs.SetIgnoreSyncs(true)
		require.NoError(t, d.Close())
		fs.ResetToSyncedState()
		fs.SetIgnoreSyncs(false)
	}

	// There should be one WAL.
	logs := getLogs()
	if len(logs) != 1 {
		t.Fatalf("expected one log file, found %d", len(logs))
	}

	// The one WAL file doesn't have an EOF trailer, but since it wasn't
	// recycled it won't have garbage at the end. Rewrite it so that it has
	// the same contents it currently has, followed by garbage.
	{
		f, err := fs.Open(logs[0])
		require.NoError(t, err)
		b, err := ioutil.ReadAll(f)
		require.NoError(t, err)
		require.NoError(t, f.Close())
		f, err = fs.Create(logs[0])
		require.NoError(t, err)
		_, err = f.Write(b)
		require.NoError(t, err)
		// Four garbage bytes appended after the original contents.
		_, err = f.Write([]byte{0xde, 0xad, 0xbe, 0xef})
		require.NoError(t, err)
		require.NoError(t, f.Sync())
		require.NoError(t, f.Close())
		// Sync the directory as well after rewriting the file.
		dir, err := fs.OpenDir("")
		require.NoError(t, err)
		require.NoError(t, dir.Sync())
		require.NoError(t, dir.Close())
	}

	// Open the database again (with syncs respected again). Wrap the
	// filesystem with an errorfs that will turn off syncs after a new .log
	// file is created and after a subsequent directory sync occurs. This
	// simulates a crash after the new log file is created and synced.
	{
		// Both flags are 0/1 values accessed only through sync/atomic.
		var atomicWALCreated, atomicDirSynced uint32
		d, err := Open("", &Options{
			FS: errorfs.Wrap(fs, errorfs.InjectorFunc(func(op errorfs.Op, path string) error {
				// Once the directory sync following the WAL creation has
				// been seen, every later operation first turns syncs off.
				if atomic.LoadUint32(&atomicDirSynced) == 1 {
					fs.SetIgnoreSyncs(true)
				}
				if op == errorfs.OpCreate && filepath.Ext(path) == ".log" {
					atomic.StoreUint32(&atomicWALCreated, 1)
				}
				// Record when there's a sync of the data directory after the
				// WAL was created. The data directory will have an empty
				// path because that's what we passed into Open.
				if op == errorfs.OpFileSync && path == "" && atomic.LoadUint32(&atomicWALCreated) == 1 {
					atomic.StoreUint32(&atomicDirSynced, 1)
				}
				// The injector never injects an error; it only observes
				// operations and toggles sync behavior.
				return nil
			})),
		})
		require.NoError(t, err)
		require.NoError(t, d.Close())
	}

	// Drop everything that was not synced, then honor syncs again.
	fs.ResetToSyncedState()
	fs.SetIgnoreSyncs(false)

	if n := len(getLogs()); n != 2 {
		t.Fatalf("expected two logs, found %d\n", n)
	}

	// Finally, open the database with syncs enabled.
	d, err := Open("", testingRandomized(&Options{FS: fs}))
	require.NoError(t, err)
	require.NoError(t, d.Close())
}
   852  
   853  // TestOpenWALReplayReadOnlySeqNums tests opening a database:
   854  //   - in read-only mode
   855  //   - with multiple unflushed log files that must replayed
   856  //   - a MANIFEST that sets the last sequence number to a number greater than
   857  //     the unflushed log files
   858  //
   859  // See cockroachdb/cockroach#48660.
   860  func TestOpenWALReplayReadOnlySeqNums(t *testing.T) {
   861  	const root = ""
   862  	mem := vfs.NewMem()
   863  
   864  	copyFiles := func(srcDir, dstDir string) {
   865  		files, err := mem.List(srcDir)
   866  		require.NoError(t, err)
   867  		for _, f := range files {
   868  			require.NoError(t, vfs.Copy(mem, mem.PathJoin(srcDir, f), mem.PathJoin(dstDir, f)))
   869  		}
   870  	}
   871  
   872  	// Create a new database under `/original` with a couple sstables.
   873  	dir := mem.PathJoin(root, "original")
   874  	d, err := Open(dir, testingRandomized(&Options{FS: mem}))
   875  	require.NoError(t, err)
   876  	require.NoError(t, d.Set([]byte("a"), nil, nil))
   877  	require.NoError(t, d.Flush())
   878  	require.NoError(t, d.Set([]byte("a"), nil, nil))
   879  	require.NoError(t, d.Flush())
   880  
   881  	// Prevent flushes so that multiple unflushed log files build up.
   882  	d.mu.Lock()
   883  	d.mu.compact.flushing = true
   884  	d.mu.Unlock()
   885  
   886  	require.NoError(t, d.Set([]byte("b"), nil, nil))
   887  	d.AsyncFlush()
   888  	require.NoError(t, d.Set([]byte("c"), nil, nil))
   889  	d.AsyncFlush()
   890  	require.NoError(t, d.Set([]byte("e"), nil, nil))
   891  
   892  	// Manually compact some of the key space so that the latest `logSeqNum` is
   893  	// written to the MANIFEST. This produces a MANIFEST where the `logSeqNum`
   894  	// is greater than the sequence numbers contained in the
   895  	// `minUnflushedLogNum` log file
   896  	require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false))
   897  	d.mu.Lock()
   898  	for d.mu.compact.compactingCount > 0 {
   899  		d.mu.compact.cond.Wait()
   900  	}
   901  	d.mu.Unlock()
   902  
   903  	// While the MANIFEST is still in this state, copy all the files in the
   904  	// database to a new directory.
   905  	replayDir := mem.PathJoin(root, "replay")
   906  	require.NoError(t, mem.MkdirAll(replayDir, os.ModePerm))
   907  	copyFiles(dir, replayDir)
   908  
   909  	d.mu.Lock()
   910  	d.mu.compact.flushing = false
   911  	d.mu.Unlock()
   912  	require.NoError(t, d.Close())
   913  
   914  	// Open the copy of the database in read-only mode. Since we copied all
   915  	// the files before the flushes were allowed to complete, there should be
   916  	// multiple unflushed log files that need to replay. Since the manual
   917  	// compaction completed, the `logSeqNum` read from the manifest should be
   918  	// greater than the unflushed log files' sequence numbers.
   919  	d, err = Open(replayDir, testingRandomized(&Options{
   920  		FS:       mem,
   921  		ReadOnly: true,
   922  	}))
   923  	require.NoError(t, err)
   924  	require.NoError(t, d.Close())
   925  }
   926  
   927  func TestOpenWALReplayMemtableGrowth(t *testing.T) {
   928  	mem := vfs.NewMem()
   929  	const memTableSize = 64 * 1024 * 1024
   930  	opts := &Options{
   931  		MemTableSize: memTableSize,
   932  		FS:           mem,
   933  	}
   934  	opts.testingRandomized()
   935  	func() {
   936  		db, err := Open("", opts)
   937  		require.NoError(t, err)
   938  		defer db.Close()
   939  		b := db.NewBatch()
   940  		defer b.Close()
   941  		key := make([]byte, 8)
   942  		val := make([]byte, 16*1024*1024)
   943  		b.Set(key, val, nil)
   944  		require.NoError(t, db.Apply(b, Sync))
   945  	}()
   946  	db, err := Open("", opts)
   947  	require.NoError(t, err)
   948  	db.Close()
   949  }
   950  
   951  func TestGetVersion(t *testing.T) {
   952  	mem := vfs.NewMem()
   953  	opts := &Options{
   954  		FS: mem,
   955  	}
   956  	opts.testingRandomized()
   957  
   958  	// Case 1: No options file.
   959  	version, err := GetVersion("", mem)
   960  	require.NoError(t, err)
   961  	require.Empty(t, version)
   962  
   963  	// Case 2: Pebble created file.
   964  	db, err := Open("", opts)
   965  	require.NoError(t, err)
   966  	require.NoError(t, db.Close())
   967  	version, err = GetVersion("", mem)
   968  	require.NoError(t, err)
   969  	require.Equal(t, "0.1", version)
   970  
   971  	// Case 3: Manually created OPTIONS file with a higher number.
   972  	highestOptionsNum := FileNum(0)
   973  	ls, err := mem.List("")
   974  	require.NoError(t, err)
   975  	for _, filename := range ls {
   976  		ft, fn, ok := base.ParseFilename(mem, filename)
   977  		if !ok {
   978  			continue
   979  		}
   980  		switch ft {
   981  		case fileTypeOptions:
   982  			if fn > highestOptionsNum {
   983  				highestOptionsNum = fn
   984  			}
   985  		}
   986  	}
   987  	f, _ := mem.Create(fmt.Sprintf("OPTIONS-%d", highestOptionsNum+1))
   988  	_, err = f.Write([]byte("[Version]\n  bitalostable_version=0.2\n"))
   989  	require.NoError(t, err)
   990  	err = f.Close()
   991  	require.NoError(t, err)
   992  	version, err = GetVersion("", mem)
   993  	require.NoError(t, err)
   994  	require.Equal(t, "0.2", version)
   995  
   996  	// Case 4: Manually created OPTIONS file with a RocksDB number.
   997  	f, _ = mem.Create(fmt.Sprintf("OPTIONS-%d", highestOptionsNum+2))
   998  	_, err = f.Write([]byte("[Version]\n  rocksdb_version=6.2.1\n"))
   999  	require.NoError(t, err)
  1000  	err = f.Close()
  1001  	require.NoError(t, err)
  1002  	version, err = GetVersion("", mem)
  1003  	require.NoError(t, err)
  1004  	require.Equal(t, "rocksdb v6.2.1", version)
  1005  }
  1006  
  1007  func TestRocksDBNoFlushManifest(t *testing.T) {
  1008  	mem := vfs.NewMem()
  1009  	// Have the comparer and merger names match what's in the testdata
  1010  	// directory.
  1011  	comparer := *DefaultComparer
  1012  	merger := *DefaultMerger
  1013  	comparer.Name = "cockroach_comparator"
  1014  	merger.Name = "cockroach_merge_operator"
  1015  	opts := &Options{
  1016  		FS:       mem,
  1017  		Comparer: &comparer,
  1018  		Merger:   &merger,
  1019  	}
  1020  
  1021  	// rocksdb-ingest-only is a RocksDB-generated db directory that has not had
  1022  	// a single flush yet, only ingestion operations. The manifest contains
  1023  	// a next-log-num but no log-num entry. Ensure that bitalostable can read these
  1024  	// directories without an issue.
  1025  	_, err := vfs.Clone(vfs.Default, mem, "testdata/rocksdb-ingest-only", "testdata")
  1026  	require.NoError(t, err)
  1027  
  1028  	db, err := Open("testdata", opts)
  1029  	require.NoError(t, err)
  1030  	defer db.Close()
  1031  
  1032  	val, closer, err := db.Get([]byte("ajulxeiombjiyw\x00\x00\x00\x00\x00\x00\x00\x01\x12\x09"))
  1033  	require.NoError(t, err)
  1034  	require.NotEmpty(t, val)
  1035  	require.NoError(t, closer.Close())
  1036  }
  1037  
  1038  func TestOpen_ErrorIfUnknownFormatVersion(t *testing.T) {
  1039  	fs := vfs.NewMem()
  1040  	d, err := Open("", &Options{
  1041  		FS:                 fs,
  1042  		FormatMajorVersion: FormatVersioned,
  1043  	})
  1044  	require.NoError(t, err)
  1045  	require.NoError(t, d.Close())
  1046  
  1047  	// Move the marker to a version that does not exist.
  1048  	m, _, err := atomicfs.LocateMarker(fs, "", formatVersionMarkerName)
  1049  	require.NoError(t, err)
  1050  	require.NoError(t, m.Move("999999"))
  1051  	require.NoError(t, m.Close())
  1052  
  1053  	_, err = Open("", &Options{
  1054  		FS:                 fs,
  1055  		FormatMajorVersion: FormatVersioned,
  1056  	})
  1057  	require.Error(t, err)
  1058  	require.EqualError(t, err, `bitalostable: database "" written in format major version 999999`)
  1059  }
  1060  
  1061  // ensureFilesClosed updates the provided Options to wrap the filesystem. It
  1062  // returns a closure that when invoked fails the test if any files opened by the
  1063  // filesystem are not closed.
  1064  //
  1065  // This function is intended to be used in tests with defer.
  1066  //
  1067  //	opts := &Options{FS: vfs.NewMem()}
  1068  //	defer ensureFilesClosed(t, opts)()
  1069  //	/* test code */
  1070  func ensureFilesClosed(t *testing.T, o *Options) func() {
  1071  	fs := &closeTrackingFS{
  1072  		FS:    o.FS,
  1073  		files: map[*closeTrackingFile]struct{}{},
  1074  	}
  1075  	o.FS = fs
  1076  	return func() {
  1077  		// fs.files should be empty if all the files were closed.
  1078  		for f := range fs.files {
  1079  			t.Errorf("An open file was never closed. Opened at:\n%s", f.stack)
  1080  		}
  1081  	}
  1082  }
  1083  
// closeTrackingFS wraps a vfs.FS and keeps a record of the files returned by
// its Create, Open, OpenDir and ReuseForWrite methods so that tests can detect
// files that were never closed.
type closeTrackingFS struct {
	vfs.FS
	// files holds every tracked file that is currently open: wrap adds an
	// entry and closeTrackingFile.Close removes it.
	files map[*closeTrackingFile]struct{}
}
  1088  
  1089  func (fs *closeTrackingFS) wrap(file vfs.File, err error) (vfs.File, error) {
  1090  	if err != nil {
  1091  		return nil, err
  1092  	}
  1093  	f := &closeTrackingFile{
  1094  		File:  file,
  1095  		fs:    fs,
  1096  		stack: debug.Stack(),
  1097  	}
  1098  	fs.files[f] = struct{}{}
  1099  	return f, err
  1100  }
  1101  
  1102  func (fs *closeTrackingFS) Create(name string) (vfs.File, error) {
  1103  	return fs.wrap(fs.FS.Create(name))
  1104  }
  1105  
  1106  func (fs *closeTrackingFS) Open(name string, opts ...vfs.OpenOption) (vfs.File, error) {
  1107  	return fs.wrap(fs.FS.Open(name))
  1108  }
  1109  
  1110  func (fs *closeTrackingFS) OpenDir(name string) (vfs.File, error) {
  1111  	return fs.wrap(fs.FS.OpenDir(name))
  1112  }
  1113  
  1114  func (fs *closeTrackingFS) ReuseForWrite(oldname, newname string) (vfs.File, error) {
  1115  	return fs.wrap(fs.FS.ReuseForWrite(oldname, newname))
  1116  }
  1117  
// closeTrackingFile is a vfs.File handed out by closeTrackingFS. It remembers
// the filesystem that created it and the stack at the time it was opened.
type closeTrackingFile struct {
	vfs.File
	// fs is the tracking filesystem whose files map contains this file
	// until Close is called.
	fs *closeTrackingFS
	// stack is the debug.Stack() output captured when the file was wrapped.
	stack []byte
}
  1123  
  1124  func (f *closeTrackingFile) Close() error {
  1125  	delete(f.fs.files, f)
  1126  	return f.File.Close()
  1127  }