github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/metamorphic/test.go

// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package metamorphic

import (
	"context"
	"fmt"
	"io"
	"os"
	"path"
	"sort"
	"strings"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/cockroachdb/pebble/vfs/errorfs"
)

type test struct {
	// The list of ops to execute. The ops refer to slots in the batches, iters,
	// and snapshots slices.
	ops       []op
	opsWaitOn [][]int         // op index -> op indexes
	opsDone   []chan struct{} // op index -> done channel
	idx       int
	dir       string
	opts      *pebble.Options
	testOpts  *TestOptions
	writeOpts *pebble.WriteOptions
	tmpDir    string
	// The DBs the test is run on.
	dbs []*pebble.DB
	// The slots for the batches, iterators, and snapshots. These are read and
	// written by the ops to pass state from one op to another.
	batches   []*pebble.Batch
	iters     []*retryableIter
	snapshots []readerCloser
}
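
// For illustration (a hypothetical op sequence): an op like
// "batch2 = db1.NewBatch()" stores the new batch in t.batches[2], and a later
// "batch2.Set(...)" reads it back from the same slot. Note that DB slots are
// 1-based (db1 maps to t.dbs[0]), while the batch, iter, and snapshot slots
// index their slices directly.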

func newTest(ops []op) *test {
	return &test{
		ops: ops,
	}
}

func (t *test) init(h *history, dir string, testOpts *TestOptions, numInstances int) error {
	t.dir = dir
	t.testOpts = testOpts
	t.writeOpts = pebble.NoSync
	if testOpts.strictFS {
		t.writeOpts = pebble.Sync
	}
	t.opts = testOpts.Opts.EnsureDefaults()
	t.opts.Logger = h
	lel := pebble.MakeLoggingEventListener(t.opts.Logger)
	t.opts.EventListener = &lel
	t.opts.DebugCheck = func(db *pebble.DB) error {
		// Wrap the ordinary DebugCheckLevels with retrying
		// of injected errors.
		return withRetries(func() error {
			return pebble.DebugCheckLevels(db)
		})
	}
	if numInstances < 1 {
		numInstances = 1
	}

	t.opsWaitOn, t.opsDone = computeSynchronizationPoints(t.ops)

	defer t.opts.Cache.Unref()

	// If an error occurs and we were using an in-memory FS, attempt to clone to
	// on-disk in order to allow post-mortem debugging. Note that always using
	// the on-disk FS isn't desirable because there is a large performance
	// difference between in-memory and on-disk which causes different code paths
	// and timings to be exercised.
	maybeExit := func(err error) {
		if err == nil || errors.Is(err, errorfs.ErrInjected) || errors.Is(err, pebble.ErrCancelledCompaction) {
			return
		}
		t.maybeSaveData()
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// Exit early on any error from a background operation.
	t.opts.EventListener.BackgroundError = func(err error) {
		t.opts.Logger.Infof("background error: %s", err)
		maybeExit(err)
	}
	t.opts.EventListener.CompactionEnd = func(info pebble.CompactionInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.FlushEnd = func(info pebble.FlushInfo) {
		t.opts.Logger.Infof("%s", info)
		if info.Err != nil && !strings.Contains(info.Err.Error(), "pebble: empty table") {
			maybeExit(info.Err)
		}
	}
	t.opts.EventListener.ManifestCreated = func(info pebble.ManifestCreateInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.ManifestDeleted = func(info pebble.ManifestDeleteInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.TableDeleted = func(info pebble.TableDeleteInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.TableIngested = func(info pebble.TableIngestInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.WALCreated = func(info pebble.WALCreateInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}
	t.opts.EventListener.WALDeleted = func(info pebble.WALDeleteInfo) {
		t.opts.Logger.Infof("%s", info)
		maybeExit(info.Err)
	}

	for i := range t.testOpts.CustomOpts {
		if err := t.testOpts.CustomOpts[i].Open(t.opts); err != nil {
			return err
		}
	}

	t.dbs = make([]*pebble.DB, numInstances)
	for i := range t.dbs {
		var db *pebble.DB
		var err error
		if len(t.dbs) > 1 {
			dir = path.Join(t.dir, fmt.Sprintf("db%d", i+1))
		}
		err = withRetries(func() error {
			db, err = pebble.Open(dir, t.opts)
			return err
		})
		if err != nil {
			return err
		}
		t.dbs[i] = db
		h.log.Printf("// db%d.Open() %v", i+1, err)

		if t.testOpts.sharedStorageEnabled {
			err = withRetries(func() error {
				return db.SetCreatorID(uint64(i + 1))
			})
			if err != nil {
				return err
			}
			h.log.Printf("// db%d.SetCreatorID() %v", i+1, err)
		}
	}

	var err error
	t.tmpDir = t.opts.FS.PathJoin(t.dir, "tmp")
	if err = t.opts.FS.MkdirAll(t.tmpDir, 0755); err != nil {
		return err
	}
	if t.testOpts.strictFS {
		// Sync the whole directory path for tmpDir: restartDB() is executed
		// during the test and resets MemFS to its synced state, which would
		// make an unsynced directory disappear in the middle of the test. It
		// is the responsibility of the test (not Pebble) to ensure that it can
		// write the ssts that it will subsequently ingest into Pebble.
		for {
			f, err := t.opts.FS.OpenDir(dir)
			if err != nil {
				return err
			}
			if err = f.Sync(); err != nil {
				return err
			}
			if err = f.Close(); err != nil {
				return err
			}
			if len(dir) == 1 {
				break
			}
			dir = t.opts.FS.PathDir(dir)
			// TODO(sbhola): PathDir returns ".", which OpenDir() complains about. Fix.
			if len(dir) == 1 {
				dir = "/"
			}
		}
	}

	return nil
}

func (t *test) isFMV(dbID objID, fmv pebble.FormatMajorVersion) bool {
	db := t.getDB(dbID)
	return db.FormatMajorVersion() >= fmv
}

func (t *test) restartDB(dbID objID) error {
	db := t.getDB(dbID)
	if !t.testOpts.strictFS {
		return nil
	}
	t.opts.Cache.Ref()
	// The fs isn't necessarily a MemFS.
	fs, ok := vfs.Root(t.opts.FS).(*vfs.MemFS)
	if ok {
		fs.SetIgnoreSyncs(true)
	}
	if err := db.Close(); err != nil {
		return err
	}
	// Release any resources held by custom options. This may be used, for
	// example, by the encryption-at-rest custom option (within the Cockroach
	// repository) to close the file registry.
	for i := range t.testOpts.CustomOpts {
		if err := t.testOpts.CustomOpts[i].Close(t.opts); err != nil {
			return err
		}
	}
	if ok {
		fs.ResetToSyncedState()
		fs.SetIgnoreSyncs(false)
	}

	// TODO(jackson): Audit errorRate and ensure custom options' hooks semantics
	// are well defined within the context of retries.
	err := withRetries(func() (err error) {
		// Reacquire any resources required by custom options. This may be used, for
		// example, by the encryption-at-rest custom option (within the Cockroach
		// repository) to reopen the file registry.
		for i := range t.testOpts.CustomOpts {
			if err := t.testOpts.CustomOpts[i].Open(t.opts); err != nil {
				return err
			}
		}
		dir := t.dir
		if len(t.dbs) > 1 {
			dir = path.Join(dir, fmt.Sprintf("db%d", dbID.slot()))
		}
		t.dbs[dbID.slot()-1], err = pebble.Open(dir, t.opts)
		return err
	})
	t.opts.Cache.Unref()
	return err
}
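
// For illustration, the crash-restart behavior that restartDB relies on looks
// roughly like the following (a sketch using vfs.NewStrictMem; not part of
// the test itself):
//
//	fs := vfs.NewStrictMem()
//	f, _ := fs.Create("wal")
//	_, _ = f.Write([]byte("unsynced"))
//	_ = f.Close()           // closed, but never synced
//	fs.ResetToSyncedState() // "wal" is gone, as after a machine crash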

func (t *test) maybeSaveDataInternal() error {
	rootFS := vfs.Root(t.opts.FS)
	if rootFS == vfs.Default {
		return nil
	}
	if err := os.RemoveAll(t.dir); err != nil {
		return err
	}
	if _, err := vfs.Clone(rootFS, vfs.Default, t.dir, t.dir); err != nil {
		return err
	}
	if t.testOpts.sharedStorageEnabled {
		fs := t.testOpts.sharedStorageFS
		outputDir := vfs.Default.PathJoin(t.dir, "shared", string(t.testOpts.Opts.Experimental.CreateOnSharedLocator))
		vfs.Default.MkdirAll(outputDir, 0755)
		objs, err := fs.List("", "")
		if err != nil {
			return err
		}
		for i := range objs {
			reader, readSize, err := fs.ReadObject(context.TODO(), objs[i])
			if err != nil {
				return err
			}
			buf := make([]byte, readSize)
			if err := reader.ReadAt(context.TODO(), buf, 0); err != nil {
				return err
			}
			outputPath := vfs.Default.PathJoin(outputDir, objs[i])
			outputFile, err := vfs.Default.Create(outputPath)
			if err != nil {
				return err
			}
			if _, err := outputFile.Write(buf); err != nil {
				outputFile.Close()
				return err
			}
			if err := outputFile.Close(); err != nil {
				return err
			}
		}
	}
	return nil
}

// If an in-memory FS is being used, save the contents to disk.
func (t *test) maybeSaveData() {
	if err := t.maybeSaveDataInternal(); err != nil {
		t.opts.Logger.Infof("unable to save data: %s: %v", t.dir, err)
	}
}

func (t *test) step(h *history) bool {
	if t.idx >= len(t.ops) {
		return false
	}
	t.ops[t.idx].run(t, h.recorder(-1 /* thread */, t.idx))
	t.idx++
	return true
}
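
// A minimal sketch of how a driver could use newTest, init, and step (an
// illustrative assumption; the package's actual runners layer parallelism and
// error injection on top of this):
//
//	t := newTest(ops)
//	if err := t.init(h, dir, testOpts, 1 /* numInstances */); err != nil {
//		return err
//	}
//	for t.step(h) {
//		// Each iteration runs one op and records its output in the history.
//	}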

func (t *test) setBatch(id objID, b *pebble.Batch) {
	if id.tag() != batchTag {
		panic(fmt.Sprintf("invalid batch ID: %s", id))
	}
	t.batches[id.slot()] = b
}

func (t *test) setIter(id objID, i *pebble.Iterator) {
	if id.tag() != iterTag {
		panic(fmt.Sprintf("invalid iter ID: %s", id))
	}
	t.iters[id.slot()] = &retryableIter{
		iter:    i,
		lastKey: nil,
	}
}

type readerCloser interface {
	pebble.Reader
	io.Closer
}

func (t *test) setSnapshot(id objID, s readerCloser) {
	if id.tag() != snapTag {
		panic(fmt.Sprintf("invalid snapshot ID: %s", id))
	}
	t.snapshots[id.slot()] = s
}

func (t *test) clearObj(id objID) {
	switch id.tag() {
	case dbTag:
		t.dbs[id.slot()-1] = nil
	case batchTag:
		t.batches[id.slot()] = nil
	case iterTag:
		t.iters[id.slot()] = nil
	case snapTag:
		t.snapshots[id.slot()] = nil
	}
}

func (t *test) getBatch(id objID) *pebble.Batch {
	if id.tag() != batchTag {
		panic(fmt.Sprintf("invalid batch ID: %s", id))
	}
	return t.batches[id.slot()]
}

func (t *test) getCloser(id objID) io.Closer {
	switch id.tag() {
	case dbTag:
		return t.dbs[id.slot()-1]
	case batchTag:
		return t.batches[id.slot()]
	case iterTag:
		return t.iters[id.slot()]
	case snapTag:
		return t.snapshots[id.slot()]
	}
	panic(fmt.Sprintf("cannot close ID: %s", id))
}

func (t *test) getIter(id objID) *retryableIter {
	if id.tag() != iterTag {
		panic(fmt.Sprintf("invalid iter ID: %s", id))
	}
	return t.iters[id.slot()]
}

func (t *test) getReader(id objID) pebble.Reader {
	switch id.tag() {
	case dbTag:
		return t.dbs[id.slot()-1]
	case batchTag:
		return t.batches[id.slot()]
	case snapTag:
		return t.snapshots[id.slot()]
	}
	panic(fmt.Sprintf("invalid reader ID: %s", id))
}

func (t *test) getWriter(id objID) pebble.Writer {
	switch id.tag() {
	case dbTag:
		return t.dbs[id.slot()-1]
	case batchTag:
		return t.batches[id.slot()]
	}
	panic(fmt.Sprintf("invalid writer ID: %s", id))
}

func (t *test) getDB(id objID) *pebble.DB {
	switch id.tag() {
	case dbTag:
		return t.dbs[id.slot()-1]
	default:
		panic(fmt.Sprintf("invalid DB tag: %v", id.tag()))
	}
}

// Compute the synchronization points between operations. When operating
// with more than 1 thread, operations must synchronize access to shared
// objects. Compute two slices the same length as ops.
//
// opsWaitOn: the value v at index i indicates that operation i must wait
// for the operation at index v to finish before it may run. NB: v < i
//
// opsDone: the channel at index i must be closed when the operation at index i
// completes. This slice is sparse. Operations that are never used as
// synchronization points may have a nil channel.
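//
// For example, consider this hypothetical op sequence (the syntax is
// informal; the op and object names are illustrative only):
//
//	ops[0]: db1.Open()              // receiver db1
//	ops[1]: batch1 = db1.NewBatch() // receiver db1, syncObjs: batch1
//	ops[2]: batch1.Commit()         // receiver batch1, syncObjs: db1
//
// ops[1] shares its receiver (db1) with ops[0], so the two run on the same
// thread and need no synchronization. ops[2]'s receiver batch1 was last
// referenced by ops[1], which has a different receiver, so opsWaitOn[2]
// includes index 1 (and again via the db1 sync object), and opsDone[1] is
// allocated a channel.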
func computeSynchronizationPoints(ops []op) (opsWaitOn [][]int, opsDone []chan struct{}) {
	opsDone = make([]chan struct{}, len(ops)) // operation index -> done channel
	opsWaitOn = make([][]int, len(ops))       // operation index -> operation index
	lastOpReference := make(map[objID]int)    // objID -> operation index
	for i, o := range ops {
		// Find the last operation that involved the same receiver object. We at
		// least need to wait on that operation.
		receiver := o.receiver()
		waitIndex, ok := lastOpReference[receiver]
		lastOpReference[receiver] = i
		if !ok {
			// This is the first reference of the receiver, which is only valid
			// for i=0 or for DB receivers; any other object should have been
			// referenced by an earlier operation before it's used as a
			// receiver.
			if i != 0 && receiver.tag() != dbTag {
				panic(fmt.Sprintf("op %s on receiver %s; first reference of %s", ops[i].String(), receiver, receiver))
			}
			// The initOp is a little special. We do want to store the objects it's
			// syncing on, in `lastOpReference`.
			if i != 0 {
				continue
			}
		}

		// The last operation that referenced `receiver` is the one at index
		// `waitIndex`. All operations with the same receiver are performed on
		// the same thread. We only need to synchronize on the operation at
		// `waitIndex` if `receiver` wasn't also the receiver of that
		// operation.
		if ops[waitIndex].receiver() != receiver {
			opsWaitOn[i] = append(opsWaitOn[i], waitIndex)
		}

		// In addition to synchronizing on the last operation to reference the
		// receiver, we may need to synchronize on additional objects. For
		// example, batch0.Commit() must synchronize on its receiver, batch0,
		// but also on the DB, since it mutates database state.
		for _, syncObjID := range o.syncObjs() {
			if vi, vok := lastOpReference[syncObjID]; vok {
				opsWaitOn[i] = append(opsWaitOn[i], vi)
			}
			lastOpReference[syncObjID] = i
		}

		waitIndexes := opsWaitOn[i]
		sort.Ints(waitIndexes)
		for _, waitIndex := range waitIndexes {
			// If this is the first operation that must wait on the operation at
			// `waitIndex`, then there will be no channel for the operation yet.
			// Create one.
			if opsDone[waitIndex] == nil {
				opsDone[waitIndex] = make(chan struct{})
			}
		}
	}
	return opsWaitOn, opsDone
}