github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/checkpoint.go (about)

     1  // Copyright 2019 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package bitalostable
     6  
     7  import (
     8  	"os"
     9  
    10  	"github.com/cockroachdb/errors/oserror"
    11  	"github.com/zuoyebang/bitalostable/internal/base"
    12  	"github.com/zuoyebang/bitalostable/vfs"
    13  	"github.com/zuoyebang/bitalostable/vfs/atomicfs"
    14  )
    15  
// checkpointOptions hold the optional parameters to construct checkpoint
// snapshots. Instances are populated by applying CheckpointOption functions
// passed to DB.Checkpoint; the zero value is the default configuration.
type checkpointOptions struct {
	// flushWAL set to true will force a flush and sync of the WAL prior to
	// checkpointing. Set via WithFlushedWAL.
	flushWAL bool
}
    23  
// CheckpointOption set optional parameters used by `DB.Checkpoint`.
// Each option is a function that mutates a checkpointOptions value
// (the functional-options pattern); see WithFlushedWAL for an example.
type CheckpointOption func(*checkpointOptions)
    26  
    27  // WithFlushedWAL enables flushing and syncing the WAL prior to constructing a
    28  // checkpoint. This guarantees that any writes committed before calling
    29  // DB.Checkpoint will be part of that checkpoint.
    30  //
    31  // Note that this setting can only be useful in cases when some writes are
    32  // performed with Sync = false. Otherwise, the guarantee will already be met.
    33  //
    34  // Passing this option is functionally equivalent to calling
    35  // DB.LogData(nil, Sync) right before DB.Checkpoint.
    36  func WithFlushedWAL() CheckpointOption {
    37  	return func(opt *checkpointOptions) {
    38  		opt.flushWAL = true
    39  	}
    40  }
    41  
    42  // mkdirAllAndSyncParents creates destDir and any of its missing parents.
    43  // Those missing parents, as well as the closest existing ancestor, are synced.
    44  // Returns a handle to the directory created at destDir.
    45  func mkdirAllAndSyncParents(fs vfs.FS, destDir string) (vfs.File, error) {
    46  	// Collect paths for all directories between destDir (excluded) and its
    47  	// closest existing ancestor (included).
    48  	var parentPaths []string
    49  	foundExistingAncestor := false
    50  	for parentPath := fs.PathDir(destDir); parentPath != "."; parentPath = fs.PathDir(parentPath) {
    51  		parentPaths = append(parentPaths, parentPath)
    52  		_, err := fs.Stat(parentPath)
    53  		if err == nil {
    54  			// Exit loop at the closest existing ancestor.
    55  			foundExistingAncestor = true
    56  			break
    57  		}
    58  		if !oserror.IsNotExist(err) {
    59  			return nil, err
    60  		}
    61  	}
    62  	// Handle empty filesystem edge case.
    63  	if !foundExistingAncestor {
    64  		parentPaths = append(parentPaths, "")
    65  	}
    66  	// Create destDir and any of its missing parents.
    67  	if err := fs.MkdirAll(destDir, 0755); err != nil {
    68  		return nil, err
    69  	}
    70  	// Sync all the parent directories up to the closest existing ancestor,
    71  	// included.
    72  	for _, parentPath := range parentPaths {
    73  		parentDir, err := fs.OpenDir(parentPath)
    74  		if err != nil {
    75  			return nil, err
    76  		}
    77  		err = parentDir.Sync()
    78  		if err != nil {
    79  			_ = parentDir.Close()
    80  			return nil, err
    81  		}
    82  		err = parentDir.Close()
    83  		if err != nil {
    84  			return nil, err
    85  		}
    86  	}
    87  	return fs.OpenDir(destDir)
    88  }
    89  
    90  // Checkpoint constructs a snapshot of the DB instance in the specified
    91  // directory. The WAL, MANIFEST, OPTIONS, and sstables will be copied into the
    92  // snapshot. Hard links will be used when possible. Beware of the significant
    93  // space overhead for a checkpoint if hard links are disabled. Also beware that
    94  // even if hard links are used, the space overhead for the checkpoint will
    95  // increase over time as the DB performs compactions.
    96  func (d *DB) Checkpoint(
    97  	destDir string, opts ...CheckpointOption,
    98  ) (
    99  	ckErr error, /* used in deferred cleanup */
   100  ) {
   101  	opt := &checkpointOptions{}
   102  	for _, fn := range opts {
   103  		fn(opt)
   104  	}
   105  
   106  	if _, err := d.opts.FS.Stat(destDir); !oserror.IsNotExist(err) {
   107  		if err == nil {
   108  			return &os.PathError{
   109  				Op:   "checkpoint",
   110  				Path: destDir,
   111  				Err:  oserror.ErrExist,
   112  			}
   113  		}
   114  		return err
   115  	}
   116  
   117  	if opt.flushWAL && !d.opts.DisableWAL {
   118  		// Write an empty log-data record to flush and sync the WAL.
   119  		if err := d.LogData(nil /* data */, Sync); err != nil {
   120  			return err
   121  		}
   122  	}
   123  
   124  	// Disable file deletions.
   125  	d.mu.Lock()
   126  	d.disableFileDeletions()
   127  	defer func() {
   128  		d.mu.Lock()
   129  		defer d.mu.Unlock()
   130  		d.enableFileDeletions()
   131  	}()
   132  
   133  	// TODO(peter): RocksDB provides the option to roll the manifest if the
   134  	// MANIFEST size is too large. Should we do this too?
   135  
   136  	// Lock the manifest before getting the current version. We need the
   137  	// length of the manifest that we read to match the current version that
   138  	// we read, otherwise we might copy a versionEdit not reflected in the
   139  	// sstables we copy/link.
   140  	d.mu.versions.logLock()
   141  	// Get the unflushed log files, the current version, and the current manifest
   142  	// file number.
   143  	memQueue := d.mu.mem.queue
   144  	current := d.mu.versions.currentVersion()
   145  	formatVers := d.mu.formatVers.vers
   146  	manifestFileNum := d.mu.versions.manifestFileNum
   147  	manifestSize := d.mu.versions.manifest.Size()
   148  	optionsFileNum := d.optionsFileNum
   149  
   150  	// Release the manifest and DB.mu so we don't block other operations on
   151  	// the database.
   152  	d.mu.versions.logUnlock()
   153  	d.mu.Unlock()
   154  
   155  	// Wrap the normal filesystem with one which wraps newly created files with
   156  	// vfs.NewSyncingFile.
   157  	fs := syncingFS{
   158  		FS: d.opts.FS,
   159  		syncOpts: vfs.SyncingFileOptions{
   160  			NoSyncOnClose: d.opts.NoSyncOnClose,
   161  			BytesPerSync:  d.opts.BytesPerSync,
   162  		},
   163  	}
   164  
   165  	// Create the dir and its parents (if necessary), and sync them.
   166  	var dir vfs.File
   167  	defer func() {
   168  		if dir != nil {
   169  			_ = dir.Close()
   170  		}
   171  		if ckErr != nil {
   172  			// Attempt to cleanup on error.
   173  			paths, _ := fs.List(destDir)
   174  			for _, path := range paths {
   175  				_ = fs.Remove(path)
   176  			}
   177  			_ = fs.Remove(destDir)
   178  		}
   179  	}()
   180  	dir, ckErr = mkdirAllAndSyncParents(fs, destDir)
   181  	if ckErr != nil {
   182  		return ckErr
   183  	}
   184  
   185  	{
   186  		// Link or copy the OPTIONS.
   187  		srcPath := base.MakeFilepath(fs, d.dirname, fileTypeOptions, optionsFileNum)
   188  		destPath := fs.PathJoin(destDir, fs.PathBase(srcPath))
   189  		ckErr = vfs.LinkOrCopy(fs, srcPath, destPath)
   190  		if ckErr != nil {
   191  			return ckErr
   192  		}
   193  	}
   194  
   195  	{
   196  		// Set the format major version in the destination directory.
   197  		var versionMarker *atomicfs.Marker
   198  		versionMarker, _, ckErr = atomicfs.LocateMarker(fs, destDir, formatVersionMarkerName)
   199  		if ckErr != nil {
   200  			return ckErr
   201  		}
   202  
   203  		// We use the marker to encode the active format version in the
   204  		// marker filename. Unlike other uses of the atomic marker,
   205  		// there is no file with the filename `formatVers.String()` on
   206  		// the filesystem.
   207  		ckErr = versionMarker.Move(formatVers.String())
   208  		if ckErr != nil {
   209  			return ckErr
   210  		}
   211  		ckErr = versionMarker.Close()
   212  		if ckErr != nil {
   213  			return ckErr
   214  		}
   215  	}
   216  
   217  	{
   218  		// Copy the MANIFEST, and create a pointer to it. We copy rather
   219  		// than link because additional version edits added to the
   220  		// MANIFEST after we took our snapshot of the sstables will
   221  		// reference sstables that aren't in our checkpoint. For a
   222  		// similar reason, we need to limit how much of the MANIFEST we
   223  		// copy.
   224  		srcPath := base.MakeFilepath(fs, d.dirname, fileTypeManifest, manifestFileNum)
   225  		destPath := fs.PathJoin(destDir, fs.PathBase(srcPath))
   226  		ckErr = vfs.LimitedCopy(fs, srcPath, destPath, manifestSize)
   227  		if ckErr != nil {
   228  			return ckErr
   229  		}
   230  
   231  		// Recent format versions use an atomic marker for setting the
   232  		// active manifest. Older versions use the CURRENT file. The
   233  		// setCurrentFunc function will return a closure that will
   234  		// take the appropriate action for the database's format
   235  		// version.
   236  		var manifestMarker *atomicfs.Marker
   237  		manifestMarker, _, ckErr = atomicfs.LocateMarker(fs, destDir, manifestMarkerName)
   238  		if ckErr != nil {
   239  			return ckErr
   240  		}
   241  		ckErr = setCurrentFunc(formatVers, manifestMarker, fs, destDir, dir)(manifestFileNum)
   242  		if ckErr != nil {
   243  			return ckErr
   244  		}
   245  		ckErr = manifestMarker.Close()
   246  		if ckErr != nil {
   247  			return ckErr
   248  		}
   249  	}
   250  
   251  	// Link or copy the sstables.
   252  	for l := range current.Levels {
   253  		iter := current.Levels[l].Iter()
   254  		for f := iter.First(); f != nil; f = iter.Next() {
   255  			srcPath := base.MakeFilepath(fs, d.dirname, fileTypeTable, f.FileNum)
   256  			destPath := fs.PathJoin(destDir, fs.PathBase(srcPath))
   257  			ckErr = vfs.LinkOrCopy(fs, srcPath, destPath)
   258  			if ckErr != nil {
   259  				return ckErr
   260  			}
   261  		}
   262  	}
   263  
   264  	// Copy the WAL files. We copy rather than link because WAL file recycling
   265  	// will cause the WAL files to be reused which would invalidate the
   266  	// checkpoint.
   267  	for i := range memQueue {
   268  		logNum := memQueue[i].logNum
   269  		if logNum == 0 {
   270  			continue
   271  		}
   272  		srcPath := base.MakeFilepath(fs, d.walDirname, fileTypeLog, logNum)
   273  		destPath := fs.PathJoin(destDir, fs.PathBase(srcPath))
   274  		ckErr = vfs.Copy(fs, srcPath, destPath)
   275  		if ckErr != nil {
   276  			return ckErr
   277  		}
   278  	}
   279  
   280  	// Sync and close the checkpoint directory.
   281  	ckErr = dir.Sync()
   282  	if ckErr != nil {
   283  		return ckErr
   284  	}
   285  	ckErr = dir.Close()
   286  	dir = nil
   287  	return ckErr
   288  }