github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/vfs/vfs.go (about)

     1  // Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package vfs
     6  
     7  import (
     8  	"io"
     9  	"os"
    10  	"path/filepath"
    11  	"syscall"
    12  
    13  	"github.com/cockroachdb/errors"
    14  	"github.com/cockroachdb/errors/oserror"
    15  )
    16  
    17  // File is a readable, writable sequence of bytes.
    18  //
    19  // Typically, it will be an *os.File, but test code may choose to substitute
    20  // memory-backed implementations.
    21  //
    22  // Write-oriented operations (Write, Sync) must be called sequentially: At most
    23  // 1 call to Write or Sync may be executed at any given time.
    24  type File interface {
    25  	io.Closer
    26  	io.Reader
    27  	io.ReaderAt
    28  	// Unlike the specification for io.Writer.Write(), the vfs.File.Write()
    29  	// method *is* allowed to modify the slice passed in, whether temporarily
    30  	// or permanently. Callers of Write() need to take this into account.
    31  	io.Writer
    32  	// WriteAt() is only supported for files that were opened with FS.OpenReadWrite.
    33  	io.WriterAt
    34  
    35  	// Preallocate optionally preallocates storage for `length` at `offset`
    36  	// within the file. Implementations may choose to do nothing.
    37  	Preallocate(offset, length int64) error
    38  	Stat() (os.FileInfo, error)
    39  	Sync() error
    40  
    41  	// SyncTo requests that a prefix of the file's data be synced to stable
    42  	// storage. The caller provides a `length`, indicating how many bytes
    43  	// to sync from the beginning of the file. SyncTo is a no-op for
    44  	// directories, and therefore always returns false for them.
    45  	//
    46  	// SyncTo returns a fullSync return value, indicating one of two possible
    47  	// outcomes.
    48  	//
    49  	// If fullSync is false, the first `length` bytes of the file were queued
    50  	// to be synced to stable storage. The syncing of the file prefix may happen
    51  	// asynchronously. No persistence guarantee is provided.
    52  	//
    53  	// If fullSync is true, the entirety of the file's contents were
    54  	// synchronously synced to stable storage, and a persistence guarantee is
    55  	// provided. In this outcome, any modified metadata for the file is not
    56  	// guaranteed to be synced unless that metadata is needed in order to allow
    57  	// a subsequent data retrieval to be correctly handled.
    58  	SyncTo(length int64) (fullSync bool, err error)
    59  
    60  	// SyncData requires that all written data be persisted. File metadata is
    61  	// not required to be synced. Unsophisticated implementations may call Sync.
    62  	SyncData() error
    63  
    64  	// Prefetch signals the OS (on supported platforms) to fetch the next length
    65  	// bytes in file (as returned by os.File.Fd()) after offset into cache. Any
    66  	// subsequent reads in that range will not issue disk IO.
    67  	Prefetch(offset int64, length int64) error
    68  
    69  	// Fd returns the raw file descriptor when a File is backed by an *os.File.
    70  	// It can be used for specific functionality like Prefetch.
    71  	// Returns InvalidFd if not supported.
    72  	Fd() uintptr
    73  }
    74  
    75  // InvalidFd is a special value returned by File.Fd() when the file is not
    76  // backed by an OS descriptor.
    77  // Note: the special value is consistent with what os.File implementation
    78  // returns on a nil receiver.
    79  const InvalidFd uintptr = ^(uintptr(0))
    80  
    81  // OpenOption provides an interface to do work on file handles in the Open()
    82  // call.
    83  type OpenOption interface {
    84  	// Apply is called on the file handle after it's opened.
    85  	Apply(File)
    86  }
    87  
    88  // FS is a namespace for files.
    89  //
    90  // The names are filepath names: they may be / separated or \ separated,
    91  // depending on the underlying operating system.
    92  type FS interface {
    93  	// Create creates the named file for reading and writing. If a file
    94  	// already exists at the provided name, it's removed first ensuring the
    95  	// resulting file descriptor points to a new inode.
    96  	Create(name string) (File, error)
    97  
    98  	// Link creates newname as a hard link to the oldname file.
    99  	Link(oldname, newname string) error
   100  
   101  	// Open opens the named file for reading. opts may do work on the file handle after it is opened.
   102  	Open(name string, opts ...OpenOption) (File, error)
   103  
   104  	// OpenReadWrite opens the named file for reading and writing. If the file
   105  	// does not exist, it is created.
   106  	OpenReadWrite(name string, opts ...OpenOption) (File, error)
   107  
   108  	// OpenDir opens the named directory for syncing.
   109  	OpenDir(name string) (File, error)
   110  
   111  	// Remove removes the named file or directory.
   112  	Remove(name string) error
   113  
   114  	// RemoveAll removes the named file or directory and any children it
   115  	// contains. It removes everything it can but returns the first error it
   116  	// encounters.
   117  	RemoveAll(name string) error
   118  
   119  	// Rename renames a file. It overwrites the file at newname if one exists,
   120  	// the same as os.Rename.
   121  	Rename(oldname, newname string) error
   122  
   123  	// ReuseForWrite attempts to reuse the file with oldname by renaming it to newname and opening
   124  	// it for writing without truncation. It is acceptable for the implementation to choose not
   125  	// to reuse oldname, and simply create the file with newname -- in this case the implementation
   126  	// should delete oldname. If the caller calls this function with an oldname that does not exist,
   127  	// the implementation may return an error.
   128  	ReuseForWrite(oldname, newname string) (File, error)
   129  
   130  	// MkdirAll creates a directory and all necessary parents. The permission
   131  	// bits perm have the same semantics as in os.MkdirAll. If the directory
   132  	// already exists, MkdirAll does nothing and returns nil.
   133  	MkdirAll(dir string, perm os.FileMode) error
   134  
   135  	// Lock locks the given file, creating the file if necessary, and
   136  	// truncating the file if it already exists. The lock is an exclusive lock
   137  	// (a write lock), but locked files should neither be read from nor written
   138  	// to. Such files should have zero size and only exist to co-ordinate
   139  	// ownership across processes.
   140  	//
   141  	// A nil Closer is returned if an error occurred. Otherwise, close that
   142  	// Closer to release the lock.
   143  	//
   144  	// On Linux and OSX, a lock has the same semantics as fcntl(2)'s advisory
   145  	// locks. In particular, closing any other file descriptor for the same
   146  	// file will release the lock prematurely.
   147  	//
   148  	// Attempting to lock a file that is already locked by the current process
   149  	// returns an error and leaves the existing lock untouched.
   150  	//
   151  	// Lock is not yet implemented on other operating systems, and calling it
   152  	// will return an error.
   153  	Lock(name string) (io.Closer, error)
   154  
   155  	// List returns a listing of the given directory. The names returned are
   156  	// relative to dir.
   157  	List(dir string) ([]string, error)
   158  
   159  	// Stat returns an os.FileInfo describing the named file.
   160  	Stat(name string) (os.FileInfo, error)
   161  
   162  	// PathBase returns the last element of path. Trailing path separators are
   163  	// removed before extracting the last element. If the path is empty, PathBase
   164  	// returns ".".  If the path consists entirely of separators, PathBase returns a
   165  	// single separator.
   166  	PathBase(path string) string
   167  
   168  	// PathJoin joins any number of path elements into a single path, adding a
   169  	// separator if necessary.
   170  	PathJoin(elem ...string) string
   171  
   172  	// PathDir returns all but the last element of path, typically the path's directory.
   173  	PathDir(path string) string
   174  
   175  	// GetDiskUsage returns disk space statistics for the filesystem where
   176  	// path is any file or directory within that filesystem.
   177  	GetDiskUsage(path string) (DiskUsage, error)
   178  }
   179  
   180  // DiskUsage summarizes disk space usage on a filesystem.
   181  type DiskUsage struct {
   182  	// AvailBytes is the disk space available to the current process, in bytes.
   183  	AvailBytes uint64
   184  	// TotalBytes is the total disk space, in bytes.
   185  	TotalBytes uint64
   186  	// UsedBytes is the used disk space, in bytes.
   187  	UsedBytes uint64
   188  }
   189  
   190  // Default is an FS implementation backed by the underlying operating system's
   191  // file system.
   192  var Default FS = defaultFS{}
   193  
   194  type defaultFS struct{} // defaultFS is the stateless type behind Default.
   195  
   196  // wrapOSFile takes a standard library OS file and returns a vfs.File. f may be
   197  // nil, in which case wrapOSFile must not panic. In such cases, it's okay if the
   198  // returned vfs.File may panic if used.
   199  func wrapOSFile(f *os.File) File {
   200  	// See the implementations in default_{linux,unix,windows}.go.
   201  	return wrapOSFileImpl(f)
   202  }
   203  
   204  func (defaultFS) Create(name string) (File, error) {
   205  	const openFlags = os.O_RDWR | os.O_CREATE | os.O_EXCL | syscall.O_CLOEXEC
   206  
   207  	osFile, err := os.OpenFile(name, openFlags, 0666)
   208  	// If the file already exists, remove it and try again.
   209  	//
   210  	// NB: We choose to remove the file instead of truncating it, despite the
   211  	// fact that we can't do so atomically, because it's more resistant to
   212  	// misuse when using hard links.
   213  
   214  	// We must loop in case another goroutine/thread/process is also
   215  	// attempting to create the a file at the same path.
   216  	for oserror.IsExist(err) {
   217  		if removeErr := os.Remove(name); removeErr != nil && !oserror.IsNotExist(removeErr) {
   218  			return wrapOSFile(osFile), errors.WithStack(removeErr)
   219  		}
   220  		osFile, err = os.OpenFile(name, openFlags, 0666)
   221  	}
   222  	return wrapOSFile(osFile), errors.WithStack(err)
   223  }
   224  
   225  func (defaultFS) Link(oldname, newname string) error {
   226  	return errors.WithStack(os.Link(oldname, newname))
   227  }
   228  
   229  func (defaultFS) Open(name string, opts ...OpenOption) (File, error) {
   230  	osFile, err := os.OpenFile(name, os.O_RDONLY|syscall.O_CLOEXEC, 0)
   231  	if err != nil {
   232  		return nil, errors.WithStack(err)
   233  	}
   234  	file := wrapOSFile(osFile)
   235  	for _, opt := range opts {
   236  		opt.Apply(file)
   237  	}
   238  	return file, nil
   239  }
   240  
   241  func (defaultFS) OpenReadWrite(name string, opts ...OpenOption) (File, error) {
   242  	osFile, err := os.OpenFile(name, os.O_RDWR|syscall.O_CLOEXEC|os.O_CREATE, 0666)
   243  	if err != nil {
   244  		return nil, errors.WithStack(err)
   245  	}
   246  	file := wrapOSFile(osFile)
   247  	for _, opt := range opts {
   248  		opt.Apply(file)
   249  	}
   250  	return file, nil
   251  }
   252  
   253  func (defaultFS) Remove(name string) error {
   254  	return errors.WithStack(os.Remove(name))
   255  }
   256  
   257  func (defaultFS) RemoveAll(name string) error {
   258  	return errors.WithStack(os.RemoveAll(name))
   259  }
   260  
   261  func (defaultFS) Rename(oldname, newname string) error {
   262  	return errors.WithStack(os.Rename(oldname, newname))
   263  }
   264  
   265  func (fs defaultFS) ReuseForWrite(oldname, newname string) (File, error) {
   266  	if err := fs.Rename(oldname, newname); err != nil {
   267  		return nil, errors.WithStack(err)
   268  	}
   269  	f, err := os.OpenFile(newname, os.O_RDWR|os.O_CREATE|syscall.O_CLOEXEC, 0666)
   270  	return wrapOSFile(f), errors.WithStack(err)
   271  }
   272  
   273  func (defaultFS) MkdirAll(dir string, perm os.FileMode) error {
   274  	return errors.WithStack(os.MkdirAll(dir, perm))
   275  }
   276  
   277  func (defaultFS) List(dir string) ([]string, error) {
   278  	f, err := os.Open(dir)
   279  	if err != nil {
   280  		return nil, err
   281  	}
   282  	defer f.Close()
   283  	dirnames, err := f.Readdirnames(-1)
   284  	return dirnames, errors.WithStack(err)
   285  }
   286  
   287  func (defaultFS) Stat(name string) (os.FileInfo, error) {
   288  	finfo, err := os.Stat(name)
   289  	return finfo, errors.WithStack(err)
   290  }
   291  
   292  func (defaultFS) PathBase(path string) string {
   293  	return filepath.Base(path)
   294  }
   295  
   296  func (defaultFS) PathJoin(elem ...string) string {
   297  	return filepath.Join(elem...)
   298  }
   299  
   300  func (defaultFS) PathDir(path string) string {
   301  	return filepath.Dir(path)
   302  }
   303  
   304  type randomReadsOption struct{}
   305  
   306  // RandomReadsOption is an OpenOption that optimizes opened file handle for
   307  // random reads, by calling  fadvise() with POSIX_FADV_RANDOM on Linux systems
   308  // to disable readahead.
   309  var RandomReadsOption OpenOption = &randomReadsOption{}
   310  
   311  // Apply implements the OpenOption interface.
   312  func (randomReadsOption) Apply(f File) {
   313  	if fd := f.Fd(); fd != InvalidFd {
   314  		_ = fadviseRandom(fd)
   315  	}
   316  }
   317  
   318  type sequentialReadsOption struct{}
   319  
   320  // SequentialReadsOption is an OpenOption that optimizes opened file handle for
   321  // sequential reads, by calling fadvise() with POSIX_FADV_SEQUENTIAL on Linux
   322  // systems to enable readahead.
   323  var SequentialReadsOption OpenOption = &sequentialReadsOption{}
   324  
   325  // Apply implements the OpenOption interface.
   326  func (sequentialReadsOption) Apply(f File) {
   327  	if fd := f.Fd(); fd != InvalidFd {
   328  		_ = fadviseSequential(fd)
   329  	}
   330  }
   331  
   332  // Copy copies the contents of oldname to newname. If newname exists, it will
   333  // be overwritten.
   334  func Copy(fs FS, oldname, newname string) error {
   335  	return CopyAcrossFS(fs, oldname, fs, newname)
   336  }
   337  
   338  // CopyAcrossFS copies the contents of oldname on srcFS to newname dstFS. If
   339  // newname exists, it will be overwritten.
   340  func CopyAcrossFS(srcFS FS, oldname string, dstFS FS, newname string) error {
   341  	src, err := srcFS.Open(oldname, SequentialReadsOption)
   342  	if err != nil {
   343  		return err
   344  	}
   345  	defer src.Close()
   346  
   347  	dst, err := dstFS.Create(newname)
   348  	if err != nil {
   349  		return err
   350  	}
   351  	defer dst.Close()
   352  
   353  	if _, err := io.Copy(dst, src); err != nil {
   354  		return err
   355  	}
   356  	return dst.Sync()
   357  }
   358  
   359  // LimitedCopy copies up to maxBytes from oldname to newname. If newname
   360  // exists, it will be overwritten.
   361  func LimitedCopy(fs FS, oldname, newname string, maxBytes int64) error {
   362  	src, err := fs.Open(oldname, SequentialReadsOption)
   363  	if err != nil {
   364  		return err
   365  	}
   366  	defer src.Close()
   367  
   368  	dst, err := fs.Create(newname)
   369  	if err != nil {
   370  		return err
   371  	}
   372  	defer dst.Close()
   373  
   374  	if _, err := io.Copy(dst, &io.LimitedReader{R: src, N: maxBytes}); err != nil {
   375  		return err
   376  	}
   377  	return dst.Sync()
   378  }
   379  
   380  // LinkOrCopy creates newname as a hard link to the oldname file. If creating
   381  // the hard link fails, LinkOrCopy falls back to copying the file (which may
   382  // also fail if oldname doesn't exist or newname already exists).
   383  func LinkOrCopy(fs FS, oldname, newname string) error {
   384  	err := fs.Link(oldname, newname)
   385  	if err == nil {
   386  		return nil
   387  	}
   388  	// Permit a handful of errors which we know won't be fixed by copying the
   389  	// file. Note that we don't check for the specifics of the error code as it
   390  	// isn't easy to do so in a portable manner. On Unix we'd have to check for
   391  	// LinkError.Err == syscall.EXDEV. On Windows we'd have to check for
   392  	// ERROR_NOT_SAME_DEVICE, ERROR_INVALID_FUNCTION, and
   393  	// ERROR_INVALID_PARAMETER. Rather that such OS specific checks, we fall back
   394  	// to always trying to copy if hard-linking failed.
   395  	if oserror.IsExist(err) || oserror.IsNotExist(err) || oserror.IsPermission(err) {
   396  		return err
   397  	}
   398  	return Copy(fs, oldname, newname)
   399  }
   400  
   401  // Root returns the base FS implementation, unwrapping all nested FSs that
   402  // expose an Unwrap method.
   403  func Root(fs FS) FS {
   404  	type unwrapper interface {
   405  		Unwrap() FS
   406  	}
   407  
   408  	for {
   409  		u, ok := fs.(unwrapper)
   410  		if !ok {
   411  			break
   412  		}
   413  		fs = u.Unwrap()
   414  	}
   415  	return fs
   416  }
   417  
   418  // ErrUnsupported may be returned by an FS when it does not support an operation.
   419  var ErrUnsupported = errors.New("pebble: not supported")