github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/vfs/mem_fs.go (about)

     1  // Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package vfs // import "github.com/zuoyebang/bitalostable/vfs"
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"os"
    12  	"path"
    13  	"sort"
    14  	"strings"
    15  	"sync"
    16  	"sync/atomic"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/errors"
    20  	"github.com/cockroachdb/errors/oserror"
    21  	"github.com/zuoyebang/bitalostable/internal/invariants"
    22  )
    23  
// sep is the path separator understood by MemFS. MemFS always uses forward
// slashes, independent of the host operating system.
const sep = "/"
    25  
    26  // NewMem returns a new memory-backed FS implementation.
    27  func NewMem() *MemFS {
    28  	return &MemFS{
    29  		root: newRootMemNode(),
    30  	}
    31  }
    32  
    33  // NewStrictMem returns a "strict" memory-backed FS implementation. The behaviour is strict wrt
    34  // needing a Sync() call on files or directories for the state changes to be finalized. Any
    35  // changes that are not finalized are visible to reads until MemFS.ResetToSyncedState() is called,
    36  // at which point they are discarded and no longer visible.
    37  //
    38  // Expected usage:
    39  //
    40  //	strictFS := NewStrictMem()
    41  //	db := Open(..., &Options{FS: strictFS})
    42  //	// Do and commit various operations.
    43  //	...
    44  //	// Prevent any more changes to finalized state.
    45  //	strictFS.SetIgnoreSyncs(true)
    46  //	// This will finish any ongoing background flushes, compactions but none of these writes will
    47  //	// be finalized since syncs are being ignored.
    48  //	db.Close()
    49  //	// Discard unsynced state.
    50  //	strictFS.ResetToSyncedState()
    51  //	// Allow changes to finalized state.
    52  //	strictFS.SetIgnoreSyncs(false)
    53  //	// Open the DB. This DB should have the same state as if the earlier strictFS operations and
    54  //	// db.Close() were not called.
    55  //	db := Open(..., &Options{FS: strictFS})
    56  func NewStrictMem() *MemFS {
    57  	return &MemFS{
    58  		root:   newRootMemNode(),
    59  		strict: true,
    60  	}
    61  }
    62  
    63  // NewMemFile returns a memory-backed File implementation. The memory-backed
    64  // file takes ownership of data.
    65  func NewMemFile(data []byte) File {
    66  	n := &memNode{refs: 1}
    67  	n.mu.data = data
    68  	n.mu.modTime = time.Now()
    69  	return &memFile{
    70  		n:    n,
    71  		read: true,
    72  	}
    73  }
    74  
// MemFS implements FS. It is an in-memory directory tree rooted at root;
// paths use forward slashes and are always resolved relative to the root.
type MemFS struct {
	// mu serializes every walk over the directory tree and guards
	// ignoreSyncs. root is the top-level directory node.
	mu   sync.Mutex
	root *memNode

	// strict is set by NewStrictMem and never changes afterwards; only a
	// strict MemFS records synced state. ignoreSyncs, when true, turns
	// memFile.Sync into a no-op on a strict MemFS.
	strict      bool
	ignoreSyncs bool
}

// Compile-time check that *MemFS satisfies the FS interface.
var _ FS = &MemFS{}
    85  
    86  // String dumps the contents of the MemFS.
    87  func (y *MemFS) String() string {
    88  	y.mu.Lock()
    89  	defer y.mu.Unlock()
    90  
    91  	s := new(bytes.Buffer)
    92  	y.root.dump(s, 0)
    93  	return s.String()
    94  }
    95  
    96  // SetIgnoreSyncs sets the MemFS.ignoreSyncs field. See the usage comment with NewStrictMem() for
    97  // details.
    98  func (y *MemFS) SetIgnoreSyncs(ignoreSyncs bool) {
    99  	y.mu.Lock()
   100  	if !y.strict {
   101  		// noop
   102  		return
   103  	}
   104  	y.ignoreSyncs = ignoreSyncs
   105  	y.mu.Unlock()
   106  }
   107  
   108  // ResetToSyncedState discards state in the FS that is not synced. See the usage comment with
   109  // NewStrictMem() for details.
   110  func (y *MemFS) ResetToSyncedState() {
   111  	if !y.strict {
   112  		// noop
   113  		return
   114  	}
   115  	y.mu.Lock()
   116  	y.root.resetToSyncedState()
   117  	y.mu.Unlock()
   118  }
   119  
   120  // walk walks the directory tree for the fullname, calling f at each step. If
   121  // f returns an error, the walk will be aborted and return that same error.
   122  //
   123  // Each walk is atomic: y's mutex is held for the entire operation, including
   124  // all calls to f.
   125  //
   126  // dir is the directory at that step, frag is the name fragment, and final is
   127  // whether it is the final step. For example, walking "/foo/bar/x" will result
   128  // in 3 calls to f:
   129  //   - "/", "foo", false
   130  //   - "/foo/", "bar", false
   131  //   - "/foo/bar/", "x", true
   132  //
   133  // Similarly, walking "/y/z/", with a trailing slash, will result in 3 calls to f:
   134  //   - "/", "y", false
   135  //   - "/y/", "z", false
   136  //   - "/y/z/", "", true
   137  func (y *MemFS) walk(fullname string, f func(dir *memNode, frag string, final bool) error) error {
   138  	y.mu.Lock()
   139  	defer y.mu.Unlock()
   140  
   141  	// For memfs, the current working directory is the same as the root directory,
   142  	// so we strip off any leading "/"s to make fullname a relative path, and
   143  	// the walk starts at y.root.
   144  	for len(fullname) > 0 && fullname[0] == sep[0] {
   145  		fullname = fullname[1:]
   146  	}
   147  	dir := y.root
   148  
   149  	for {
   150  		frag, remaining := fullname, ""
   151  		i := strings.IndexRune(fullname, rune(sep[0]))
   152  		final := i < 0
   153  		if !final {
   154  			frag, remaining = fullname[:i], fullname[i+1:]
   155  			for len(remaining) > 0 && remaining[0] == sep[0] {
   156  				remaining = remaining[1:]
   157  			}
   158  		}
   159  		if err := f(dir, frag, final); err != nil {
   160  			return err
   161  		}
   162  		if final {
   163  			break
   164  		}
   165  		child := dir.children[frag]
   166  		if child == nil {
   167  			return &os.PathError{
   168  				Op:   "open",
   169  				Path: fullname,
   170  				Err:  oserror.ErrNotExist,
   171  			}
   172  		}
   173  		if !child.isDir {
   174  			return &os.PathError{
   175  				Op:   "open",
   176  				Path: fullname,
   177  				Err:  errors.New("not a directory"),
   178  			}
   179  		}
   180  		dir, fullname = child, remaining
   181  	}
   182  	return nil
   183  }
   184  
   185  // Create implements FS.Create.
   186  func (y *MemFS) Create(fullname string) (File, error) {
   187  	var ret *memFile
   188  	err := y.walk(fullname, func(dir *memNode, frag string, final bool) error {
   189  		if final {
   190  			if frag == "" {
   191  				return errors.New("bitalostable/vfs: empty file name")
   192  			}
   193  			n := &memNode{name: frag}
   194  			dir.children[frag] = n
   195  			ret = &memFile{
   196  				n:     n,
   197  				fs:    y,
   198  				write: true,
   199  			}
   200  		}
   201  		return nil
   202  	})
   203  	if err != nil {
   204  		return nil, err
   205  	}
   206  	atomic.AddInt32(&ret.n.refs, 1)
   207  	return ret, nil
   208  }
   209  
   210  // Link implements FS.Link.
   211  func (y *MemFS) Link(oldname, newname string) error {
   212  	var n *memNode
   213  	err := y.walk(oldname, func(dir *memNode, frag string, final bool) error {
   214  		if final {
   215  			if frag == "" {
   216  				return errors.New("bitalostable/vfs: empty file name")
   217  			}
   218  			n = dir.children[frag]
   219  		}
   220  		return nil
   221  	})
   222  	if err != nil {
   223  		return err
   224  	}
   225  	if n == nil {
   226  		return &os.LinkError{
   227  			Op:  "link",
   228  			Old: oldname,
   229  			New: newname,
   230  			Err: oserror.ErrNotExist,
   231  		}
   232  	}
   233  	return y.walk(newname, func(dir *memNode, frag string, final bool) error {
   234  		if final {
   235  			if frag == "" {
   236  				return errors.New("bitalostable/vfs: empty file name")
   237  			}
   238  			if _, ok := dir.children[frag]; ok {
   239  				return &os.LinkError{
   240  					Op:  "link",
   241  					Old: oldname,
   242  					New: newname,
   243  					Err: oserror.ErrExist,
   244  				}
   245  			}
   246  			dir.children[frag] = n
   247  		}
   248  		return nil
   249  	})
   250  }
   251  
   252  func (y *MemFS) open(fullname string) (File, error) {
   253  	var ret *memFile
   254  	err := y.walk(fullname, func(dir *memNode, frag string, final bool) error {
   255  		if final {
   256  			if frag == "" {
   257  				ret = &memFile{
   258  					n:  dir,
   259  					fs: y,
   260  				}
   261  				return nil
   262  			}
   263  			if n := dir.children[frag]; n != nil {
   264  				ret = &memFile{
   265  					n:    n,
   266  					fs:   y,
   267  					read: true,
   268  				}
   269  			}
   270  		}
   271  		return nil
   272  	})
   273  	if err != nil {
   274  		return nil, err
   275  	}
   276  	if ret == nil {
   277  		return nil, &os.PathError{
   278  			Op:   "open",
   279  			Path: fullname,
   280  			Err:  oserror.ErrNotExist,
   281  		}
   282  	}
   283  	atomic.AddInt32(&ret.n.refs, 1)
   284  	return ret, nil
   285  }
   286  
   287  // Open implements FS.Open.
   288  func (y *MemFS) Open(fullname string, opts ...OpenOption) (File, error) {
   289  	return y.open(fullname)
   290  }
   291  
   292  // OpenDir implements FS.OpenDir.
   293  func (y *MemFS) OpenDir(fullname string) (File, error) {
   294  	return y.open(fullname)
   295  }
   296  
   297  // Remove implements FS.Remove.
   298  func (y *MemFS) Remove(fullname string) error {
   299  	return y.walk(fullname, func(dir *memNode, frag string, final bool) error {
   300  		if final {
   301  			if frag == "" {
   302  				return errors.New("bitalostable/vfs: empty file name")
   303  			}
   304  			child, ok := dir.children[frag]
   305  			if !ok {
   306  				return oserror.ErrNotExist
   307  			}
   308  			// Disallow removal of open files/directories which implements Windows
   309  			// semantics. This ensures that we don't regress in the ordering of
   310  			// operations and try to remove a file while it is still open.
   311  			if n := atomic.LoadInt32(&child.refs); n > 0 {
   312  				return oserror.ErrInvalid
   313  			}
   314  			if len(child.children) > 0 {
   315  				return errNotEmpty
   316  			}
   317  			delete(dir.children, frag)
   318  		}
   319  		return nil
   320  	})
   321  }
   322  
   323  // RemoveAll implements FS.RemoveAll.
   324  func (y *MemFS) RemoveAll(fullname string) error {
   325  	err := y.walk(fullname, func(dir *memNode, frag string, final bool) error {
   326  		if final {
   327  			if frag == "" {
   328  				return errors.New("bitalostable/vfs: empty file name")
   329  			}
   330  			_, ok := dir.children[frag]
   331  			if !ok {
   332  				return nil
   333  			}
   334  			delete(dir.children, frag)
   335  		}
   336  		return nil
   337  	})
   338  	// Match os.RemoveAll which returns a nil error even if the parent
   339  	// directories don't exist.
   340  	if oserror.IsNotExist(err) {
   341  		err = nil
   342  	}
   343  	return err
   344  }
   345  
   346  // Rename implements FS.Rename.
   347  func (y *MemFS) Rename(oldname, newname string) error {
   348  	var n *memNode
   349  	err := y.walk(oldname, func(dir *memNode, frag string, final bool) error {
   350  		if final {
   351  			if frag == "" {
   352  				return errors.New("bitalostable/vfs: empty file name")
   353  			}
   354  			n = dir.children[frag]
   355  			delete(dir.children, frag)
   356  		}
   357  		return nil
   358  	})
   359  	if err != nil {
   360  		return err
   361  	}
   362  	if n == nil {
   363  		return &os.PathError{
   364  			Op:   "open",
   365  			Path: oldname,
   366  			Err:  oserror.ErrNotExist,
   367  		}
   368  	}
   369  	return y.walk(newname, func(dir *memNode, frag string, final bool) error {
   370  		if final {
   371  			if frag == "" {
   372  				return errors.New("bitalostable/vfs: empty file name")
   373  			}
   374  			dir.children[frag] = n
   375  			n.name = frag
   376  		}
   377  		return nil
   378  	})
   379  }
   380  
   381  // ReuseForWrite implements FS.ReuseForWrite.
   382  func (y *MemFS) ReuseForWrite(oldname, newname string) (File, error) {
   383  	if err := y.Rename(oldname, newname); err != nil {
   384  		return nil, err
   385  	}
   386  	f, err := y.Open(newname)
   387  	if err != nil {
   388  		return nil, err
   389  	}
   390  	y.mu.Lock()
   391  	defer y.mu.Unlock()
   392  
   393  	mf := f.(*memFile)
   394  	mf.read = false
   395  	mf.write = true
   396  	return f, nil
   397  }
   398  
   399  // MkdirAll implements FS.MkdirAll.
   400  func (y *MemFS) MkdirAll(dirname string, perm os.FileMode) error {
   401  	return y.walk(dirname, func(dir *memNode, frag string, final bool) error {
   402  		if frag == "" {
   403  			if final {
   404  				return nil
   405  			}
   406  			return errors.New("bitalostable/vfs: empty file name")
   407  		}
   408  		child := dir.children[frag]
   409  		if child == nil {
   410  			dir.children[frag] = &memNode{
   411  				name:     frag,
   412  				children: make(map[string]*memNode),
   413  				isDir:    true,
   414  			}
   415  			return nil
   416  		}
   417  		if !child.isDir {
   418  			return &os.PathError{
   419  				Op:   "open",
   420  				Path: dirname,
   421  				Err:  errors.New("not a directory"),
   422  			}
   423  		}
   424  		return nil
   425  	})
   426  }
   427  
   428  // Lock implements FS.Lock.
   429  func (y *MemFS) Lock(fullname string) (io.Closer, error) {
   430  	// FS.Lock excludes other processes, but other processes cannot see this
   431  	// process' memory. We translate Lock into Create so that have the normal
   432  	// detection of non-existent directory paths.
   433  	return y.Create(fullname)
   434  }
   435  
   436  // List implements FS.List.
   437  func (y *MemFS) List(dirname string) ([]string, error) {
   438  	if !strings.HasSuffix(dirname, sep) {
   439  		dirname += sep
   440  	}
   441  	var ret []string
   442  	err := y.walk(dirname, func(dir *memNode, frag string, final bool) error {
   443  		if final {
   444  			if frag != "" {
   445  				panic("unreachable")
   446  			}
   447  			ret = make([]string, 0, len(dir.children))
   448  			for s := range dir.children {
   449  				ret = append(ret, s)
   450  			}
   451  		}
   452  		return nil
   453  	})
   454  	return ret, err
   455  }
   456  
   457  // Stat implements FS.Stat.
   458  func (y *MemFS) Stat(name string) (os.FileInfo, error) {
   459  	f, err := y.Open(name)
   460  	if err != nil {
   461  		if pe, ok := err.(*os.PathError); ok {
   462  			pe.Op = "stat"
   463  		}
   464  		return nil, err
   465  	}
   466  	defer f.Close()
   467  	return f.Stat()
   468  }
   469  
   470  // PathBase implements FS.PathBase.
   471  func (*MemFS) PathBase(p string) string {
   472  	// Note that MemFS uses forward slashes for its separator, hence the use of
   473  	// path.Base, not filepath.Base.
   474  	return path.Base(p)
   475  }
   476  
   477  // PathJoin implements FS.PathJoin.
   478  func (*MemFS) PathJoin(elem ...string) string {
   479  	// Note that MemFS uses forward slashes for its separator, hence the use of
   480  	// path.Join, not filepath.Join.
   481  	return path.Join(elem...)
   482  }
   483  
   484  // PathDir implements FS.PathDir.
   485  func (*MemFS) PathDir(p string) string {
   486  	// Note that MemFS uses forward slashes for its separator, hence the use of
   487  	// path.Dir, not filepath.Dir.
   488  	return path.Dir(p)
   489  }
   490  
   491  // GetDiskUsage implements FS.GetDiskUsage.
   492  func (*MemFS) GetDiskUsage(string) (DiskUsage, error) {
   493  	return DiskUsage{}, ErrUnsupported
   494  }
   495  
// memNode holds a file's data or a directory's children, and implements os.FileInfo.
type memNode struct {
	name  string
	isDir bool
	// refs counts the memFile handles currently open on this node. It is
	// updated atomically: opening or creating a handle increments it, Close
	// decrements it, and Remove refuses to delete a node while it is positive.
	refs  int32

	// Mutable state.
	// - For a file: data, syncedData, modTime: A file is only being mutated by a single goroutine,
	//   but there can be concurrent readers e.g. DB.Checkpoint() which can read WAL or MANIFEST
	//   files that are being written to. Additionally Sync() calls can be concurrent with writing.
	// - For a directory: children and syncedChildren. Concurrent writes are possible, and
	//   these are protected using MemFS.mu.
	//
	// syncedData and syncedChildren are the snapshots taken by Sync() on a
	// strict MemFS; resetToSyncedState restores data and children from them.
	mu struct {
		sync.Mutex
		data       []byte
		syncedData []byte
		modTime    time.Time
	}

	children       map[string]*memNode
	syncedChildren map[string]*memNode
}
   518  
   519  func newRootMemNode() *memNode {
   520  	return &memNode{
   521  		name:     "/", // set the name to match what file systems do
   522  		children: make(map[string]*memNode),
   523  		isDir:    true,
   524  	}
   525  }
   526  
   527  func (f *memNode) IsDir() bool {
   528  	return f.isDir
   529  }
   530  
   531  func (f *memNode) ModTime() time.Time {
   532  	f.mu.Lock()
   533  	defer f.mu.Unlock()
   534  	return f.mu.modTime
   535  }
   536  
   537  func (f *memNode) Mode() os.FileMode {
   538  	if f.isDir {
   539  		return os.ModeDir | 0755
   540  	}
   541  	return 0755
   542  }
   543  
   544  func (f *memNode) Name() string {
   545  	return f.name
   546  }
   547  
   548  func (f *memNode) Size() int64 {
   549  	f.mu.Lock()
   550  	defer f.mu.Unlock()
   551  	return int64(len(f.mu.data))
   552  }
   553  
   554  func (f *memNode) Sys() interface{} {
   555  	return nil
   556  }
   557  
   558  func (f *memNode) dump(w *bytes.Buffer, level int) {
   559  	if f.isDir {
   560  		w.WriteString("          ")
   561  	} else {
   562  		f.mu.Lock()
   563  		fmt.Fprintf(w, "%8d  ", len(f.mu.data))
   564  		f.mu.Unlock()
   565  	}
   566  	for i := 0; i < level; i++ {
   567  		w.WriteString("  ")
   568  	}
   569  	w.WriteString(f.name)
   570  	if !f.isDir {
   571  		w.WriteByte('\n')
   572  		return
   573  	}
   574  	if level > 0 { // deal with the fact that the root's name is already "/"
   575  		w.WriteByte(sep[0])
   576  	}
   577  	w.WriteByte('\n')
   578  	names := make([]string, 0, len(f.children))
   579  	for name := range f.children {
   580  		names = append(names, name)
   581  	}
   582  	sort.Strings(names)
   583  	for _, name := range names {
   584  		f.children[name].dump(w, level+1)
   585  	}
   586  }
   587  
   588  func (f *memNode) resetToSyncedState() {
   589  	if f.isDir {
   590  		f.children = make(map[string]*memNode)
   591  		for k, v := range f.syncedChildren {
   592  			f.children[k] = v
   593  		}
   594  		for _, v := range f.children {
   595  			v.resetToSyncedState()
   596  		}
   597  	} else {
   598  		f.mu.Lock()
   599  		f.mu.data = append([]byte(nil), f.mu.syncedData...)
   600  		f.mu.Unlock()
   601  	}
   602  }
   603  
// memFile is a reader or writer of a node's data, and implements File.
type memFile struct {
	// n is the node this handle refers to; Close sets it to nil.
	n           *memNode
	fs          *MemFS // nil for a standalone memFile
	// rpos is the offset at which the next Read starts, and wpos the offset
	// at which the next Write starts. The two advance independently.
	rpos        int
	wpos        int
	// read and write record which of Read/ReadAt and Write the handle allows.
	read, write bool
}
   612  
   613  func (f *memFile) Close() error {
   614  	if n := atomic.AddInt32(&f.n.refs, -1); n < 0 {
   615  		panic(fmt.Sprintf("bitalostable: close of unopened file: %d", n))
   616  	}
   617  	f.n = nil
   618  	return nil
   619  }
   620  
   621  func (f *memFile) Read(p []byte) (int, error) {
   622  	if !f.read {
   623  		return 0, errors.New("bitalostable/vfs: file was not opened for reading")
   624  	}
   625  	if f.n.isDir {
   626  		return 0, errors.New("bitalostable/vfs: cannot read a directory")
   627  	}
   628  	f.n.mu.Lock()
   629  	defer f.n.mu.Unlock()
   630  	if f.rpos >= len(f.n.mu.data) {
   631  		return 0, io.EOF
   632  	}
   633  	n := copy(p, f.n.mu.data[f.rpos:])
   634  	f.rpos += n
   635  	return n, nil
   636  }
   637  
   638  func (f *memFile) ReadAt(p []byte, off int64) (int, error) {
   639  	if !f.read {
   640  		return 0, errors.New("bitalostable/vfs: file was not opened for reading")
   641  	}
   642  	if f.n.isDir {
   643  		return 0, errors.New("bitalostable/vfs: cannot read a directory")
   644  	}
   645  	f.n.mu.Lock()
   646  	defer f.n.mu.Unlock()
   647  	if off >= int64(len(f.n.mu.data)) {
   648  		return 0, io.EOF
   649  	}
   650  	return copy(p, f.n.mu.data[off:]), nil
   651  }
   652  
   653  func (f *memFile) Write(p []byte) (int, error) {
   654  	if !f.write {
   655  		return 0, errors.New("bitalostable/vfs: file was not created for writing")
   656  	}
   657  	if f.n.isDir {
   658  		return 0, errors.New("bitalostable/vfs: cannot write a directory")
   659  	}
   660  	f.n.mu.Lock()
   661  	defer f.n.mu.Unlock()
   662  	f.n.mu.modTime = time.Now()
   663  	if f.wpos+len(p) <= len(f.n.mu.data) {
   664  		n := copy(f.n.mu.data[f.wpos:f.wpos+len(p)], p)
   665  		if n != len(p) {
   666  			panic("stuff")
   667  		}
   668  	} else {
   669  		f.n.mu.data = append(f.n.mu.data[:f.wpos], p...)
   670  	}
   671  	f.wpos += len(p)
   672  
   673  	if invariants.Enabled {
   674  		// Mutate the input buffer to flush out bugs in Pebble which expect the
   675  		// input buffer to be unmodified.
   676  		for i := range p {
   677  			p[i] ^= 0xff
   678  		}
   679  	}
   680  	return len(p), nil
   681  }
   682  
   683  func (f *memFile) Stat() (os.FileInfo, error) {
   684  	return f.n, nil
   685  }
   686  
   687  func (f *memFile) Sync() error {
   688  	if f.fs != nil && f.fs.strict {
   689  		f.fs.mu.Lock()
   690  		defer f.fs.mu.Unlock()
   691  		if f.fs.ignoreSyncs {
   692  			return nil
   693  		}
   694  		if f.n.isDir {
   695  			f.n.syncedChildren = make(map[string]*memNode)
   696  			for k, v := range f.n.children {
   697  				f.n.syncedChildren[k] = v
   698  			}
   699  		} else {
   700  			f.n.mu.Lock()
   701  			f.n.mu.syncedData = append([]byte(nil), f.n.mu.data...)
   702  			f.n.mu.Unlock()
   703  		}
   704  	}
   705  	return nil
   706  }
   707  
   708  // Flush is a no-op and present only to prevent buffering at higher levels
   709  // (e.g. it prevents sstable.Writer from using a bufio.Writer).
   710  func (f *memFile) Flush() error {
   711  	return nil
   712  }