github.com/creachadair/ffs@v0.17.3/file/file.go (about)

     1  // Copyright 2019 Michael J. Fromberger. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package file implements a File API over a content-addressable blob.Store.
    16  //
    17  // A File as defined by this package differs from the POSIX file model in that
    18  // any File may have both binary content and "children". Thus, any File is also
    19  // a directory, which can contain other files in a Merkle tree structure.
    20  //
    21  // A File is addressed by a storage key, corresponding to the current state of
    22  // its content, metadata, and children (recursively). File metadata are stored
    23  // as wire-format protocol buffers, as defined in file/wiretype/wiretype.proto.
    24  //
    25  // Basic usage:
    26  //
    27  //	ctx := context.Background()
    28  //
    29  //	f := file.New(cas, nil)   // create a new, empty file
    30  //	f.WriteAt(ctx, data, 0)   // write some data to the file
    31  //	key, err := f.Flush(ctx)  // commit the file to storage
    32  //
    33  // To open an existing file,
    34  //
    35  //	f, err := file.Open(ctx, cas, key)
    36  //
    37  // The I/O methods of a File require a context argument. For compatibility with
    38  // the standard interfaces in the io package, a File provides a wrapper for a
    39  // request-scoped context:
    40  //
    41  //	_, err := io.Copy(dst, f.Cursor(ctx))
    42  //
    43  // A value of the file.Cursor type should not be used outside the dynamic
    44  // extent of the request whose context it captures.
    45  //
    46  // # Metadata
    47  //
    48  // A File supports a subset of POSIX style data metadata, including mode,
    49  // modification time, and owner/group identity. These metadata are not
    50  // interpreted by the API, but will be persisted if they are set.
    51  //
     52  // By default, a File does not persist stat metadata. To enable stat
     53  // persistence, you may either set the PersistStat field of file.NewOptions
     54  // when the File is created, or use the Persist method of the Stat value to
     55  // enable or disable persistence:
    56  //
    57  //	s := f.Stat()
    58  //	s.ModTime = time.Now()
    59  //	s.Update().Persist(true)
    60  //
    61  // The file.Stat type defines the stat attributes that can be persisted.
    62  //
    63  // # Synchronization
    64  //
    65  // The exported methods of *File and the views of its data (Child, Data, Stat,
    66  // XAttr) are safe for concurrent use by multiple goroutines.
    67  package file
    68  
    69  import (
    70  	"context"
    71  	"errors"
    72  	"fmt"
    73  	"io"
    74  	"slices"
    75  	"sort"
    76  	"sync"
    77  	"time"
    78  
    79  	"github.com/creachadair/ffs/blob"
    80  	"github.com/creachadair/ffs/block"
    81  	"github.com/creachadair/ffs/file/wiretype"
    82  )
    83  
    84  // New constructs a new, empty File with the given options and backed by s. The
    85  // caller must call the new file's Flush method to ensure it is written to
    86  // storage. If opts == nil, defaults are chosen.
    87  func New(s blob.CAS, opts *NewOptions) *File {
    88  	if opts == nil {
    89  		opts = new(NewOptions)
    90  	}
    91  	f := &File{
    92  		s:        s,
    93  		name:     opts.Name,
    94  		saveStat: opts.PersistStat,
    95  		data:     fileData{sc: opts.Split},
    96  		xattr:    make(map[string]string),
    97  	}
    98  	// If the options contain stat metadata, copy them in.
    99  	if opts.Stat != nil {
   100  		f.setStatLocked(*opts.Stat)
   101  	}
   102  	return f
   103  }
   104  
// NewOptions control the creation of new files. A nil *NewOptions is accepted
// by New and is treated as a zero-valued NewOptions.
type NewOptions struct {
	// The name to attribute to the new file. The name of a File is not
	// persisted in storage.
	Name string

	// Stat, if non-nil, is the initial stat metadata for the file.  Note that
	// stat metadata will not be persisted to storage when the file is flushed
	// unless PersistStat is also true.
	Stat *Stat

	// PersistStat is whether stat metadata for the new file should be persisted
	// to storage when the file is written out.
	PersistStat bool

	// The block splitter configuration to use. If omitted, the default values
	// from the block package are used. Split configurations are not persisted
	// in storage, but descendants created from a file (via the New method) will
	// inherit the parent file config if they do not specify their own.
	Split *block.SplitConfig
}
   126  
   127  // Open opens an existing file given its storage key in s.
   128  func Open(ctx context.Context, s blob.CAS, key string) (*File, error) {
   129  	var obj wiretype.Object
   130  	if err := wiretype.Load(ctx, s, key, &obj); err != nil {
   131  		return nil, fmt.Errorf("load %x: %w", key, err)
   132  	}
   133  	f := &File{s: s, key: key}
   134  	if err := f.fromWireType(&obj); err != nil {
   135  		return nil, fmt.Errorf("decode file %x: %w", key, err)
   136  	}
   137  	return f, nil
   138  }
   139  
// A File represents a writable file stored in a content-addressable blobstore.
// A File carries binary data, named children, stat metadata, and extended
// attributes; its storage key is cleared whenever its state is invalidated
// and is recomputed when the file is flushed.
type File struct {
	s blob.CAS // the store backing this file and files derived from it

	mu   sync.RWMutex // guards the mutable fields below
	name string       // if this file is a child, its attributed name
	key  string       // the storage key for the file record (wiretype.Node); "" if not current

	stat     Stat // file metadata
	saveStat bool // whether to persist file metadata

	data  fileData          // binary file data
	kids  []child           // ordered lexicographically by name
	xattr map[string]string // extended attributes
}
   155  
// A child records the name and storage key of a child file, along with the
// opened File for that child once it has been requested.
type child struct {
	Name string // the name of the child within its parent
	Key  string // the storage key of the child
	File *File  // the opened file for the child, or nil if not yet opened

	// When a file is loaded from storage, the Key of each child is populated
	// but its File is not created until explicitly requested.  After the child
	// is opened, the Key may go out of sync with the file due to modifications
	// by the caller: When the enclosing file is flushed, any child with a File
	// attached is also flushed and the Key is updated.
}
   168  
   169  // findChildLocked reports whether f has a child with the specified name and
   170  // its index in the slice if so, or otherwise -1.
   171  func (f *File) findChildLocked(name string) (int, bool) {
   172  	if n := sort.Search(len(f.kids), func(i int) bool {
   173  		return f.kids[i].Name >= name
   174  	}); n < len(f.kids) && f.kids[n].Name == name {
   175  		return n, true
   176  	}
   177  	return -1, false
   178  }
   179  
   180  func (f *File) setStatLocked(s Stat) {
   181  	f.stat = s
   182  	if f.saveStat {
   183  		f.invalLocked()
   184  	}
   185  }
   186  
   187  func (f *File) invalLocked() { f.key = "" }
   188  
   189  func (f *File) modifyLocked() { f.invalLocked(); f.stat.ModTime = time.Now() }
   190  
   191  // New constructs a new empty node backed by the same store as f.
   192  // If f persists stat metadata, then the new file does too, even if
   193  // opts.PersistStat is false. The caller can override this default via the Stat
   194  // view after the file is created.
   195  func (f *File) New(opts *NewOptions) *File {
   196  	out := New(f.s, opts)
   197  	if f.saveStat {
   198  		out.saveStat = true
   199  	}
   200  
   201  	// Propagate the parent split settings to the child, if the child did not
   202  	// have any specifically defined.
   203  	if opts == nil || opts.Split == nil {
   204  		out.data.sc = f.data.sc
   205  	}
   206  	return out
   207  }
   208  
   209  // Stat returns the current stat metadata for f. Calling this method does not
   210  // change stat persistence for f, use the Clear and Update methods of the Stat
   211  // value to do that.
   212  func (f *File) Stat() Stat {
   213  	f.mu.RLock()
   214  	defer f.mu.RUnlock()
   215  	cp := f.stat
   216  	cp.f = f
   217  	return cp
   218  }
   219  
   220  // FileInfo returns a [FileInfo] record for f. The resulting value is a
   221  // snapshot at the moment of construction, and does not track changes to the
   222  // file after the value was constructed.
   223  func (f *File) FileInfo() FileInfo {
   224  	if f == nil {
   225  		return FileInfo{}
   226  	}
   227  	f.mu.Lock()
   228  	defer f.mu.Unlock()
   229  	return FileInfo{
   230  		name:    f.name,
   231  		size:    f.data.totalBytes,
   232  		mode:    f.stat.Mode,
   233  		modTime: f.stat.ModTime,
   234  		file:    f,
   235  	}
   236  }
   237  
   238  // Data returns a view of the file content for f.
   239  func (f *File) Data() Data { return Data{f: f} }
   240  
   241  var (
   242  	// ErrChildNotFound indicates that a requested child file does not exist.
   243  	ErrChildNotFound = errors.New("child file not found")
   244  )
   245  
   246  // Open opens the specified child file of f, or returns ErrChildNotFound if no
   247  // such child exists.
   248  func (f *File) Open(ctx context.Context, name string) (*File, error) {
   249  	f.mu.Lock()
   250  	defer f.mu.Unlock()
   251  	i, ok := f.findChildLocked(name)
   252  	if !ok {
   253  		return nil, fmt.Errorf("open %q: %w", name, ErrChildNotFound)
   254  	}
   255  	if c := f.kids[i].File; c != nil {
   256  		return c, nil
   257  	}
   258  	c, err := Open(ctx, f.s, f.kids[i].Key)
   259  	if err == nil {
   260  		c.name = name // remember the name the file was opened with
   261  		f.kids[i].File = c
   262  	}
   263  	return c, err
   264  }
   265  
   266  // Load loads an existing file given its storage key in the store used by f.
   267  // The specified file need not necessarily be a child of f.
   268  func (f *File) Load(ctx context.Context, key string) (*File, error) {
   269  	return Open(ctx, f.s, key)
   270  }
   271  
   272  // Child returns a view of the children of f.
   273  func (f *File) Child() Child { return Child{f: f} }
   274  
   275  // ReadAt reads up to len(data) bytes into data from the given offset, and
   276  // reports the number of bytes successfully read, as io.ReaderAt.
   277  func (f *File) ReadAt(ctx context.Context, data []byte, offset int64) (int, error) {
   278  	f.mu.RLock()
   279  	defer f.mu.RUnlock()
   280  	return f.data.readAt(ctx, f.s, data, offset)
   281  }
   282  
   283  // WriteAt writes len(data) bytes from data at the given offset, and reports
   284  // the number of bytes successfully written, as io.WriterAt.
   285  func (f *File) WriteAt(ctx context.Context, data []byte, offset int64) (int, error) {
   286  	f.mu.Lock()
   287  	defer f.mu.Unlock()
   288  	defer f.modifyLocked()
   289  	return f.data.writeAt(ctx, f.s, data, offset)
   290  }
   291  
   292  // Flush flushes the current state of the file to storage if necessary, and
   293  // returns the resulting storage key. This is the canonical way to obtain the
   294  // storage key for a file.
   295  func (f *File) Flush(ctx context.Context) (string, error) {
   296  	f.mu.Lock()
   297  	defer f.mu.Unlock()
   298  	return f.recFlushLocked(ctx, nil)
   299  }
   300  
   301  // Key returns the storage key of f if it is known, or "" if the file has not
   302  // been flushed to storage in its current form.
   303  func (f *File) Key() string { f.mu.RLock(); defer f.mu.RUnlock(); return f.key }
   304  
   305  // recFlushLocked recursively flushes f and all its child nodes. The path gives
   306  // the path of nodes from the root to the current flush target, and is used to
   307  // verify that there are no cycles in the graph.
   308  func (f *File) recFlushLocked(ctx context.Context, path []*File) (string, error) {
   309  	// Recursive flush is a long operation, check for timeout/cancellation.
   310  	if ctx.Err() != nil {
   311  		return "", ctx.Err()
   312  	}
   313  	needsUpdate := f.key == ""
   314  
   315  	// Flush any cached children.
   316  	for i, kid := range f.kids {
   317  		if kf := kid.File; kf != nil {
   318  			// Check for direct or indirect cycles. This check is quadratic in the
   319  			// height of the DAG over the whole scan in the worst case. In
   320  			// practice, this doesn't cause any real issues, since it's not common
   321  			// for file structures to be very deep. Compared to the cost of
   322  			// marshaling and writing back invalid entries to storage, the array
   323  			// scan is minor.
   324  			if slices.Contains(path, kf) {
   325  				return "", fmt.Errorf("flush: cycle in path at %p", kf)
   326  			}
   327  			cpath := append(path, f)
   328  			fkey, err := func() (string, error) {
   329  				kf.mu.Lock()
   330  				defer kf.mu.Unlock()
   331  				return kf.recFlushLocked(ctx, cpath)
   332  			}()
   333  			if err != nil {
   334  				return "", err
   335  			}
   336  			if fkey != kid.Key {
   337  				needsUpdate = true
   338  			}
   339  			f.kids[i].Key = fkey
   340  		}
   341  	}
   342  
   343  	if needsUpdate {
   344  		key, err := wiretype.Save(ctx, f.s, f.toWireTypeLocked())
   345  		if err != nil {
   346  			return "", fmt.Errorf("flushing file %x: %w", key, err)
   347  		}
   348  		f.key = key
   349  	}
   350  	return f.key, nil
   351  }
   352  
   353  // Truncate modifies the length of f to end at offset, extending or contracting
   354  // it as necessary.
   355  func (f *File) Truncate(ctx context.Context, offset int64) error {
   356  	f.mu.Lock()
   357  	defer f.mu.Unlock()
   358  	defer f.modifyLocked()
   359  	return f.data.truncate(ctx, f.s, offset)
   360  }
   361  
   362  // SetData fully reads r replaces the binary contents of f with its data.
   363  // On success, any existing data for f are discarded. In case of error, the
   364  // contents of f are not changed.
   365  func (f *File) SetData(ctx context.Context, r io.Reader) error {
   366  	s := block.NewSplitter(r, f.data.sc)
   367  	fd, err := newFileData(s, func(data []byte) (string, error) {
   368  		return f.s.CASPut(ctx, data)
   369  	})
   370  	if err != nil {
   371  		return err
   372  	}
   373  	f.mu.Lock()
   374  	defer f.mu.Unlock()
   375  	f.invalLocked()
   376  	f.data = fd
   377  	return nil
   378  }
   379  
   380  // Name reports the attributed name of f, which may be "" if f is not a child
   381  // file and was not assigned a name at creation.
   382  func (f *File) Name() string { f.mu.RLock(); defer f.mu.RUnlock(); return f.name }
   383  
   384  func (f *File) setName(name string) { f.mu.Lock(); defer f.mu.Unlock(); f.name = name }
   385  
// A ScanItem is the argument to the Scan callback. It embeds the file being
// visited, so the methods of *File may be called directly on the item.
type ScanItem struct {
	*File // the current file being visited

	Name string // the name of File within its parent ("" at the root)
}
   392  
   393  // Scan recursively visits f and all its descendants in depth-first
   394  // left-to-right order, calling visit for each file.  If visit returns false,
   395  // no descendants of f are visited.
   396  //
   397  // The visit function may modify the attributes or contents of the files it
   398  // visits, but the caller is responsible for flushing the root of the scan
   399  // afterward to persist changes to storage.
   400  func (f *File) Scan(ctx context.Context, visit func(ScanItem) bool) error {
   401  	f.mu.Lock()
   402  	defer f.mu.Unlock()
   403  	return f.recScanLocked(ctx, "", func(s ScanItem) bool {
   404  		// Yield the lock while the caller visitor runs, then reacquire it.  We
   405  		// do this so that the visitor can use methods that may themselves update
   406  		// the file, without deadlocking on the scan.
   407  		s.File.mu.Unlock() // N.B. unlock → lock
   408  		defer s.File.mu.Lock()
   409  		return visit(s)
   410  	})
   411  }
   412  
   413  // recScanLocked recursively scans f and all its child nodes in depth-first
   414  // left-to-right order, calling visit for each file.
   415  func (f *File) recScanLocked(ctx context.Context, name string, visit func(ScanItem) bool) error {
   416  	if err := ctx.Err(); err != nil {
   417  		return err
   418  	}
   419  	if !visit(ScanItem{File: f, Name: name}) {
   420  		return nil // skip the descendants of f
   421  	}
   422  	for i, kid := range f.kids {
   423  		fp := kid.File
   424  		if fp == nil {
   425  			// If the child was not already open, we need to do so to scan it, but
   426  			// we won't persist it in the parent unless the visitor invalidated it.
   427  			var err error
   428  			fp, err = Open(ctx, f.s, kid.Key)
   429  			if err != nil {
   430  				return err
   431  			}
   432  		}
   433  		err := func() error {
   434  			fp.mu.Lock()
   435  			defer fp.mu.Unlock()
   436  			return fp.recScanLocked(ctx, kid.Name, visit)
   437  		}()
   438  		if err != nil {
   439  			return err
   440  		}
   441  
   442  		// If scanning invalidated fp, make sure the parent copy is updated.
   443  		// This ensures the parent will include these changes in a flush.
   444  		if fp.key == "" {
   445  			f.kids[i].File = fp
   446  		}
   447  	}
   448  	return nil
   449  }
   450  
   451  // Cursor binds f with a context so that it can be used to satisfy the standard
   452  // interfaces defined by the io package.  The resulting cursor may be used only
   453  // during the lifetime of the request whose context it binds.
   454  func (f *File) Cursor(ctx context.Context) *Cursor { return &Cursor{ctx: ctx, file: f} }
   455  
   456  // XAttr returns a view of the extended attributes of f.
   457  func (f *File) XAttr() XAttr { return XAttr{f: f} }
   458  
   459  // Precondition: The caller holds f.mu exclusively, or has the only reference to f.
   460  func (f *File) fromWireType(obj *wiretype.Object) error {
   461  	pb, ok := obj.Value.(*wiretype.Object_Node)
   462  	if !ok {
   463  		return errors.New("object does not contain a node")
   464  	}
   465  
   466  	pb.Node.Normalize()
   467  	f.data = fileData{} // reset
   468  	if err := f.data.fromWireType(pb.Node.Index); err != nil {
   469  		return fmt.Errorf("index: %w", err)
   470  	}
   471  	f.stat.fromWireType(pb.Node.Stat)
   472  	f.saveStat = pb.Node.Stat != nil
   473  
   474  	f.xattr = make(map[string]string)
   475  	for _, xa := range pb.Node.XAttrs {
   476  		f.xattr[xa.Name] = string(xa.Value)
   477  	}
   478  
   479  	f.kids = nil
   480  	for _, kid := range pb.Node.Children {
   481  		f.kids = append(f.kids, child{
   482  			Name: kid.Name,
   483  			Key:  string(kid.Key),
   484  		})
   485  	}
   486  	return nil
   487  }
   488  
   489  func (f *File) toWireTypeLocked() *wiretype.Object {
   490  	n := &wiretype.Node{Index: f.data.toWireType()}
   491  	if f.saveStat {
   492  		n.Stat = f.stat.toWireType()
   493  	}
   494  	for name, value := range f.xattr {
   495  		n.XAttrs = append(n.XAttrs, &wiretype.XAttr{
   496  			Name:  name,
   497  			Value: []byte(value),
   498  		})
   499  	}
   500  	for _, kid := range f.kids {
   501  		n.Children = append(n.Children, &wiretype.Child{
   502  			Name: kid.Name,
   503  			Key:  []byte(kid.Key),
   504  		})
   505  	}
   506  	n.Normalize()
   507  	return &wiretype.Object{Value: &wiretype.Object_Node{Node: n}}
   508  }
   509  
   510  // Encode translates f as a protobuf message for storage.
   511  func Encode(f *File) *wiretype.Object {
   512  	f.mu.RLock()
   513  	defer f.mu.RUnlock()
   514  	return f.toWireTypeLocked()
   515  }