github.com/grailbio/base@v0.0.11/file/gfilefs/gfile.go (about)

     1  // Copyright 2022 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package gfilefs
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"fmt"
    11  	"io"
    12  	"io/ioutil"
    13  	"os"
    14  	"time"
    15  
    16  	"github.com/grailbio/base/errors"
    17  	"github.com/grailbio/base/file"
    18  	"github.com/grailbio/base/file/fsnodefuse"
    19  	"github.com/grailbio/base/file/internal/readmatcher"
    20  	"github.com/grailbio/base/ioctx"
    21  	"github.com/grailbio/base/ioctx/fsctx"
    22  	"github.com/grailbio/base/sync/ctxsync"
    23  	"github.com/hanwen/go-fuse/v2/fuse"
    24  )
    25  
// gfile implements fsctx.File and fsnodefuse.Writable to represent open
// gfilefs files.  It pairs the node being accessed with the flags it was
// opened with, and delegates all I/O to an ioOps implementation.  Read-only
// files get their ops eagerly in OpenFile; writable files populate ops
// lazily (see lockedInitOps).
type gfile struct {
	// n is the node for which this instance is an open file.
	n *fileNode
	// flag holds the flag bits specified when this file was opened.
	flag int

	// readerAt is an optional ReaderAt implementation. It may only be set upon
	// construction, and must not be modified later. Thus, it can be read by
	// multiple goroutines without holding the lock, without a data race.
	// When non-nil, it serves ReadAt requests concurrently, without ops.
	// Otherwise, gfile.ReadAt uses ops.ReadAt.
	readerAt ioctx.ReaderAt

	// mu provides mutually exclusive access to the fields below.
	mu ctxsync.Mutex
	// requestedSize is the size requested by Truncate.  Note that we only
	// really support truncation to 0 well, as it is mainly used by go-fuse for
	// truncation when handling O_TRUNC.  OpenFile initializes it to -1, i.e.
	// no truncation has been requested yet.
	requestedSize int64
	// flushed is true if there are no writes that need to be flushed.  If
	// flushed == true, Flush is a no-op.
	flushed bool
	// anyWritten tracks whether we have written any bytes to this file.  We
	// use this to decide whether we can use direct writing.
	anyWritten bool
	// ops handles underlying I/O operations.  See ioOps.  ops may be lazily
	// populated, and it may be reassigned over the lifetime of the file, e.g.
	// after truncation, we may switch to an ops that no longer uses a
	// temporary file.  lockedFlush resets it to nil when the ops reports that
	// it must not be reused.
	ops ioOps
}
    59  
// Compile-time assertions that *gfile satisfies the interfaces it is meant
// to implement.
var (
	_ fsctx.File          = (*gfile)(nil)
	_ ioctx.ReaderAt      = (*gfile)(nil)
	_ fsnodefuse.Writable = (*gfile)(nil)
)
    65  
    66  // OpenFile opens the file at n and returns a *gfile representing it for file
    67  // operations.
    68  func OpenFile(ctx context.Context, n *fileNode, flag int) (*gfile, error) {
    69  	gf := &gfile{
    70  		n:             n,
    71  		flag:          flag,
    72  		requestedSize: -1,
    73  		// Creation and truncation require flushing.
    74  		flushed: (flag&os.O_CREATE) == 0 && (flag&os.O_TRUNC) == 0,
    75  	}
    76  	if (flag & int(fuse.O_ANYWRITE)) == 0 {
    77  		// Read-only files are initialized eagerly, as it is cheap, and we can
    78  		// immediately return any errors.  Writable files are initialized
    79  		// lazily; see lockedInitOps.
    80  		f, err := file.Open(ctx, n.path)
    81  		if err != nil {
    82  			return nil, err
    83  		}
    84  		dr := directRead{
    85  			f:       f,
    86  			matcher: readmatcher.New(f.OffsetReader),
    87  			r:       f.Reader(context.Background()), // TODO: Tie to gf lifetime?
    88  		}
    89  		gf.ops = &dr
    90  		gf.readerAt = dr.matcher
    91  		return gf, nil
    92  	}
    93  	return gf, nil
    94  }
    95  
    96  // Stat implements fsctx.File.
    97  func (gf *gfile) Stat(ctx context.Context) (os.FileInfo, error) {
    98  	if err := gf.mu.Lock(ctx); err != nil {
    99  		return nil, err
   100  	}
   101  	defer gf.mu.Unlock()
   102  	if err := gf.lockedInitOps(ctx); err != nil {
   103  		return nil, err
   104  	}
   105  	info, err := gf.ops.Stat(ctx)
   106  	if err != nil {
   107  		if errors.Recover(err).Kind == errors.NotSupported {
   108  			return gf.n.Info(), nil
   109  		}
   110  		return nil, errors.E(err, "getting stat info from underlying I/O")
   111  	}
   112  	newInfo := gf.n.fsnodeInfo().
   113  		WithModTime(info.ModTime()).
   114  		WithSize(info.Size())
   115  	gf.n.setFsnodeInfo(newInfo)
   116  	return newInfo, nil
   117  }
   118  
   119  // Read implements fsctx.File.
   120  func (gf *gfile) Read(ctx context.Context, p []byte) (int, error) {
   121  	if err := gf.mu.Lock(ctx); err != nil {
   122  		return 0, err
   123  	}
   124  	defer gf.mu.Unlock()
   125  	if err := gf.lockedInitOps(ctx); err != nil {
   126  		return 0, err
   127  	}
   128  	return gf.ops.Read(ctx, p)
   129  }
   130  
   131  // ReadAt implements ioctx.ReaderAt.
   132  func (gf *gfile) ReadAt(ctx context.Context, p []byte, off int64) (int, error) {
   133  	if gf.readerAt != nil {
   134  		return gf.readerAt.ReadAt(ctx, p, off)
   135  	}
   136  	if err := gf.mu.Lock(ctx); err != nil {
   137  		return 0, err
   138  	}
   139  	defer gf.mu.Unlock()
   140  	if err := gf.lockedInitOps(ctx); err != nil {
   141  		return 0, err
   142  	}
   143  	return gf.ops.ReadAt(ctx, p, off)
   144  }
   145  
   146  // WriteAt implements fsnodefuse.Writable.
   147  func (gf *gfile) WriteAt(ctx context.Context, p []byte, off int64) (int, error) {
   148  	if err := gf.mu.Lock(ctx); err != nil {
   149  		return 0, err
   150  	}
   151  	defer gf.mu.Unlock()
   152  	if err := gf.lockedInitOps(ctx); err != nil {
   153  		return 0, err
   154  	}
   155  	n, err := gf.ops.WriteAt(ctx, p, off)
   156  	if err != nil {
   157  		return n, err
   158  	}
   159  	gf.anyWritten = true
   160  	gf.flushed = false
   161  	return n, err
   162  }
   163  
   164  // Truncate implements fsnodefuse.Writable.
   165  func (gf *gfile) Truncate(ctx context.Context, size int64) error {
   166  	if err := gf.mu.Lock(ctx); err != nil {
   167  		return err
   168  	}
   169  	defer gf.mu.Unlock()
   170  	gf.flushed = false
   171  	if gf.ops == nil {
   172  		gf.requestedSize = 0
   173  		return nil
   174  	}
   175  	return gf.ops.Truncate(ctx, size)
   176  }
   177  
   178  // Flush implements fsnodefuse.Writable.
   179  func (gf *gfile) Flush(ctx context.Context) error {
   180  	if err := gf.mu.Lock(ctx); err != nil {
   181  		return err
   182  	}
   183  	defer gf.mu.Unlock()
   184  	return gf.lockedFlush()
   185  }
   186  
   187  // Fsync implements fsnodefuse.Writable.
   188  func (gf *gfile) Fsync(ctx context.Context) error {
   189  	// We treat Fsync as Flush, mostly because leaving it unimplemented
   190  	// (ENOSYS) breaks too many applications.
   191  	return gf.Flush(ctx)
   192  }
   193  
   194  // Close implements fsctx.File.
   195  func (gf *gfile) Close(ctx context.Context) error {
   196  	if err := gf.mu.Lock(ctx); err != nil {
   197  		return err
   198  	}
   199  	defer gf.mu.Unlock()
   200  	if gf.ops == nil {
   201  		return nil
   202  	}
   203  	return gf.ops.Close(ctx)
   204  }
   205  
   206  // lockedInitOps initializes the ops that handle the underlying I/O operations
   207  // of gf.  This is done lazily in some cases, as it may be expensive, e.g.
   208  // downloading a remotely stored file locally.  Initialization may also depend
   209  // on other operations, e.g. if the first manipulation is truncation, then we
   210  // won't download existing data.  gf.ops is non-nil iff lockedInitOps returns a
   211  // nil error.  The caller must have gf.mu locked.
   212  func (gf *gfile) lockedInitOps(ctx context.Context) (err error) {
   213  	if gf.ops != nil {
   214  		return nil
   215  	}
   216  	// base/file does not expose an API to open a file for writing without
   217  	// creating it, so writing implies creation.
   218  	const tmpPattern = "gfilefs-"
   219  	var (
   220  		rdwr = (gf.flag & os.O_RDWR) == os.O_RDWR
   221  		// Treat O_EXCL as O_TRUNC, as the file package does not support
   222  		// O_EXCL.
   223  		trunc = !gf.anyWritten &&
   224  			(gf.requestedSize == 0 ||
   225  				(gf.flag&os.O_TRUNC) == os.O_TRUNC ||
   226  				(gf.flag&os.O_EXCL) == os.O_EXCL)
   227  	)
   228  	switch {
   229  	case trunc && rdwr:
   230  		tmp, err := ioutil.TempFile("", tmpPattern)
   231  		if err != nil {
   232  			return errors.E(err, "making temp file")
   233  		}
   234  		gf.ops = &tmpIO{n: gf.n, f: tmp}
   235  		return nil
   236  	case trunc:
   237  		f, err := file.Create(ctx, gf.n.path)
   238  		if err != nil {
   239  			return errors.E(err, fmt.Sprintf("creating file at %q", gf.n.path))
   240  		}
   241  		// This is a workaround for the fact that directWrite ops do not
   242  		// support Stat (as write-only s3files do not support Stat).  Callers,
   243  		// e.g. fsnodefuse, may fall back to use the node's information, so we
   244  		// zero that to keep a sensible view.
   245  		gf.n.setFsnodeInfo(gf.n.fsnodeInfo().WithSize(0))
   246  		gf.ops = &directWrite{
   247  			n:   gf.n,
   248  			f:   f,
   249  			w:   f.Writer(context.Background()), // TODO: Tie to gf lifetime?
   250  			off: 0,
   251  		}
   252  		return nil
   253  	default:
   254  		// existing reads out existing file contents.  Contents may be empty if
   255  		// no file exists yet.
   256  		var existing io.Reader
   257  		f, err := file.Open(ctx, gf.n.path)
   258  		if err == nil {
   259  			existing = f.Reader(ctx)
   260  		} else {
   261  			if errors.Is(errors.NotExist, err) {
   262  				if !rdwr {
   263  					// Write-only and no existing file, so we can use direct
   264  					// I/O.
   265  					f, err = file.Create(ctx, gf.n.path)
   266  					if err != nil {
   267  						return errors.E(err, fmt.Sprintf("creating file at %q", gf.n.path))
   268  					}
   269  					gf.ops = &directWrite{
   270  						n:   gf.n,
   271  						f:   f,
   272  						w:   f.Writer(context.Background()), // TODO: Tie to gf lifetime?
   273  						off: 0,
   274  					}
   275  					return nil
   276  				}
   277  				// No existing file, so there are no existing contents.
   278  				err = nil
   279  				existing = &bytes.Buffer{}
   280  			} else {
   281  				return errors.E(err, fmt.Sprintf("opening file for %q", gf.n.path))
   282  			}
   283  		}
   284  		tmp, err := ioutil.TempFile("", tmpPattern)
   285  		if err != nil {
   286  			// fp was opened for reading, so don't worry about the error on
   287  			// Close.
   288  			_ = f.Close(ctx)
   289  			return errors.E(err, "making temp file")
   290  		}
   291  		_, err = io.Copy(tmp, existing)
   292  		if err != nil {
   293  			// We're going to report the copy error, so we treat closing as
   294  			// best-effort.
   295  			_ = f.Close(ctx)
   296  			_ = tmp.Close()
   297  			return errors.E(err, fmt.Sprintf("copying current contents to temp file %q", tmp.Name()))
   298  		}
   299  		gf.ops = &tmpIO{n: gf.n, f: tmp}
   300  		return nil
   301  	}
   302  }
   303  
   304  // lockedFlush flushes writes to the backing write I/O state.  The caller must
   305  // have gf.mu locked.
   306  func (gf *gfile) lockedFlush() (err error) {
   307  	// We use a background context when flushing as a workaround for handling
   308  	// interrupted operations, particularly from Go clients.  As of Go 1.14,
   309  	// slow system calls may see more EINTR errors[1].  While most file
   310  	// operations are automatically retried[2], closing (which results in
   311  	// flushing) is not[3].  Ultimately, clients may see spurious, confusing
   312  	// failures calling (*os.File).Close.  Given that it is extremely uncommon
   313  	// for callers to retry, we ignore interrupts to avoid the confusion.  The
   314  	// significant downside is that intentional interruption, e.g. CTRL-C on a
   315  	// program that is taking too long, is also ignored, so processes can
   316  	// appear hung.
   317  	//
   318  	// TODO: Consider a better way of handling this problem.
   319  	//
   320  	// [1] https://go.dev/doc/go1.14#runtime
   321  	// [2] https://github.com/golang/go/commit/6b420169d798c7ebe733487b56ea5c3fa4aab5ce
   322  	// [3] https://github.com/golang/go/blob/go1.17.8/src/internal/poll/fd_unix.go#L79-L83
   323  	ctx := context.Background()
   324  	if gf.flushed {
   325  		return nil
   326  	}
   327  	defer func() {
   328  		if err == nil {
   329  			gf.flushed = true
   330  		}
   331  	}()
   332  	if (gf.flag & int(fuse.O_ANYWRITE)) != 0 {
   333  		if err = gf.lockedInitOps(ctx); err != nil {
   334  			return err
   335  		}
   336  	}
   337  	reuseOps, err := gf.ops.Flush(ctx)
   338  	if err != nil {
   339  		return err
   340  	}
   341  	if !reuseOps {
   342  		gf.ops = nil
   343  	}
   344  	return nil
   345  }
   346  
// ioOps handles the underlying I/O operations for a *gfile.  Implementations
// may directly call base/file or use a temporary file on local disk until
// flush.  A gfile only calls these methods while holding its mutex, except
// that a read-only gfile serves ReadAt through its separate readerAt.
type ioOps interface {
	// Stat returns information about the underlying file.  gfile.Stat falls
	// back to the node's own information when the returned error has kind
	// errors.NotSupported.
	Stat(ctx context.Context) (file.Info, error)
	// Read reads up to len(p) bytes into p.
	Read(ctx context.Context, p []byte) (int, error)
	// ReadAt reads into p starting at offset off.
	ReadAt(ctx context.Context, p []byte, off int64) (int, error)
	// WriteAt writes p at offset off.
	WriteAt(ctx context.Context, p []byte, off int64) (int, error)
	// Truncate changes the size of the underlying data to size.
	Truncate(ctx context.Context, size int64) error
	// Flush persists pending writes.  reuseOps reports whether this ioOps
	// may keep being used afterwards; when Flush succeeds with
	// reuseOps == false, the owning gfile discards the ioOps and initializes
	// a new one on the next operation.
	Flush(ctx context.Context) (reuseOps bool, _ error)
	// Close releases the resources held by the implementation.
	Close(ctx context.Context) error
}
   359  
   360  // directRead implements ioOps.  It reads directly using base/file and does not
   361  // support writes, e.g. to handle O_RDONLY.
   362  type directRead struct {
   363  	f       file.File
   364  	matcher interface {
   365  		ioctx.ReaderAt
   366  		ioctx.Closer
   367  	}
   368  	r io.ReadSeeker
   369  }
   370  
   371  var _ ioOps = (*directRead)(nil)
   372  
   373  func (ops *directRead) Stat(ctx context.Context) (file.Info, error) {
   374  	return ops.f.Stat(ctx)
   375  }
   376  
   377  func (ops *directRead) Read(ctx context.Context, p []byte) (int, error) {
   378  	return ops.r.Read(p)
   379  }
   380  
   381  func (ops *directRead) ReadAt(ctx context.Context, p []byte, off int64) (_ int, err error) {
   382  	return ops.matcher.ReadAt(ctx, p, off)
   383  }
   384  
   385  func (*directRead) WriteAt(ctx context.Context, p []byte, off int64) (int, error) {
   386  	return 0, errors.E(errors.Invalid, "writing read-only file")
   387  }
   388  
   389  func (*directRead) Truncate(ctx context.Context, size int64) error {
   390  	return errors.E(errors.Invalid, "cannot truncate read-only file")
   391  }
   392  
   393  func (*directRead) Flush(ctx context.Context) (reuseOps bool, _ error) {
   394  	return true, nil
   395  }
   396  
   397  func (ops *directRead) Close(ctx context.Context) error {
   398  	err := ops.matcher.Close(ctx)
   399  	errors.CleanUpCtx(ctx, ops.f.Close, &err)
   400  	return err
   401  }
   402  
   403  // directWrite implements ioOps.  It writes directly using base/file and does
   404  // not support reads, e.g. to handle O_WRONLY|O_TRUNC.
   405  type directWrite struct {
   406  	n   *fileNode
   407  	f   file.File
   408  	w   io.Writer
   409  	off int64
   410  }
   411  
   412  var _ ioOps = (*directWrite)(nil)
   413  
   414  func (ops directWrite) Stat(ctx context.Context) (file.Info, error) {
   415  	return ops.f.Stat(ctx)
   416  }
   417  
   418  func (directWrite) Read(ctx context.Context, p []byte) (int, error) {
   419  	return 0, errors.E(errors.Invalid, "reading write-only file")
   420  }
   421  
   422  func (directWrite) ReadAt(ctx context.Context, p []byte, off int64) (int, error) {
   423  	return 0, errors.E(errors.Invalid, "reading write-only file")
   424  }
   425  
   426  func (ops *directWrite) WriteAt(ctx context.Context, p []byte, off int64) (int, error) {
   427  	if off != ops.off {
   428  		return 0, errors.E(errors.NotSupported, "non-contiguous write")
   429  	}
   430  	n, err := ops.w.Write(p)
   431  	ops.off += int64(n)
   432  	return n, err
   433  }
   434  
   435  func (ops directWrite) Truncate(ctx context.Context, size int64) error {
   436  	if ops.off != size {
   437  		return errors.E(errors.NotSupported, "truncating to %d not supported by direct I/O")
   438  	}
   439  	return nil
   440  }
   441  
   442  func (ops *directWrite) Flush(ctx context.Context) (reuseOps bool, _ error) {
   443  	err := ops.f.Close(ctx)
   444  	ops.n.setFsnodeInfo(
   445  		ops.n.fsnodeInfo().
   446  			WithModTime(time.Now()).
   447  			WithSize(ops.off),
   448  	)
   449  	// Clear to catch accidental reuse.
   450  	*ops = directWrite{}
   451  	return false, err
   452  }
   453  
   454  func (ops directWrite) Close(ctx context.Context) error {
   455  	return ops.f.Close(ctx)
   456  }
   457  
   458  // tmpIO implements ioOps.  It is backed by a temporary local file, e.g. to
   459  // handle O_RDWR.
   460  type tmpIO struct {
   461  	n *fileNode
   462  	f *os.File // refers to a file in -tmp-dir.
   463  }
   464  
   465  var _ ioOps = (*tmpIO)(nil)
   466  
   467  func (ops tmpIO) Stat(_ context.Context) (file.Info, error) {
   468  	return ops.f.Stat()
   469  }
   470  
   471  func (ops tmpIO) Read(_ context.Context, p []byte) (int, error) {
   472  	return ops.f.Read(p)
   473  }
   474  
   475  func (ops tmpIO) ReadAt(_ context.Context, p []byte, off int64) (int, error) {
   476  	return ops.f.ReadAt(p, off)
   477  }
   478  
   479  func (ops tmpIO) WriteAt(_ context.Context, p []byte, off int64) (int, error) {
   480  	return ops.f.WriteAt(p, off)
   481  }
   482  
   483  func (ops tmpIO) Truncate(_ context.Context, size int64) error {
   484  	return ops.f.Truncate(size)
   485  }
   486  
   487  func (ops *tmpIO) Flush(ctx context.Context) (reuseOps bool, err error) {
   488  	dst, err := file.Create(ctx, ops.n.path)
   489  	if err != nil {
   490  		return false, errors.E(err, fmt.Sprintf("creating file %q", ops.n.path))
   491  	}
   492  	defer file.CloseAndReport(ctx, dst, &err)
   493  	n, err := io.Copy(dst.Writer(ctx), &readerAdapter{r: ops.f})
   494  	if err != nil {
   495  		return false, errors.E(
   496  			err,
   497  			fmt.Sprintf("copying from %q to %q", ops.f.Name(), ops.n.path),
   498  		)
   499  	}
   500  	ops.n.setFsnodeInfo(
   501  		ops.n.fsnodeInfo().
   502  			WithModTime(time.Now()).
   503  			WithSize(n),
   504  	)
   505  	return true, nil
   506  }
   507  
   508  // readerAdapter adapts an io.ReaderAt to be an io.Reader, calling ReadAt and
   509  // maintaining the offset for the next Read.
   510  type readerAdapter struct {
   511  	r   io.ReaderAt
   512  	off int64
   513  }
   514  
   515  func (a *readerAdapter) Read(p []byte) (int, error) {
   516  	n, err := a.r.ReadAt(p, a.off)
   517  	a.off += int64(n)
   518  	return n, err
   519  }
   520  
   521  func (ops *tmpIO) Close(_ context.Context) error {
   522  	err := ops.f.Close()
   523  	if errRemove := os.Remove(ops.f.Name()); errRemove != nil && err == nil {
   524  		err = errors.E(errRemove, "removing tmpIO file")
   525  	}
   526  	return err
   527  }