github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/file.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fs
    16  
    17  import (
    18  	"math"
    19  	"sync/atomic"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/amutex"
    22  	"github.com/SagerNet/gvisor/pkg/context"
    23  	"github.com/SagerNet/gvisor/pkg/refs"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/fs/lock"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/limits"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/uniqueid"
    29  	"github.com/SagerNet/gvisor/pkg/sync"
    30  	"github.com/SagerNet/gvisor/pkg/syserror"
    31  	"github.com/SagerNet/gvisor/pkg/usermem"
    32  	"github.com/SagerNet/gvisor/pkg/waiter"
    33  )
    34  
// FileMaxOffset is the maximum possible file offset. It is defined as
// math.MaxInt64, the largest value representable by the int64 offsets used
// throughout this file.
const FileMaxOffset = math.MaxInt64
    37  
// File is an open file handle. It is thread-safe.
//
// File provides stronger synchronization guarantees than Linux. Linux
// synchronizes lseek(2), read(2), and write(2) with respect to the file
// offset for regular files and only for those interfaces. See
// fs/read_write.c:fdget_pos, fs/read_write.c:fdput_pos and FMODE_ATOMIC_POS.
//
// In contrast, File synchronizes any operation that could take a long time
// under a single abortable mutex which also synchronizes lseek(2), read(2),
// and write(2).
//
// FIXME(b/38451980): Split synchronization from cancellation.
//
// +stateify savable
type File struct {
	refs.AtomicRefCount

	// UniqueID is the globally unique identifier of the File. NewFile
	// obtains it from uniqueid.GlobalFromContext.
	UniqueID uint64

	// Dirent is the Dirent backing this File. This encodes the name
	// of the File via Dirent.FullName() as well as its identity via the
	// Dirent's Inode. The Dirent is non-nil.
	//
	// A File holds a reference to this Dirent. Using the returned Dirent is
	// only safe as long as a reference on the File is held. The association
	// between a File and a Dirent is immutable.
	//
	// Files that are not parented in a filesystem return a root Dirent
	// that holds a reference to their Inode.
	//
	// The name of the Dirent may reflect parentage if the Dirent is not a
	// root Dirent or the identity of the File on a pseudo filesystem (pipefs,
	// sockfs, etc).
	//
	// Multiple Files may hold a reference to the same Dirent. This is the
	// common case for Files that are parented and maintain consistency with
	// other files via the Dirent cache.
	Dirent *Dirent

	// flagsMu protects flags and async below.
	flagsMu sync.Mutex `state:"nosave"`

	// flags are the File's flags. Setting or getting flags is fully atomic
	// and is not protected by mu (below); Flags and SetFlags take flagsMu
	// instead.
	flags FileFlags

	// async handles O_ASYNC notifications. It stays nil until Async is
	// called with a non-nil constructor.
	async FileAsync

	// saving indicates that this file is in the process of being saved.
	// While it is set, DecRef does not unregister async.
	saving bool `state:"nosave"`

	// mu is dual-purpose: first, to make read(2) and write(2) thread-safe
	// in conformity with POSIX, and second, to cancel operations before they
	// begin in response to interruptions (i.e. signals).
	mu amutex.AbortableMutex `state:"nosave"`

	// FileOperations implements file system specific behavior for this File.
	FileOperations FileOperations `state:"wait"`

	// offset is the File's offset. Updating offset is protected by mu but
	// can be read atomically via File.Offset() outside of mu.
	offset int64
}
   103  
   104  // NewFile returns a File. It takes a reference on the Dirent and owns the
   105  // lifetime of the FileOperations. Files that do not support reading and
   106  // writing at an arbitrary offset should set flags.Pread and flags.Pwrite
   107  // to false respectively.
   108  func NewFile(ctx context.Context, dirent *Dirent, flags FileFlags, fops FileOperations) *File {
   109  	dirent.IncRef()
   110  	f := File{
   111  		UniqueID:       uniqueid.GlobalFromContext(ctx),
   112  		Dirent:         dirent,
   113  		FileOperations: fops,
   114  		flags:          flags,
   115  	}
   116  	f.mu.Init()
   117  	f.EnableLeakCheck("fs.File")
   118  	return &f
   119  }
   120  
   121  // DecRef destroys the File when it is no longer referenced.
   122  func (f *File) DecRef(ctx context.Context) {
   123  	f.DecRefWithDestructor(ctx, func(context.Context) {
   124  		// Drop BSD style locks.
   125  		lockRng := lock.LockRange{Start: 0, End: lock.LockEOF}
   126  		f.Dirent.Inode.LockCtx.BSD.UnlockRegion(f, lockRng)
   127  
   128  		// Release resources held by the FileOperations.
   129  		f.FileOperations.Release(ctx)
   130  
   131  		// Release a reference on the Dirent.
   132  		f.Dirent.DecRef(ctx)
   133  
   134  		// Only unregister if we are currently registered. There is nothing
   135  		// to register if f.async is nil (this happens when async mode is
   136  		// enabled without setting an owner). Also, we unregister during
   137  		// save.
   138  		f.flagsMu.Lock()
   139  		if !f.saving && f.flags.Async && f.async != nil {
   140  			f.async.Unregister(f)
   141  		}
   142  		f.async = nil
   143  		f.flagsMu.Unlock()
   144  	})
   145  }
   146  
   147  // Flags atomically loads the File's flags.
   148  func (f *File) Flags() FileFlags {
   149  	f.flagsMu.Lock()
   150  	flags := f.flags
   151  	f.flagsMu.Unlock()
   152  	return flags
   153  }
   154  
   155  // SetFlags atomically changes the File's flags to the values contained
   156  // in newFlags. See SettableFileFlags for values that can be set.
   157  func (f *File) SetFlags(newFlags SettableFileFlags) {
   158  	f.flagsMu.Lock()
   159  	f.flags.Direct = newFlags.Direct
   160  	f.flags.NonBlocking = newFlags.NonBlocking
   161  	f.flags.Append = newFlags.Append
   162  	if f.async != nil {
   163  		if newFlags.Async && !f.flags.Async {
   164  			f.async.Register(f)
   165  		}
   166  		if !newFlags.Async && f.flags.Async {
   167  			f.async.Unregister(f)
   168  		}
   169  	}
   170  	f.flags.Async = newFlags.Async
   171  	f.flagsMu.Unlock()
   172  }
   173  
   174  // Offset atomically loads the File's offset.
   175  func (f *File) Offset() int64 {
   176  	return atomic.LoadInt64(&f.offset)
   177  }
   178  
   179  // Readiness implements waiter.Waitable.Readiness.
   180  func (f *File) Readiness(mask waiter.EventMask) waiter.EventMask {
   181  	return f.FileOperations.Readiness(mask)
   182  }
   183  
   184  // EventRegister implements waiter.Waitable.EventRegister.
   185  func (f *File) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
   186  	f.FileOperations.EventRegister(e, mask)
   187  }
   188  
   189  // EventUnregister implements waiter.Waitable.EventUnregister.
   190  func (f *File) EventUnregister(e *waiter.Entry) {
   191  	f.FileOperations.EventUnregister(e)
   192  }
   193  
   194  // Seek calls f.FileOperations.Seek with f as the File, updating the file
   195  // offset to the value returned by f.FileOperations.Seek if the operation
   196  // is successful.
   197  //
   198  // Returns syserror.ErrInterrupted if seeking was interrupted.
   199  func (f *File) Seek(ctx context.Context, whence SeekWhence, offset int64) (int64, error) {
   200  	if !f.mu.Lock(ctx) {
   201  		return 0, syserror.ErrInterrupted
   202  	}
   203  	defer f.mu.Unlock()
   204  
   205  	newOffset, err := f.FileOperations.Seek(ctx, f, whence, offset)
   206  	if err == nil {
   207  		atomic.StoreInt64(&f.offset, newOffset)
   208  	}
   209  	return newOffset, err
   210  }
   211  
   212  // Readdir reads the directory entries of this File and writes them out
   213  // to the DentrySerializer until entries can no longer be written. If even
   214  // a single directory entry is written then Readdir returns a nil error
   215  // and the directory offset is advanced.
   216  //
   217  // Readdir unconditionally updates the access time on the File's Inode,
   218  // see fs/readdir.c:iterate_dir.
   219  //
   220  // Returns syserror.ErrInterrupted if reading was interrupted.
   221  func (f *File) Readdir(ctx context.Context, serializer DentrySerializer) error {
   222  	if !f.mu.Lock(ctx) {
   223  		return syserror.ErrInterrupted
   224  	}
   225  	defer f.mu.Unlock()
   226  
   227  	offset, err := f.FileOperations.Readdir(ctx, f, serializer)
   228  	atomic.StoreInt64(&f.offset, offset)
   229  	return err
   230  }
   231  
   232  // Readv calls f.FileOperations.Read with f as the File, advancing the file
   233  // offset if f.FileOperations.Read returns bytes read > 0.
   234  //
   235  // Returns syserror.ErrInterrupted if reading was interrupted.
   236  func (f *File) Readv(ctx context.Context, dst usermem.IOSequence) (int64, error) {
   237  	start := fsmetric.StartReadWait()
   238  	defer fsmetric.FinishReadWait(fsmetric.ReadWait, start)
   239  
   240  	if !f.mu.Lock(ctx) {
   241  		return 0, syserror.ErrInterrupted
   242  	}
   243  
   244  	fsmetric.Reads.Increment()
   245  	n, err := f.FileOperations.Read(ctx, f, dst, f.offset)
   246  	if n > 0 && !f.flags.NonSeekable {
   247  		atomic.AddInt64(&f.offset, n)
   248  	}
   249  	f.mu.Unlock()
   250  	return n, err
   251  }
   252  
   253  // Preadv calls f.FileOperations.Read with f as the File. It does not
   254  // advance the file offset. If !f.Flags().Pread, Preadv should not be
   255  // called.
   256  //
   257  // Otherwise same as Readv.
   258  func (f *File) Preadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) {
   259  	start := fsmetric.StartReadWait()
   260  	defer fsmetric.FinishReadWait(fsmetric.ReadWait, start)
   261  
   262  	if !f.mu.Lock(ctx) {
   263  		return 0, syserror.ErrInterrupted
   264  	}
   265  
   266  	fsmetric.Reads.Increment()
   267  	n, err := f.FileOperations.Read(ctx, f, dst, offset)
   268  	f.mu.Unlock()
   269  	return n, err
   270  }
   271  
   272  // Writev calls f.FileOperations.Write with f as the File, advancing the
   273  // file offset if f.FileOperations.Write returns bytes written > 0.
   274  //
   275  // Writev positions the write offset at EOF if f.Flags().Append. This is
   276  // unavoidably racy for network file systems. Writev also truncates src
   277  // to avoid overrunning the current file size limit if necessary.
   278  //
   279  // Returns syserror.ErrInterrupted if writing was interrupted.
   280  func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error) {
   281  	if !f.mu.Lock(ctx) {
   282  		return 0, syserror.ErrInterrupted
   283  	}
   284  	unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
   285  	// Handle append mode.
   286  	if f.Flags().Append {
   287  		if err := f.offsetForAppend(ctx, &f.offset); err != nil {
   288  			unlockAppendMu()
   289  			f.mu.Unlock()
   290  			return 0, err
   291  		}
   292  	}
   293  
   294  	// Enforce file limits.
   295  	limit, ok := f.checkLimit(ctx, f.offset)
   296  	switch {
   297  	case ok && limit == 0:
   298  		unlockAppendMu()
   299  		f.mu.Unlock()
   300  		return 0, syserror.ErrExceedsFileSizeLimit
   301  	case ok:
   302  		src = src.TakeFirst64(limit)
   303  	}
   304  
   305  	// We must hold the lock during the write.
   306  	n, err := f.FileOperations.Write(ctx, f, src, f.offset)
   307  	if n >= 0 && !f.flags.NonSeekable {
   308  		atomic.StoreInt64(&f.offset, f.offset+n)
   309  	}
   310  	unlockAppendMu()
   311  	f.mu.Unlock()
   312  	return n, err
   313  }
   314  
   315  // Pwritev calls f.FileOperations.Write with f as the File. It does not
   316  // advance the file offset. If !f.Flags().Pwritev, Pwritev should not be
   317  // called.
   318  //
   319  // Otherwise same as Writev.
   320  func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
   321  	// "POSIX requires that opening a file with the O_APPEND flag should
   322  	// have no effect on the location at which pwrite() writes data.
   323  	// However, on Linux, if a file is opened with O_APPEND,  pwrite()
   324  	// appends data to the end of the file, regardless of the value of
   325  	// offset."
   326  	unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append)
   327  	defer unlockAppendMu()
   328  	if f.Flags().Append {
   329  		if err := f.offsetForAppend(ctx, &offset); err != nil {
   330  			return 0, err
   331  		}
   332  	}
   333  
   334  	// Enforce file limits.
   335  	limit, ok := f.checkLimit(ctx, offset)
   336  	switch {
   337  	case ok && limit == 0:
   338  		return 0, syserror.ErrExceedsFileSizeLimit
   339  	case ok:
   340  		src = src.TakeFirst64(limit)
   341  	}
   342  
   343  	return f.FileOperations.Write(ctx, f, src, offset)
   344  }
   345  
   346  // offsetForAppend atomically sets the given offset to the end of the file.
   347  //
   348  // Precondition: the file.Dirent.Inode.appendMu mutex should be held for
   349  // writing.
   350  func (f *File) offsetForAppend(ctx context.Context, offset *int64) error {
   351  	uattr, err := f.Dirent.Inode.UnstableAttr(ctx)
   352  	if err != nil {
   353  		// This is an odd error, we treat it as evidence that
   354  		// something is terribly wrong with the filesystem.
   355  		return syserror.EIO
   356  	}
   357  
   358  	// Update the offset.
   359  	atomic.StoreInt64(offset, uattr.Size)
   360  
   361  	return nil
   362  }
   363  
   364  // checkLimit checks the offset that the write will be performed at. The
   365  // returned boolean indicates that the write must be limited. The returned
   366  // integer indicates the new maximum write length.
   367  func (f *File) checkLimit(ctx context.Context, offset int64) (int64, bool) {
   368  	if IsRegular(f.Dirent.Inode.StableAttr) {
   369  		// Enforce size limits.
   370  		fileSizeLimit := limits.FromContext(ctx).Get(limits.FileSize).Cur
   371  		if fileSizeLimit <= math.MaxInt64 {
   372  			if offset >= int64(fileSizeLimit) {
   373  				return 0, true
   374  			}
   375  			return int64(fileSizeLimit) - offset, true
   376  		}
   377  	}
   378  
   379  	return 0, false
   380  }
   381  
   382  // Fsync calls f.FileOperations.Fsync with f as the File.
   383  //
   384  // Returns syserror.ErrInterrupted if syncing was interrupted.
   385  func (f *File) Fsync(ctx context.Context, start int64, end int64, syncType SyncType) error {
   386  	if !f.mu.Lock(ctx) {
   387  		return syserror.ErrInterrupted
   388  	}
   389  	defer f.mu.Unlock()
   390  
   391  	return f.FileOperations.Fsync(ctx, f, start, end, syncType)
   392  }
   393  
   394  // Flush calls f.FileOperations.Flush with f as the File.
   395  //
   396  // Returns syserror.ErrInterrupted if syncing was interrupted.
   397  func (f *File) Flush(ctx context.Context) error {
   398  	if !f.mu.Lock(ctx) {
   399  		return syserror.ErrInterrupted
   400  	}
   401  	defer f.mu.Unlock()
   402  
   403  	return f.FileOperations.Flush(ctx, f)
   404  }
   405  
   406  // ConfigureMMap calls f.FileOperations.ConfigureMMap with f as the File.
   407  //
   408  // Returns syserror.ErrInterrupted if interrupted.
   409  func (f *File) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
   410  	if !f.mu.Lock(ctx) {
   411  		return syserror.ErrInterrupted
   412  	}
   413  	defer f.mu.Unlock()
   414  
   415  	return f.FileOperations.ConfigureMMap(ctx, f, opts)
   416  }
   417  
   418  // UnstableAttr calls f.FileOperations.UnstableAttr with f as the File.
   419  //
   420  // Returns syserror.ErrInterrupted if interrupted.
   421  func (f *File) UnstableAttr(ctx context.Context) (UnstableAttr, error) {
   422  	if !f.mu.Lock(ctx) {
   423  		return UnstableAttr{}, syserror.ErrInterrupted
   424  	}
   425  	defer f.mu.Unlock()
   426  
   427  	return f.FileOperations.UnstableAttr(ctx, f)
   428  }
   429  
   430  // MappedName implements memmap.MappingIdentity.MappedName.
   431  func (f *File) MappedName(ctx context.Context) string {
   432  	root := RootFromContext(ctx)
   433  	if root != nil {
   434  		defer root.DecRef(ctx)
   435  	}
   436  	name, _ := f.Dirent.FullName(root)
   437  	return name
   438  }
   439  
   440  // DeviceID implements memmap.MappingIdentity.DeviceID.
   441  func (f *File) DeviceID() uint64 {
   442  	return f.Dirent.Inode.StableAttr.DeviceID
   443  }
   444  
   445  // InodeID implements memmap.MappingIdentity.InodeID.
   446  func (f *File) InodeID() uint64 {
   447  	return f.Dirent.Inode.StableAttr.InodeID
   448  }
   449  
   450  // Msync implements memmap.MappingIdentity.Msync.
   451  func (f *File) Msync(ctx context.Context, mr memmap.MappableRange) error {
   452  	return f.Fsync(ctx, int64(mr.Start), int64(mr.End-1), SyncData)
   453  }
   454  
// A FileAsync sends signals to its owner when w is ready for IO.
//
// Within this file, File passes itself as w, and every call is made with
// File.flagsMu held.
type FileAsync interface {
	// Register is called with the File when async mode is switched on for
	// it (see File.SetFlags and File.Async).
	Register(w waiter.Waitable)

	// Unregister is called with the File when async mode is switched off
	// for it, or when the File is destroyed while still registered (see
	// File.SetFlags and File.DecRef).
	Unregister(w waiter.Waitable)
}
   460  
   461  // Async gets the stored FileAsync or creates a new one with the supplied
   462  // function. If the supplied function is nil, no FileAsync is created and the
   463  // current value is returned.
   464  func (f *File) Async(newAsync func() FileAsync) FileAsync {
   465  	f.flagsMu.Lock()
   466  	defer f.flagsMu.Unlock()
   467  	if f.async == nil && newAsync != nil {
   468  		f.async = newAsync()
   469  		if f.flags.Async {
   470  			f.async.Register(f)
   471  		}
   472  	}
   473  	return f.async
   474  }
   475  
// lockedReader implements io.Reader and io.ReaderAt.
//
// Note this reads the underlying file using the file operations directly. It
// is the responsibility of the caller to ensure that locks are appropriately
// held and offsets updated if required. This should be used only by internal
// functions that perform these operations and checks at other times.
type lockedReader struct {
	// Ctx is the context for the file reader. It is checked for
	// interruption before each read.
	Ctx context.Context

	// File is the file to read from.
	File *File

	// Offset is the offset to start at. Read advances it by the number of
	// bytes read.
	//
	// This applies only to Read, not ReadAt.
	Offset int64
}
   494  
   495  // Read implements io.Reader.Read.
   496  func (r *lockedReader) Read(buf []byte) (int, error) {
   497  	if r.Ctx.Interrupted() {
   498  		return 0, syserror.ErrInterrupted
   499  	}
   500  	n, err := r.File.FileOperations.Read(r.Ctx, r.File, usermem.BytesIOSequence(buf), r.Offset)
   501  	r.Offset += n
   502  	return int(n), err
   503  }
   504  
   505  // ReadAt implements io.Reader.ReadAt.
   506  func (r *lockedReader) ReadAt(buf []byte, offset int64) (int, error) {
   507  	if r.Ctx.Interrupted() {
   508  		return 0, syserror.ErrInterrupted
   509  	}
   510  	n, err := r.File.FileOperations.Read(r.Ctx, r.File, usermem.BytesIOSequence(buf), offset)
   511  	return int(n), err
   512  }
   513  
// lockedWriter implements io.Writer and io.WriterAt.
//
// The same constraints as lockedReader apply; see above.
type lockedWriter struct {
	// Ctx is the context for the file writer. It is checked for
	// interruption before each write.
	Ctx context.Context

	// File is the file to write to.
	File *File

	// Offset is the offset to start at. Write advances it by the number of
	// bytes written.
	//
	// This applies only to Write, not WriteAt.
	Offset int64
}
   529  
   530  // Write implements io.Writer.Write.
   531  func (w *lockedWriter) Write(buf []byte) (int, error) {
   532  	if w.Ctx.Interrupted() {
   533  		return 0, syserror.ErrInterrupted
   534  	}
   535  	n, err := w.WriteAt(buf, w.Offset)
   536  	w.Offset += int64(n)
   537  	return int(n), err
   538  }
   539  
   540  // WriteAt implements io.Writer.WriteAt.
   541  func (w *lockedWriter) WriteAt(buf []byte, offset int64) (int, error) {
   542  	var (
   543  		written int
   544  		err     error
   545  	)
   546  	// The io.Writer contract requires that Write writes all available
   547  	// bytes and does not return short writes. This causes errors with
   548  	// io.Copy, since our own Write interface does not have this same
   549  	// contract. Enforce that here.
   550  	for written < len(buf) {
   551  		if w.Ctx.Interrupted() {
   552  			return written, syserror.ErrInterrupted
   553  		}
   554  		var n int64
   555  		n, err = w.File.FileOperations.Write(w.Ctx, w.File, usermem.BytesIOSequence(buf[written:]), offset+int64(written))
   556  		if n > 0 {
   557  			written += int(n)
   558  		}
   559  		if err != nil {
   560  			break
   561  		}
   562  	}
   563  	return written, err
   564  }