github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/gofer/special_file.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gofer
    16  
    17  import (
    18  	"sync/atomic"
    19  
    20  	"golang.org/x/sys/unix"
    21  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    22  	"github.com/SagerNet/gvisor/pkg/context"
    23  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    24  	"github.com/SagerNet/gvisor/pkg/fdnotifier"
    25  	"github.com/SagerNet/gvisor/pkg/metric"
    26  	"github.com/SagerNet/gvisor/pkg/p9"
    27  	"github.com/SagerNet/gvisor/pkg/safemem"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    30  	"github.com/SagerNet/gvisor/pkg/sync"
    31  	"github.com/SagerNet/gvisor/pkg/syserror"
    32  	"github.com/SagerNet/gvisor/pkg/usermem"
    33  	"github.com/SagerNet/gvisor/pkg/waiter"
    34  )
    35  
// specialFileFD implements vfs.FileDescriptionImpl for pipes, sockets, device
// special files, and (when filesystemOptions.regularFilesUseSpecialFileFD is
// in effect) regular files. specialFileFD differs from regularFileFD by using
// per-FD handles instead of shared per-dentry handles, and never buffering I/O.
//
// Every live specialFileFD is tracked in filesystem.specialFileFDs: it is
// inserted by newSpecialFileFD and removed by Release.
//
// +stateify savable
type specialFileFD struct {
	// fileDescription supplies the embedded vfs.FileDescription (vfsfd), the
	// lock state (LockFD) and the default method implementations that the
	// methods of specialFileFD fall back to.
	fileDescription

	// handle is used for file I/O. handle is immutable.
	//
	// handle.fd >= 0 is treated throughout this type as "a host FD is
	// available"; otherwise operations go through handle.file.
	handle handle `state:"nosave"`

	// isRegularFile is true if this FD represents a regular file which is only
	// possible when filesystemOptions.regularFilesUseSpecialFileFD is in
	// effect. isRegularFile is immutable.
	isRegularFile bool

	// seekable is true if this file description represents a file for which
	// file offset is significant, i.e. a regular file, character device or
	// block device. seekable is immutable.
	seekable bool

	// haveQueue is true if this file description represents a file for which
	// queue may send I/O readiness events. haveQueue is immutable.
	//
	// newSpecialFileFD sets haveQueue only for FIFOs and sockets that have a
	// host FD, and in that case registers queue with fdnotifier for that FD.
	haveQueue bool `state:"nosave"`
	queue     waiter.Queue

	// If seekable is true, off is the file offset. off is protected by mu.
	mu  sync.Mutex `state:"nosave"`
	off int64

	// If haveBuf is non-zero, this FD represents a pipe, and buf contains data
	// read from the pipe from previous calls to specialFileFD.savePipeData().
	// haveBuf and buf are protected by bufMu. haveBuf is accessed using atomic
	// memory operations.
	//
	// PRead drains buf before reading from handle and clears haveBuf once buf
	// is empty.
	bufMu   sync.Mutex `state:"nosave"`
	haveBuf uint32
	buf     []byte
}
    75  
    76  func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) {
    77  	ftype := d.fileType()
    78  	seekable := ftype == linux.S_IFREG || ftype == linux.S_IFCHR || ftype == linux.S_IFBLK
    79  	haveQueue := (ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK) && h.fd >= 0
    80  	fd := &specialFileFD{
    81  		handle:        h,
    82  		isRegularFile: ftype == linux.S_IFREG,
    83  		seekable:      seekable,
    84  		haveQueue:     haveQueue,
    85  	}
    86  	fd.LockFD.Init(&d.locks)
    87  	if haveQueue {
    88  		if err := fdnotifier.AddFD(h.fd, &fd.queue); err != nil {
    89  			return nil, err
    90  		}
    91  	}
    92  	if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
    93  		DenyPRead:  !seekable,
    94  		DenyPWrite: !seekable,
    95  	}); err != nil {
    96  		if haveQueue {
    97  			fdnotifier.RemoveFD(h.fd)
    98  		}
    99  		return nil, err
   100  	}
   101  	d.fs.syncMu.Lock()
   102  	d.fs.specialFileFDs[fd] = struct{}{}
   103  	d.fs.syncMu.Unlock()
   104  	if fd.vfsfd.IsWritable() && (atomic.LoadUint32(&d.mode)&0111 != 0) {
   105  		metric.SuspiciousOperationsMetric.Increment("opened_write_execute_file")
   106  	}
   107  	if h.fd >= 0 {
   108  		fsmetric.GoferOpensHost.Increment()
   109  	} else {
   110  		fsmetric.GoferOpens9P.Increment()
   111  	}
   112  	return fd, nil
   113  }
   114  
   115  // Release implements vfs.FileDescriptionImpl.Release.
   116  func (fd *specialFileFD) Release(ctx context.Context) {
   117  	if fd.haveQueue {
   118  		fdnotifier.RemoveFD(fd.handle.fd)
   119  	}
   120  	fd.handle.close(ctx)
   121  	fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
   122  	fs.syncMu.Lock()
   123  	delete(fs.specialFileFDs, fd)
   124  	fs.syncMu.Unlock()
   125  }
   126  
   127  // OnClose implements vfs.FileDescriptionImpl.OnClose.
   128  func (fd *specialFileFD) OnClose(ctx context.Context) error {
   129  	if !fd.vfsfd.IsWritable() {
   130  		return nil
   131  	}
   132  	return fd.handle.file.flush(ctx)
   133  }
   134  
   135  // Readiness implements waiter.Waitable.Readiness.
   136  func (fd *specialFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
   137  	if fd.haveQueue {
   138  		return fdnotifier.NonBlockingPoll(fd.handle.fd, mask)
   139  	}
   140  	return fd.fileDescription.Readiness(mask)
   141  }
   142  
   143  // EventRegister implements waiter.Waitable.EventRegister.
   144  func (fd *specialFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
   145  	if fd.haveQueue {
   146  		fd.queue.EventRegister(e, mask)
   147  		fdnotifier.UpdateFD(fd.handle.fd)
   148  		return
   149  	}
   150  	fd.fileDescription.EventRegister(e, mask)
   151  }
   152  
   153  // EventUnregister implements waiter.Waitable.EventUnregister.
   154  func (fd *specialFileFD) EventUnregister(e *waiter.Entry) {
   155  	if fd.haveQueue {
   156  		fd.queue.EventUnregister(e)
   157  		fdnotifier.UpdateFD(fd.handle.fd)
   158  		return
   159  	}
   160  	fd.fileDescription.EventUnregister(e)
   161  }
   162  
   163  func (fd *specialFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
   164  	if fd.isRegularFile {
   165  		d := fd.dentry()
   166  		return d.doAllocate(ctx, offset, length, func() error {
   167  			return fd.handle.file.allocate(ctx, p9.ToAllocateMode(mode), offset, length)
   168  		})
   169  	}
   170  	return fd.FileDescriptionDefaultImpl.Allocate(ctx, mode, offset, length)
   171  }
   172  
   173  // PRead implements vfs.FileDescriptionImpl.PRead.
   174  func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
   175  	start := fsmetric.StartReadWait()
   176  	defer func() {
   177  		if fd.handle.fd >= 0 {
   178  			fsmetric.GoferReadsHost.Increment()
   179  			fsmetric.FinishReadWait(fsmetric.GoferReadWaitHost, start)
   180  		} else {
   181  			fsmetric.GoferReads9P.Increment()
   182  			fsmetric.FinishReadWait(fsmetric.GoferReadWait9P, start)
   183  		}
   184  	}()
   185  
   186  	if fd.seekable && offset < 0 {
   187  		return 0, linuxerr.EINVAL
   188  	}
   189  
   190  	// Check that flags are supported.
   191  	//
   192  	// TODO(github.com/SagerNet/issue/2601): Support select preadv2 flags.
   193  	if opts.Flags&^linux.RWF_HIPRI != 0 {
   194  		return 0, syserror.EOPNOTSUPP
   195  	}
   196  
   197  	if d := fd.dentry(); d.cachedMetadataAuthoritative() {
   198  		d.touchAtime(fd.vfsfd.Mount())
   199  	}
   200  
   201  	bufN := int64(0)
   202  	if atomic.LoadUint32(&fd.haveBuf) != 0 {
   203  		var err error
   204  		fd.bufMu.Lock()
   205  		if len(fd.buf) != 0 {
   206  			var n int
   207  			n, err = dst.CopyOut(ctx, fd.buf)
   208  			dst = dst.DropFirst(n)
   209  			fd.buf = fd.buf[n:]
   210  			if len(fd.buf) == 0 {
   211  				atomic.StoreUint32(&fd.haveBuf, 0)
   212  				fd.buf = nil
   213  			}
   214  			bufN = int64(n)
   215  			if offset >= 0 {
   216  				offset += bufN
   217  			}
   218  		}
   219  		fd.bufMu.Unlock()
   220  		if err != nil {
   221  			return bufN, err
   222  		}
   223  	}
   224  
   225  	// Going through dst.CopyOutFrom() would hold MM locks around file
   226  	// operations of unknown duration. For regularFileFD, doing so is necessary
   227  	// to support mmap due to lock ordering; MM locks precede dentry.dataMu.
   228  	// That doesn't hold here since specialFileFD doesn't client-cache data.
   229  	// Just buffer the read instead.
   230  	buf := make([]byte, dst.NumBytes())
   231  	n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
   232  	if linuxerr.Equals(linuxerr.EAGAIN, err) {
   233  		err = syserror.ErrWouldBlock
   234  	}
   235  	if n == 0 {
   236  		return bufN, err
   237  	}
   238  	if cp, cperr := dst.CopyOut(ctx, buf[:n]); cperr != nil {
   239  		return bufN + int64(cp), cperr
   240  	}
   241  	return bufN + int64(n), err
   242  }
   243  
   244  // Read implements vfs.FileDescriptionImpl.Read.
   245  func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
   246  	if !fd.seekable {
   247  		return fd.PRead(ctx, dst, -1, opts)
   248  	}
   249  
   250  	fd.mu.Lock()
   251  	n, err := fd.PRead(ctx, dst, fd.off, opts)
   252  	fd.off += n
   253  	fd.mu.Unlock()
   254  	return n, err
   255  }
   256  
   257  // PWrite implements vfs.FileDescriptionImpl.PWrite.
   258  func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
   259  	n, _, err := fd.pwrite(ctx, src, offset, opts)
   260  	return n, err
   261  }
   262  
// pwrite returns the number of bytes written, final offset, error. The final
// offset should be ignored by PWrite.
//
// For regular files, d.metadataMu is held from before the write until pwrite
// returns. If the FD has O_APPEND set, the write starts at the dentry's size,
// and the source is truncated to the limit returned by vfs.CheckLimit. After
// the write, the dentry's size is raised if the write extended the file.
//
// Write passes an offset of -1 for non-seekable files; such an offset is
// returned unchanged as the final offset.
func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
	// Negative offsets are only rejected where the offset is meaningful.
	if fd.seekable && offset < 0 {
		return 0, offset, linuxerr.EINVAL
	}

	// Check that flags are supported.
	//
	// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
	if opts.Flags&^linux.RWF_HIPRI != 0 {
		return 0, offset, syserror.EOPNOTSUPP
	}

	d := fd.dentry()
	if fd.isRegularFile {
		// If the regular file fd was opened with O_APPEND, make sure the file
		// size is updated. There is a possible race here if size is modified
		// externally after metadata cache is updated.
		if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() {
			if err := d.updateFromGetattr(ctx); err != nil {
				return 0, offset, err
			}
		}

		// We need to hold the metadataMu *while* writing to a regular file.
		d.metadataMu.Lock()
		defer d.metadataMu.Unlock()

		// Set offset to file size if the regular file was opened with O_APPEND.
		if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
			// Holding d.metadataMu is sufficient for reading d.size.
			offset = int64(d.size)
		}
		// Clamp the write to the limit computed by vfs.CheckLimit for this
		// offset and length.
		limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes())
		if err != nil {
			return 0, offset, err
		}
		src = src.TakeFirst64(limit)
	}

	// Update cached timestamps. The Locked variant is used for regular files
	// because d.metadataMu was taken above.
	if d.cachedMetadataAuthoritative() {
		if fd.isRegularFile {
			d.touchCMtimeLocked()
		} else {
			d.touchCMtime()
		}
	}

	// Do a buffered write. See rationale in PRead.
	buf := make([]byte, src.NumBytes())
	copied, copyErr := src.CopyIn(ctx, buf)
	if copied == 0 && copyErr != nil {
		// Only return the error if we didn't get any data.
		return 0, offset, copyErr
	}
	// Write whatever was copied in, even if CopyIn stopped early; copyErr is
	// reported at the end if the write itself succeeds.
	n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:copied])), uint64(offset))
	if linuxerr.Equals(linuxerr.EAGAIN, err) {
		err = syserror.ErrWouldBlock
	}
	// Update offset if the offset is valid.
	if offset >= 0 {
		offset += int64(n)
	}
	// Update file size for regular files.
	if fd.isRegularFile {
		// d.metadataMu is already locked at this point.
		if uint64(offset) > d.size {
			// d.dataMu is additionally held around the size store and is
			// released when pwrite returns.
			d.dataMu.Lock()
			defer d.dataMu.Unlock()
			atomic.StoreUint64(&d.size, uint64(offset))
		}
	}
	// A write error takes precedence over a partial-copy error.
	if err != nil {
		return int64(n), offset, err
	}
	return int64(n), offset, copyErr
}
   341  
   342  // Write implements vfs.FileDescriptionImpl.Write.
   343  func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
   344  	if !fd.seekable {
   345  		return fd.PWrite(ctx, src, -1, opts)
   346  	}
   347  
   348  	fd.mu.Lock()
   349  	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
   350  	fd.off = off
   351  	fd.mu.Unlock()
   352  	return n, err
   353  }
   354  
   355  // Seek implements vfs.FileDescriptionImpl.Seek.
   356  func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
   357  	if !fd.seekable {
   358  		return 0, linuxerr.ESPIPE
   359  	}
   360  	fd.mu.Lock()
   361  	defer fd.mu.Unlock()
   362  	newOffset, err := regularFileSeekLocked(ctx, fd.dentry(), fd.off, offset, whence)
   363  	if err != nil {
   364  		return 0, err
   365  	}
   366  	fd.off = newOffset
   367  	return newOffset, nil
   368  }
   369  
   370  // Sync implements vfs.FileDescriptionImpl.Sync.
   371  func (fd *specialFileFD) Sync(ctx context.Context) error {
   372  	return fd.sync(ctx, false /* forFilesystemSync */)
   373  }
   374  
   375  func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error {
   376  	err := func() error {
   377  		// If we have a host FD, fsyncing it is likely to be faster than an fsync
   378  		// RPC.
   379  		if fd.handle.fd >= 0 {
   380  			ctx.UninterruptibleSleepStart(false)
   381  			err := unix.Fsync(int(fd.handle.fd))
   382  			ctx.UninterruptibleSleepFinish(false)
   383  			return err
   384  		}
   385  		return fd.handle.file.fsync(ctx)
   386  	}()
   387  	if err != nil {
   388  		if !forFilesystemSync {
   389  			return err
   390  		}
   391  		// Only return err if we can reasonably have expected sync to succeed
   392  		// (fd represents a regular file that was opened for writing).
   393  		if fd.isRegularFile && fd.vfsfd.IsWritable() {
   394  			return err
   395  		}
   396  		ctx.Debugf("gofer.specialFileFD.sync: syncing non-writable or non-regular-file FD failed: %v", err)
   397  	}
   398  	return nil
   399  }