gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/fsimpl/gofer/special_file.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gofer
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"gvisor.dev/gvisor/pkg/abi/linux"
    21  	"gvisor.dev/gvisor/pkg/atomicbitops"
    22  	"gvisor.dev/gvisor/pkg/context"
    23  	"gvisor.dev/gvisor/pkg/errors/linuxerr"
    24  	"gvisor.dev/gvisor/pkg/fdnotifier"
    25  	"gvisor.dev/gvisor/pkg/hostarch"
    26  	"gvisor.dev/gvisor/pkg/metric"
    27  	"gvisor.dev/gvisor/pkg/safemem"
    28  	"gvisor.dev/gvisor/pkg/sentry/fsmetric"
    29  	"gvisor.dev/gvisor/pkg/sentry/fsutil"
    30  	"gvisor.dev/gvisor/pkg/sentry/memmap"
    31  	"gvisor.dev/gvisor/pkg/sentry/vfs"
    32  	"gvisor.dev/gvisor/pkg/sync"
    33  	"gvisor.dev/gvisor/pkg/usermem"
    34  	"gvisor.dev/gvisor/pkg/waiter"
    35  )
    36  
// specialFileFD implements vfs.FileDescriptionImpl for pipes, sockets, device
// special files, and (when filesystemOptions.regularFilesUseSpecialFileFD is
// in effect) regular files. specialFileFD differs from regularFileFD by using
// per-FD handles instead of shared per-dentry handles, and never buffering I/O.
//
// Every specialFileFD created by newSpecialFileFD is inserted into
// filesystem.specialFileFDs and is removed from it again by Release; both
// operations happen with filesystem.syncMu held.
//
// +stateify savable
type specialFileFD struct {
	fileDescription
	specialFDEntry
	memmap.NoBufferedIOFallback

	// releaseMu synchronizes the closing of fd.handle with fd.sync(). It's safe
	// to access fd.handle without locking for operations that require a ref to
	// be held by the caller, e.g. vfs.FileDescriptionImpl implementations.
	// Release takes releaseMu exclusively; sync takes it shared.
	releaseMu sync.RWMutex `state:"nosave"`

	// handle is used for file I/O. handle is immutable. A negative handle.fd
	// means that no host FD is available for this file description.
	handle handle `state:"nosave"`

	// isRegularFile is true if this FD represents a regular file which is only
	// possible when filesystemOptions.regularFilesUseSpecialFileFD is in
	// effect. isRegularFile is immutable.
	isRegularFile bool

	// seekable is true if this file description represents a file for which
	// file offset is significant, i.e. a regular file, character device or
	// block device. seekable is immutable.
	seekable bool

	// haveQueue is true if this file description represents a file for which
	// queue may send I/O readiness events, i.e. a pipe, socket or character
	// device that has a host FD. haveQueue is immutable. When haveQueue is
	// true, queue is registered with fdnotifier for handle.fd.
	haveQueue bool `state:"nosave"`
	queue     waiter.Queue

	// If seekable is true, off is the file offset. off is protected by mu.
	mu  sync.Mutex `state:"nosave"`
	off int64

	// If haveBuf is non-zero, this FD represents a pipe, and buf contains data
	// read from the pipe from previous calls to specialFileFD.savePipeData().
	// haveBuf and buf are protected by bufMu. PRead drains buf before reading
	// from handle and clears haveBuf once buf is empty.
	bufMu   sync.Mutex `state:"nosave"`
	haveBuf atomicbitops.Uint32
	buf     []byte

	// If handle.fd >= 0, hostFileMapper caches mappings of handle.fd, and
	// hostFileMapperInitOnce is used to initialize it on first use (see
	// ConfigureMMap).
	hostFileMapperInitOnce sync.Once `state:"nosave"`
	hostFileMapper         fsutil.HostFileMapper

	// If handle.fd >= 0, fileRefs counts references on memmap.File offsets.
	// fileRefs is protected by fileRefsMu.
	fileRefsMu sync.Mutex `state:"nosave"`
	fileRefs   fsutil.FrameRefSet
}
    92  
    93  func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) {
    94  	ftype := d.fileType()
    95  	seekable := ftype == linux.S_IFREG || ftype == linux.S_IFCHR || ftype == linux.S_IFBLK
    96  	haveQueue := (ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK || ftype == linux.S_IFCHR) && h.fd >= 0
    97  	fd := &specialFileFD{
    98  		handle:        h,
    99  		isRegularFile: ftype == linux.S_IFREG,
   100  		seekable:      seekable,
   101  		haveQueue:     haveQueue,
   102  	}
   103  	fd.LockFD.Init(&d.locks)
   104  	if haveQueue {
   105  		if err := fdnotifier.AddFD(h.fd, &fd.queue); err != nil {
   106  			return nil, err
   107  		}
   108  	}
   109  	if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
   110  		AllowDirectIO: true,
   111  		DenyPRead:     !seekable,
   112  		DenyPWrite:    !seekable,
   113  	}); err != nil {
   114  		if haveQueue {
   115  			fdnotifier.RemoveFD(h.fd)
   116  		}
   117  		return nil, err
   118  	}
   119  	d.fs.syncMu.Lock()
   120  	d.fs.specialFileFDs.PushBack(fd)
   121  	d.fs.syncMu.Unlock()
   122  	if fd.vfsfd.IsWritable() && (d.mode.Load()&0111 != 0) {
   123  		metric.SuspiciousOperationsMetric.Increment(&metric.SuspiciousOperationsTypeOpenedWriteExecuteFile)
   124  	}
   125  	if h.fd >= 0 {
   126  		fsmetric.GoferOpensHost.Increment()
   127  	} else {
   128  		fsmetric.GoferOpens9P.Increment()
   129  	}
   130  	return fd, nil
   131  }
   132  
   133  // Release implements vfs.FileDescriptionImpl.Release.
   134  func (fd *specialFileFD) Release(ctx context.Context) {
   135  	if fd.haveQueue {
   136  		fdnotifier.RemoveFD(fd.handle.fd)
   137  	}
   138  	fd.releaseMu.Lock()
   139  	fd.handle.close(ctx)
   140  	fd.releaseMu.Unlock()
   141  
   142  	fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
   143  	fs.syncMu.Lock()
   144  	fs.specialFileFDs.Remove(fd)
   145  	fs.syncMu.Unlock()
   146  }
   147  
   148  // OnClose implements vfs.FileDescriptionImpl.OnClose.
   149  func (fd *specialFileFD) OnClose(ctx context.Context) error {
   150  	if !fd.vfsfd.IsWritable() {
   151  		return nil
   152  	}
   153  	return flush(ctx, fd.handle.fdLisa)
   154  }
   155  
   156  // Readiness implements waiter.Waitable.Readiness.
   157  func (fd *specialFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
   158  	if fd.haveQueue {
   159  		return fdnotifier.NonBlockingPoll(fd.handle.fd, mask)
   160  	}
   161  	return fd.fileDescription.Readiness(mask)
   162  }
   163  
   164  // EventRegister implements waiter.Waitable.EventRegister.
   165  func (fd *specialFileFD) EventRegister(e *waiter.Entry) error {
   166  	if fd.haveQueue {
   167  		fd.queue.EventRegister(e)
   168  		if err := fdnotifier.UpdateFD(fd.handle.fd); err != nil {
   169  			fd.queue.EventUnregister(e)
   170  			return err
   171  		}
   172  		return nil
   173  	}
   174  	return fd.fileDescription.EventRegister(e)
   175  }
   176  
   177  // EventUnregister implements waiter.Waitable.EventUnregister.
   178  func (fd *specialFileFD) EventUnregister(e *waiter.Entry) {
   179  	if fd.haveQueue {
   180  		fd.queue.EventUnregister(e)
   181  		if err := fdnotifier.UpdateFD(fd.handle.fd); err != nil {
   182  			panic(fmt.Sprint("UpdateFD:", err))
   183  		}
   184  		return
   185  	}
   186  	fd.fileDescription.EventUnregister(e)
   187  }
   188  
   189  // Epollable implements FileDescriptionImpl.Epollable.
   190  func (fd *specialFileFD) Epollable() bool {
   191  	if fd.haveQueue {
   192  		return true
   193  	}
   194  	return fd.fileDescription.Epollable()
   195  }
   196  
   197  func (fd *specialFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
   198  	if fd.isRegularFile {
   199  		d := fd.dentry()
   200  		return d.doAllocate(ctx, offset, length, func() error {
   201  			return fd.handle.allocate(ctx, mode, offset, length)
   202  		})
   203  	}
   204  	return fd.FileDescriptionDefaultImpl.Allocate(ctx, mode, offset, length)
   205  }
   206  
   207  // PRead implements vfs.FileDescriptionImpl.PRead.
   208  func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
   209  	start := fsmetric.StartReadWait()
   210  	defer func() {
   211  		if fd.handle.fd >= 0 {
   212  			fsmetric.GoferReadsHost.Increment()
   213  			fsmetric.FinishReadWait(fsmetric.GoferReadWaitHost, start)
   214  		} else {
   215  			fsmetric.GoferReads9P.Increment()
   216  			fsmetric.FinishReadWait(fsmetric.GoferReadWait9P, start)
   217  		}
   218  	}()
   219  
   220  	if fd.seekable && offset < 0 {
   221  		return 0, linuxerr.EINVAL
   222  	}
   223  
   224  	// Check that flags are supported.
   225  	//
   226  	// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
   227  	if opts.Flags&^linux.RWF_HIPRI != 0 {
   228  		return 0, linuxerr.EOPNOTSUPP
   229  	}
   230  
   231  	if d := fd.dentry(); d.cachedMetadataAuthoritative() {
   232  		d.touchAtime(fd.vfsfd.Mount())
   233  	}
   234  
   235  	bufN := int64(0)
   236  	if fd.haveBuf.Load() != 0 {
   237  		var err error
   238  		fd.bufMu.Lock()
   239  		if len(fd.buf) != 0 {
   240  			var n int
   241  			n, err = dst.CopyOut(ctx, fd.buf)
   242  			dst = dst.DropFirst(n)
   243  			fd.buf = fd.buf[n:]
   244  			if len(fd.buf) == 0 {
   245  				fd.haveBuf.Store(0)
   246  				fd.buf = nil
   247  			}
   248  			bufN = int64(n)
   249  			if offset >= 0 {
   250  				offset += bufN
   251  			}
   252  		}
   253  		fd.bufMu.Unlock()
   254  		if err != nil {
   255  			return bufN, err
   256  		}
   257  	}
   258  
   259  	rw := getHandleReadWriter(ctx, &fd.handle, offset)
   260  	n, err := dst.CopyOutFrom(ctx, rw)
   261  	putHandleReadWriter(rw)
   262  	if linuxerr.Equals(linuxerr.EAGAIN, err) {
   263  		err = linuxerr.ErrWouldBlock
   264  	}
   265  	return bufN + n, err
   266  }
   267  
   268  // Read implements vfs.FileDescriptionImpl.Read.
   269  func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
   270  	if !fd.seekable {
   271  		return fd.PRead(ctx, dst, -1, opts)
   272  	}
   273  
   274  	fd.mu.Lock()
   275  	n, err := fd.PRead(ctx, dst, fd.off, opts)
   276  	fd.off += n
   277  	fd.mu.Unlock()
   278  	return n, err
   279  }
   280  
   281  // PWrite implements vfs.FileDescriptionImpl.PWrite.
   282  func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
   283  	n, _, err := fd.pwrite(ctx, src, offset, opts)
   284  	return n, err
   285  }
   286  
// pwrite is the shared implementation behind PWrite and Write. It writes src
// to fd.handle at offset and returns the number of bytes written, final
// offset, error. The final offset should be ignored by PWrite.
//
// Write passes offset -1 for non-seekable FDs; on seekable FDs a negative
// offset is rejected with EINVAL. Any flag in opts.Flags other than RWF_HIPRI
// is rejected with EOPNOTSUPP.
//
// For regular files, d.metadataMu is taken before the write and held until
// pwrite returns; O_APPEND replaces offset with the cached file size, and the
// write is truncated to the limit computed by vfs.CheckLimit.
//
// On every error return that happens before the write is issued, the offset
// passed back is the one in effect at that point (the caller's offset, or the
// O_APPEND-adjusted offset).
func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
	if fd.seekable && offset < 0 {
		return 0, offset, linuxerr.EINVAL
	}

	// Check that flags are supported.
	//
	// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
	if opts.Flags&^linux.RWF_HIPRI != 0 {
		return 0, offset, linuxerr.EOPNOTSUPP
	}

	d := fd.dentry()
	if fd.isRegularFile {
		// If the regular file fd was opened with O_APPEND, make sure the file
		// size is updated. There is a possible race here if size is modified
		// externally after metadata cache is updated.
		if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() {
			if err := d.updateMetadata(ctx); err != nil {
				return 0, offset, err
			}
		}

		// We need to hold the metadataMu *while* writing to a regular file.
		// The deferred unlock runs when pwrite returns, so the lock also
		// covers the size update at the end of this function.
		d.metadataMu.Lock()
		defer d.metadataMu.Unlock()

		// Set offset to file size if the regular file was opened with O_APPEND.
		if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
			// Holding d.metadataMu is sufficient for reading d.size.
			offset = int64(d.size.RacyLoad())
		}
		// Clamp the write to the number of bytes that CheckLimit allows at
		// this offset.
		limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes())
		if err != nil {
			return 0, offset, err
		}
		src = src.TakeFirst64(limit)
	}

	// Timestamps are only updated locally when the cached metadata is
	// authoritative. The Locked variant is used for regular files because
	// d.metadataMu is already held on that path.
	if d.cachedMetadataAuthoritative() {
		if fd.isRegularFile {
			d.touchCMtimeLocked()
		} else {
			d.touchCMtime()
		}
	}

	// handleReadWriter always writes to the remote file. So O_DIRECT is
	// effectively always set. Invalidate pages in d.mappings that have been
	// written to.
	//
	// NOTE(review): for non-seekable FDs offset is -1 here, so uint64(offset)
	// wraps around and the computed range is not a meaningful file range;
	// presumably d.mappings is empty for such files — confirm.
	pgstart := hostarch.PageRoundDown(uint64(offset))
	pgend, ok := hostarch.PageRoundUp(uint64(offset + src.NumBytes()))
	if !ok {
		// Rounding the end of the written range up to a page boundary
		// overflowed.
		return 0, offset, linuxerr.EINVAL
	}
	mr := memmap.MappableRange{pgstart, pgend}
	d.mapsMu.Lock()
	d.mappings.Invalidate(mr, memmap.InvalidateOpts{})
	d.mapsMu.Unlock()

	rw := getHandleReadWriter(ctx, &fd.handle, offset)
	n, err := src.CopyInTo(ctx, rw)
	putHandleReadWriter(rw)
	if n > 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 {
		// Note that if syncing the remote file fails, then we can't guarantee that
		// any data was actually written with the semantics of O_DSYNC or
		// O_SYNC, so we return zero bytes written. Compare Linux's
		// mm/filemap.c:generic_file_write_iter() =>
		// include/linux/fs.h:generic_write_sync().
		//
		// This return also skips the offset advance and the cached size
		// update below, and replaces any error from the write itself.
		if err := fd.sync(ctx, false /* forFilesystemSync */); err != nil {
			return 0, offset, err
		}
	}
	if linuxerr.Equals(linuxerr.EAGAIN, err) {
		err = linuxerr.ErrWouldBlock
	}
	// Update offset if the offset is valid.
	if offset >= 0 {
		offset += n
	}
	// Update file size for regular files.
	if fd.isRegularFile {
		// d.metadataMu is already locked at this point.
		if uint64(offset) > d.size.RacyLoad() {
			// The write extended the file; d.dataMu is additionally held
			// while the new size is stored and released when pwrite returns.
			d.dataMu.Lock()
			defer d.dataMu.Unlock()
			d.size.Store(uint64(offset))
		}
	}
	return int64(n), offset, err
}
   380  
   381  // Write implements vfs.FileDescriptionImpl.Write.
   382  func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
   383  	if !fd.seekable {
   384  		return fd.PWrite(ctx, src, -1, opts)
   385  	}
   386  
   387  	fd.mu.Lock()
   388  	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
   389  	fd.off = off
   390  	fd.mu.Unlock()
   391  	return n, err
   392  }
   393  
   394  // Seek implements vfs.FileDescriptionImpl.Seek.
   395  func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
   396  	if !fd.seekable {
   397  		return 0, linuxerr.ESPIPE
   398  	}
   399  	fd.mu.Lock()
   400  	defer fd.mu.Unlock()
   401  	newOffset, err := regularFileSeekLocked(ctx, fd.dentry(), fd.off, offset, whence)
   402  	if err != nil {
   403  		return 0, err
   404  	}
   405  	fd.off = newOffset
   406  	return newOffset, nil
   407  }
   408  
   409  // Sync implements vfs.FileDescriptionImpl.Sync.
   410  func (fd *specialFileFD) Sync(ctx context.Context) error {
   411  	return fd.sync(ctx, false /* forFilesystemSync */)
   412  }
   413  
   414  func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error {
   415  	// Locks to ensure it didn't race with fd.Release().
   416  	fd.releaseMu.RLock()
   417  	defer fd.releaseMu.RUnlock()
   418  
   419  	if err := fd.handle.sync(ctx); err != nil {
   420  		if !forFilesystemSync {
   421  			return err
   422  		}
   423  		// Only return err if we can reasonably have expected sync to succeed
   424  		// (fd represents a regular file that was opened for writing).
   425  		if fd.isRegularFile && fd.vfsfd.IsWritable() {
   426  			return err
   427  		}
   428  		ctx.Debugf("gofer.specialFileFD.sync: syncing non-writable or non-regular-file FD failed: %v", err)
   429  	}
   430  	return nil
   431  }
   432  
   433  // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
   434  func (fd *specialFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
   435  	if fd.handle.fd < 0 || fd.filesystem().opts.forcePageCache {
   436  		return linuxerr.ENODEV
   437  	}
   438  	// After this point, fd may be used as a memmap.Mappable and memmap.File.
   439  	fd.hostFileMapperInitOnce.Do(fd.hostFileMapper.Init)
   440  	return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts)
   441  }
   442  
   443  // AddMapping implements memmap.Mappable.AddMapping.
   444  func (fd *specialFileFD) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
   445  	d := fd.dentry()
   446  	d.mapsMu.Lock()
   447  	defer d.mapsMu.Unlock()
   448  	d.mappings.AddMapping(ms, ar, offset, writable)
   449  	fd.hostFileMapper.IncRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
   450  	return nil
   451  }
   452  
   453  // RemoveMapping implements memmap.Mappable.RemoveMapping.
   454  func (fd *specialFileFD) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
   455  	d := fd.dentry()
   456  	d.mapsMu.Lock()
   457  	defer d.mapsMu.Unlock()
   458  	d.mappings.RemoveMapping(ms, ar, offset, writable)
   459  	fd.hostFileMapper.DecRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
   460  }
   461  
   462  // CopyMapping implements memmap.Mappable.CopyMapping.
   463  func (fd *specialFileFD) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
   464  	return fd.AddMapping(ctx, ms, dstAR, offset, writable)
   465  }
   466  
   467  // Translate implements memmap.Mappable.Translate.
   468  func (fd *specialFileFD) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
   469  	mr := optional
   470  	if fd.filesystem().opts.limitHostFDTranslation {
   471  		mr = maxFillRange(required, optional)
   472  	}
   473  	return []memmap.Translation{
   474  		{
   475  			Source: mr,
   476  			File:   fd,
   477  			Offset: mr.Start,
   478  			Perms:  hostarch.AnyAccess,
   479  		},
   480  	}, nil
   481  }
   482  
   483  // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
   484  func (fd *specialFileFD) InvalidateUnsavable(ctx context.Context) error {
   485  	return nil
   486  }
   487  
   488  // IncRef implements memmap.File.IncRef.
   489  func (fd *specialFileFD) IncRef(fr memmap.FileRange, memCgID uint32) {
   490  	fd.fileRefsMu.Lock()
   491  	defer fd.fileRefsMu.Unlock()
   492  	fd.fileRefs.IncRefAndAccount(fr, memCgID)
   493  }
   494  
   495  // DecRef implements memmap.File.DecRef.
   496  func (fd *specialFileFD) DecRef(fr memmap.FileRange) {
   497  	fd.fileRefsMu.Lock()
   498  	defer fd.fileRefsMu.Unlock()
   499  	fd.fileRefs.DecRefAndAccount(fr)
   500  }
   501  
   502  // MapInternal implements memmap.File.MapInternal.
   503  func (fd *specialFileFD) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
   504  	fd.requireHostFD()
   505  	return fd.hostFileMapper.MapInternal(fr, int(fd.handle.fd), at.Write)
   506  }
   507  
   508  // FD implements memmap.File.FD.
   509  func (fd *specialFileFD) FD() int {
   510  	fd.requireHostFD()
   511  	return int(fd.handle.fd)
   512  }
   513  
   514  func (fd *specialFileFD) requireHostFD() {
   515  	if fd.handle.fd < 0 {
   516  		// This is possible if fd was successfully mmapped before saving, then
   517  		// was restored without a host FD. This is unrecoverable: without a
   518  		// host FD, we can't mmap this file post-restore.
   519  		panic("gofer.specialFileFD can no longer be memory-mapped without a host FD")
   520  	}
   521  }
   522  
   523  func (fd *specialFileFD) updateMetadata(ctx context.Context) error {
   524  	d := fd.dentry()
   525  	d.metadataMu.Lock()
   526  	defer d.metadataMu.Unlock()
   527  	return d.updateMetadataLocked(ctx, fd.handle)
   528  }