github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/gofer/special_file.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gofer
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
    21  	"github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops"
    22  	"github.com/nicocha30/gvisor-ligolo/pkg/context"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/fdnotifier"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/metric"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/safemem"
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsmetric"
    29  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsutil"
    30  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/memmap"
    31  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs"
    32  	"github.com/nicocha30/gvisor-ligolo/pkg/sync"
    33  	"github.com/nicocha30/gvisor-ligolo/pkg/usermem"
    34  	"github.com/nicocha30/gvisor-ligolo/pkg/waiter"
    35  )
    36  
// specialFileFD implements vfs.FileDescriptionImpl for pipes, sockets, device
// special files, and (when filesystemOptions.regularFilesUseSpecialFileFD is
// in effect) regular files. specialFileFD differs from regularFileFD by using
// per-FD handles instead of shared per-dentry handles, and never buffering I/O.
//
// specialFileFD additionally serves as the memmap.Mappable and memmap.File
// for mappings created through ConfigureMMap when a host FD is available.
//
// +stateify savable
type specialFileFD struct {
	fileDescription
	// specialFDEntry allows this FD to be an element of
	// filesystem.specialFileFDs (see newSpecialFileFD and Release).
	specialFDEntry

	// releaseMu synchronizes the closing of fd.handle with fd.sync(). It's safe
	// to access fd.handle without locking for operations that require a ref to
	// be held by the caller, e.g. vfs.FileDescriptionImpl implementations.
	releaseMu sync.RWMutex `state:"nosave"`

	// handle is used for file I/O. handle is immutable.
	handle handle `state:"nosave"`

	// isRegularFile is true if this FD represents a regular file which is only
	// possible when filesystemOptions.regularFilesUseSpecialFileFD is in
	// effect. isRegularFile is immutable.
	isRegularFile bool

	// seekable is true if this file description represents a file for which
	// file offset is significant, i.e. a regular file, character device or
	// block device. seekable is immutable.
	seekable bool

	// haveQueue is true if this file description represents a file for which
	// queue may send I/O readiness events. haveQueue is immutable.
	//
	// When haveQueue is true, queue is registered with fdnotifier for
	// handle.fd in newSpecialFileFD and unregistered in Release.
	haveQueue bool `state:"nosave"`
	queue     waiter.Queue

	// If seekable is true, off is the file offset. off is protected by mu.
	mu  sync.Mutex `state:"nosave"`
	off int64

	// If haveBuf is non-zero, this FD represents a pipe, and buf contains data
	// read from the pipe from previous calls to specialFileFD.savePipeData().
	// haveBuf and buf are protected by bufMu.
	//
	// PRead checks haveBuf with an atomic load before taking bufMu, and
	// drains buf before reading from handle.
	bufMu   sync.Mutex `state:"nosave"`
	haveBuf atomicbitops.Uint32
	buf     []byte

	// If handle.fd >= 0, hostFileMapper caches mappings of handle.fd, and
	// hostFileMapperInitOnce is used to initialize it on first use.
	hostFileMapperInitOnce sync.Once `state:"nosave"`
	hostFileMapper         fsutil.HostFileMapper

	// If handle.fd >= 0, fileRefs counts references on memmap.File offsets.
	// fileRefs is protected by fileRefsMu.
	fileRefsMu sync.Mutex `state:"nosave"`
	fileRefs   fsutil.FrameRefSet
}
    91  
    92  func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) {
    93  	ftype := d.fileType()
    94  	seekable := ftype == linux.S_IFREG || ftype == linux.S_IFCHR || ftype == linux.S_IFBLK
    95  	haveQueue := (ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK || ftype == linux.S_IFCHR) && h.fd >= 0
    96  	fd := &specialFileFD{
    97  		handle:        h,
    98  		isRegularFile: ftype == linux.S_IFREG,
    99  		seekable:      seekable,
   100  		haveQueue:     haveQueue,
   101  	}
   102  	fd.LockFD.Init(&d.locks)
   103  	if haveQueue {
   104  		if err := fdnotifier.AddFD(h.fd, &fd.queue); err != nil {
   105  			return nil, err
   106  		}
   107  	}
   108  	if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
   109  		AllowDirectIO: true,
   110  		DenyPRead:     !seekable,
   111  		DenyPWrite:    !seekable,
   112  	}); err != nil {
   113  		if haveQueue {
   114  			fdnotifier.RemoveFD(h.fd)
   115  		}
   116  		return nil, err
   117  	}
   118  	d.fs.syncMu.Lock()
   119  	d.fs.specialFileFDs.PushBack(fd)
   120  	d.fs.syncMu.Unlock()
   121  	if fd.vfsfd.IsWritable() && (d.mode.Load()&0111 != 0) {
   122  		metric.SuspiciousOperationsMetric.Increment(&metric.SuspiciousOperationsTypeOpenedWriteExecuteFile)
   123  	}
   124  	if h.fd >= 0 {
   125  		fsmetric.GoferOpensHost.Increment()
   126  	} else {
   127  		fsmetric.GoferOpens9P.Increment()
   128  	}
   129  	return fd, nil
   130  }
   131  
   132  // Release implements vfs.FileDescriptionImpl.Release.
   133  func (fd *specialFileFD) Release(ctx context.Context) {
   134  	if fd.haveQueue {
   135  		fdnotifier.RemoveFD(fd.handle.fd)
   136  	}
   137  	fd.releaseMu.Lock()
   138  	fd.handle.close(ctx)
   139  	fd.releaseMu.Unlock()
   140  
   141  	fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem)
   142  	fs.syncMu.Lock()
   143  	fs.specialFileFDs.Remove(fd)
   144  	fs.syncMu.Unlock()
   145  }
   146  
   147  // OnClose implements vfs.FileDescriptionImpl.OnClose.
   148  func (fd *specialFileFD) OnClose(ctx context.Context) error {
   149  	if !fd.vfsfd.IsWritable() {
   150  		return nil
   151  	}
   152  	return flush(ctx, fd.handle.fdLisa)
   153  }
   154  
   155  // Readiness implements waiter.Waitable.Readiness.
   156  func (fd *specialFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
   157  	if fd.haveQueue {
   158  		return fdnotifier.NonBlockingPoll(fd.handle.fd, mask)
   159  	}
   160  	return fd.fileDescription.Readiness(mask)
   161  }
   162  
   163  // EventRegister implements waiter.Waitable.EventRegister.
   164  func (fd *specialFileFD) EventRegister(e *waiter.Entry) error {
   165  	if fd.haveQueue {
   166  		fd.queue.EventRegister(e)
   167  		if err := fdnotifier.UpdateFD(fd.handle.fd); err != nil {
   168  			fd.queue.EventUnregister(e)
   169  			return err
   170  		}
   171  		return nil
   172  	}
   173  	return fd.fileDescription.EventRegister(e)
   174  }
   175  
   176  // EventUnregister implements waiter.Waitable.EventUnregister.
   177  func (fd *specialFileFD) EventUnregister(e *waiter.Entry) {
   178  	if fd.haveQueue {
   179  		fd.queue.EventUnregister(e)
   180  		if err := fdnotifier.UpdateFD(fd.handle.fd); err != nil {
   181  			panic(fmt.Sprint("UpdateFD:", err))
   182  		}
   183  		return
   184  	}
   185  	fd.fileDescription.EventUnregister(e)
   186  }
   187  
   188  // Epollable implements FileDescriptionImpl.Epollable.
   189  func (fd *specialFileFD) Epollable() bool {
   190  	if fd.haveQueue {
   191  		return true
   192  	}
   193  	return fd.fileDescription.Epollable()
   194  }
   195  
   196  func (fd *specialFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
   197  	if fd.isRegularFile {
   198  		d := fd.dentry()
   199  		return d.doAllocate(ctx, offset, length, func() error {
   200  			return fd.handle.allocate(ctx, mode, offset, length)
   201  		})
   202  	}
   203  	return fd.FileDescriptionDefaultImpl.Allocate(ctx, mode, offset, length)
   204  }
   205  
   206  // PRead implements vfs.FileDescriptionImpl.PRead.
   207  func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
   208  	start := fsmetric.StartReadWait()
   209  	defer func() {
   210  		if fd.handle.fd >= 0 {
   211  			fsmetric.GoferReadsHost.Increment()
   212  			fsmetric.FinishReadWait(fsmetric.GoferReadWaitHost, start)
   213  		} else {
   214  			fsmetric.GoferReads9P.Increment()
   215  			fsmetric.FinishReadWait(fsmetric.GoferReadWait9P, start)
   216  		}
   217  	}()
   218  
   219  	if fd.seekable && offset < 0 {
   220  		return 0, linuxerr.EINVAL
   221  	}
   222  
   223  	// Check that flags are supported.
   224  	//
   225  	// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
   226  	if opts.Flags&^linux.RWF_HIPRI != 0 {
   227  		return 0, linuxerr.EOPNOTSUPP
   228  	}
   229  
   230  	if d := fd.dentry(); d.cachedMetadataAuthoritative() {
   231  		d.touchAtime(fd.vfsfd.Mount())
   232  	}
   233  
   234  	bufN := int64(0)
   235  	if fd.haveBuf.Load() != 0 {
   236  		var err error
   237  		fd.bufMu.Lock()
   238  		if len(fd.buf) != 0 {
   239  			var n int
   240  			n, err = dst.CopyOut(ctx, fd.buf)
   241  			dst = dst.DropFirst(n)
   242  			fd.buf = fd.buf[n:]
   243  			if len(fd.buf) == 0 {
   244  				fd.haveBuf.Store(0)
   245  				fd.buf = nil
   246  			}
   247  			bufN = int64(n)
   248  			if offset >= 0 {
   249  				offset += bufN
   250  			}
   251  		}
   252  		fd.bufMu.Unlock()
   253  		if err != nil {
   254  			return bufN, err
   255  		}
   256  	}
   257  
   258  	rw := getHandleReadWriter(ctx, &fd.handle, offset)
   259  	n, err := dst.CopyOutFrom(ctx, rw)
   260  	putHandleReadWriter(rw)
   261  	if linuxerr.Equals(linuxerr.EAGAIN, err) {
   262  		err = linuxerr.ErrWouldBlock
   263  	}
   264  	return bufN + n, err
   265  }
   266  
   267  // Read implements vfs.FileDescriptionImpl.Read.
   268  func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
   269  	if !fd.seekable {
   270  		return fd.PRead(ctx, dst, -1, opts)
   271  	}
   272  
   273  	fd.mu.Lock()
   274  	n, err := fd.PRead(ctx, dst, fd.off, opts)
   275  	fd.off += n
   276  	fd.mu.Unlock()
   277  	return n, err
   278  }
   279  
   280  // PWrite implements vfs.FileDescriptionImpl.PWrite.
   281  func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
   282  	n, _, err := fd.pwrite(ctx, src, offset, opts)
   283  	return n, err
   284  }
   285  
// pwrite returns the number of bytes written, final offset, error. The final
// offset should be ignored by PWrite.
//
// pwrite writes src to fd.handle at offset. For regular files it holds
// d.metadataMu for the remainder of the call, honors O_APPEND by writing at
// the cached file size, truncates src per vfs.CheckLimit, and grows the
// cached size if the write extended the file. Any flag other than RWF_HIPRI
// yields EOPNOTSUPP, and EAGAIN from the handle is reported as ErrWouldBlock.
func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
	// A negative offset is only accepted for non-seekable files (Write passes
	// -1 for those).
	if fd.seekable && offset < 0 {
		return 0, offset, linuxerr.EINVAL
	}

	// Check that flags are supported.
	//
	// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
	if opts.Flags&^linux.RWF_HIPRI != 0 {
		return 0, offset, linuxerr.EOPNOTSUPP
	}

	d := fd.dentry()
	if fd.isRegularFile {
		// If the regular file fd was opened with O_APPEND, make sure the file
		// size is updated. There is a possible race here if size is modified
		// externally after metadata cache is updated.
		if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() {
			if err := d.updateMetadata(ctx); err != nil {
				return 0, offset, err
			}
		}

		// We need to hold the metadataMu *while* writing to a regular file.
		d.metadataMu.Lock()
		defer d.metadataMu.Unlock()

		// Set offset to file size if the regular file was opened with O_APPEND.
		if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
			// Holding d.metadataMu is sufficient for reading d.size.
			offset = int64(d.size.RacyLoad())
		}
		// Clamp the write to the limit reported by vfs.CheckLimit.
		limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes())
		if err != nil {
			return 0, offset, err
		}
		src = src.TakeFirst64(limit)
	}

	if d.cachedMetadataAuthoritative() {
		// For regular files d.metadataMu is already held (see above), so the
		// Locked variant must be used.
		if fd.isRegularFile {
			d.touchCMtimeLocked()
		} else {
			d.touchCMtime()
		}
	}

	// handleReadWriter always writes to the remote file. So O_DIRECT is
	// effectively always set. Invalidate pages in d.mappings that have been
	// written to.
	//
	// NOTE(review): Write passes offset -1 for non-seekable files, in which
	// case these uint64 conversions wrap around; confirm that the resulting
	// range (and the EINVAL on round-up overflow) is intended for that case.
	pgstart := hostarch.PageRoundDown(uint64(offset))
	pgend, ok := hostarch.PageRoundUp(uint64(offset + src.NumBytes()))
	if !ok {
		return 0, offset, linuxerr.EINVAL
	}
	mr := memmap.MappableRange{pgstart, pgend}
	d.mapsMu.Lock()
	d.mappings.Invalidate(mr, memmap.InvalidateOpts{})
	d.mapsMu.Unlock()

	rw := getHandleReadWriter(ctx, &fd.handle, offset)
	n, err := src.CopyInTo(ctx, rw)
	putHandleReadWriter(rw)
	if n > 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 {
		// Note that if syncing the remote file fails, then we can't guarantee that
		// any data was actually written with the semantics of O_DSYNC or
		// O_SYNC, so we return zero bytes written. Compare Linux's
		// mm/filemap.c:generic_file_write_iter() =>
		// include/linux/fs.h:generic_write_sync().
		if err := fd.sync(ctx, false /* forFilesystemSync */); err != nil {
			return 0, offset, err
		}
	}
	if linuxerr.Equals(linuxerr.EAGAIN, err) {
		err = linuxerr.ErrWouldBlock
	}
	// Update offset if the offset is valid.
	if offset >= 0 {
		offset += n
	}
	// Update file size for regular files.
	if fd.isRegularFile {
		// d.metadataMu is already locked at this point.
		if uint64(offset) > d.size.RacyLoad() {
			// The size is stored with d.dataMu held in addition to
			// d.metadataMu; both are released by the deferred unlocks.
			d.dataMu.Lock()
			defer d.dataMu.Unlock()
			d.size.Store(uint64(offset))
		}
	}
	return int64(n), offset, err
}
   379  
   380  // Write implements vfs.FileDescriptionImpl.Write.
   381  func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
   382  	if !fd.seekable {
   383  		return fd.PWrite(ctx, src, -1, opts)
   384  	}
   385  
   386  	fd.mu.Lock()
   387  	n, off, err := fd.pwrite(ctx, src, fd.off, opts)
   388  	fd.off = off
   389  	fd.mu.Unlock()
   390  	return n, err
   391  }
   392  
   393  // Seek implements vfs.FileDescriptionImpl.Seek.
   394  func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
   395  	if !fd.seekable {
   396  		return 0, linuxerr.ESPIPE
   397  	}
   398  	fd.mu.Lock()
   399  	defer fd.mu.Unlock()
   400  	newOffset, err := regularFileSeekLocked(ctx, fd.dentry(), fd.off, offset, whence)
   401  	if err != nil {
   402  		return 0, err
   403  	}
   404  	fd.off = newOffset
   405  	return newOffset, nil
   406  }
   407  
   408  // Sync implements vfs.FileDescriptionImpl.Sync.
   409  func (fd *specialFileFD) Sync(ctx context.Context) error {
   410  	return fd.sync(ctx, false /* forFilesystemSync */)
   411  }
   412  
   413  func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error {
   414  	// Locks to ensure it didn't race with fd.Release().
   415  	fd.releaseMu.RLock()
   416  	defer fd.releaseMu.RUnlock()
   417  
   418  	if err := fd.handle.sync(ctx); err != nil {
   419  		if !forFilesystemSync {
   420  			return err
   421  		}
   422  		// Only return err if we can reasonably have expected sync to succeed
   423  		// (fd represents a regular file that was opened for writing).
   424  		if fd.isRegularFile && fd.vfsfd.IsWritable() {
   425  			return err
   426  		}
   427  		ctx.Debugf("gofer.specialFileFD.sync: syncing non-writable or non-regular-file FD failed: %v", err)
   428  	}
   429  	return nil
   430  }
   431  
   432  // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
   433  func (fd *specialFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
   434  	if fd.handle.fd < 0 || fd.filesystem().opts.forcePageCache {
   435  		return linuxerr.ENODEV
   436  	}
   437  	// After this point, fd may be used as a memmap.Mappable and memmap.File.
   438  	fd.hostFileMapperInitOnce.Do(fd.hostFileMapper.Init)
   439  	return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts)
   440  }
   441  
   442  // AddMapping implements memmap.Mappable.AddMapping.
   443  func (fd *specialFileFD) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
   444  	d := fd.dentry()
   445  	d.mapsMu.Lock()
   446  	defer d.mapsMu.Unlock()
   447  	d.mappings.AddMapping(ms, ar, offset, writable)
   448  	fd.hostFileMapper.IncRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
   449  	return nil
   450  }
   451  
   452  // RemoveMapping implements memmap.Mappable.RemoveMapping.
   453  func (fd *specialFileFD) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
   454  	d := fd.dentry()
   455  	d.mapsMu.Lock()
   456  	defer d.mapsMu.Unlock()
   457  	d.mappings.RemoveMapping(ms, ar, offset, writable)
   458  	fd.hostFileMapper.DecRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())})
   459  }
   460  
// CopyMapping implements memmap.Mappable.CopyMapping.
//
// A copy is treated as a new mapping of dstAR; srcAR is not used.
func (fd *specialFileFD) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
	return fd.AddMapping(ctx, ms, dstAR, offset, writable)
}
   465  
   466  // Translate implements memmap.Mappable.Translate.
   467  func (fd *specialFileFD) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
   468  	mr := optional
   469  	if fd.filesystem().opts.limitHostFDTranslation {
   470  		mr = maxFillRange(required, optional)
   471  	}
   472  	return []memmap.Translation{
   473  		{
   474  			Source: mr,
   475  			File:   fd,
   476  			Offset: mr.Start,
   477  			Perms:  hostarch.AnyAccess,
   478  		},
   479  	}, nil
   480  }
   481  
// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
//
// It does nothing and always returns nil.
func (fd *specialFileFD) InvalidateUnsavable(ctx context.Context) error {
	return nil
}
   486  
// IncRef implements memmap.File.IncRef.
//
// It records a reference on fr in fd.fileRefs, passing memCgID through to
// IncRefAndAccount, while holding fileRefsMu.
func (fd *specialFileFD) IncRef(fr memmap.FileRange, memCgID uint32) {
	fd.fileRefsMu.Lock()
	defer fd.fileRefsMu.Unlock()
	fd.fileRefs.IncRefAndAccount(fr, memCgID)
}
   493  
// DecRef implements memmap.File.DecRef.
//
// It drops a reference on fr from fd.fileRefs while holding fileRefsMu.
func (fd *specialFileFD) DecRef(fr memmap.FileRange) {
	fd.fileRefsMu.Lock()
	defer fd.fileRefsMu.Unlock()
	fd.fileRefs.DecRefAndAccount(fr)
}
   500  
   501  // MapInternal implements memmap.File.MapInternal.
   502  func (fd *specialFileFD) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) {
   503  	fd.requireHostFD()
   504  	return fd.hostFileMapper.MapInternal(fr, int(fd.handle.fd), at.Write)
   505  }
   506  
// FD implements memmap.File.FD.
//
// It returns the host FD backing fd.handle, and panics (via requireHostFD)
// if there is none.
func (fd *specialFileFD) FD() int {
	fd.requireHostFD()
	return int(fd.handle.fd)
}
   512  
   513  func (fd *specialFileFD) requireHostFD() {
   514  	if fd.handle.fd < 0 {
   515  		// This is possible if fd was successfully mmapped before saving, then
   516  		// was restored without a host FD. This is unrecoverable: without a
   517  		// host FD, we can't mmap this file post-restore.
   518  		panic("gofer.specialFileFD can no longer be memory-mapped without a host FD")
   519  	}
   520  }
   521  
// updateMetadata calls d.updateMetadataLocked with fd.handle while holding
// the dentry's metadataMu, and returns its error.
func (fd *specialFileFD) updateMetadata(ctx context.Context) error {
	d := fd.dentry()
	d.metadataMu.Lock()
	defer d.metadataMu.Unlock()
	return d.updateMetadataLocked(ctx, fd.handle)
}