github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/gofer/file.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gofer
    16  
    17  import (
    18  	"fmt"
    19  	"time"
    20  
    21  	"golang.org/x/sys/unix"
    22  	"github.com/SagerNet/gvisor/pkg/context"
    23  	"github.com/SagerNet/gvisor/pkg/log"
    24  	"github.com/SagerNet/gvisor/pkg/metric"
    25  	"github.com/SagerNet/gvisor/pkg/p9"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/device"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
    31  	"github.com/SagerNet/gvisor/pkg/syserror"
    32  	"github.com/SagerNet/gvisor/pkg/usermem"
    33  	"github.com/SagerNet/gvisor/pkg/waiter"
    34  )
    35  
    36  // fileOperations implements fs.FileOperations for a remote file system.
    37  //
    38  // +stateify savable
    39  type fileOperations struct {
    40  	fsutil.FileNoIoctl  `state:"nosave"`
    41  	fsutil.FileNoSplice `state:"nosplice"`
    42  	waiter.AlwaysReady  `state:"nosave"`
    43  
    44  	// inodeOperations is the inodeOperations backing the file. It is protected
    45  	// by a reference held by File.Dirent.Inode which is stable until
    46  	// FileOperations.Release is called.
    47  	inodeOperations *inodeOperations `state:"wait"`
    48  
    49  	// dirCursor is the directory cursor.
    50  	dirCursor string
    51  
    52  	// handles are the opened remote file system handles, which may
    53  	// be shared with other files.
    54  	handles *handles `state:"nosave"`
    55  
    56  	// flags are the flags used to open handles.
    57  	flags fs.FileFlags `state:"wait"`
    58  }
    59  
// Compile-time check that *fileOperations implements fs.FileOperations.
var _ fs.FileOperations = (*fileOperations)(nil)
    62  
    63  // NewFile returns a file. NewFile is not appropriate with host pipes and sockets.
    64  //
    65  // The `name` argument is only used to log a warning if we are returning a
    66  // writeable+executable file. (A metric counter is incremented in this case as
    67  // well.) Note that we cannot call d.BaseName() directly in this function,
    68  // because that would lead to a lock order violation, since this is called in
    69  // d.Create which holds d.mu, while d.BaseName() takes d.parent.mu, and the two
    70  // locks must be taken in the opposite order.
    71  func NewFile(ctx context.Context, dirent *fs.Dirent, name string, flags fs.FileFlags, i *inodeOperations, handles *handles) *fs.File {
    72  	// Remote file systems enforce readability/writability at an offset,
    73  	// see fs/9p/vfs_inode.c:v9fs_vfs_atomic_open -> fs/open.c:finish_open.
    74  	flags.Pread = true
    75  	flags.Pwrite = true
    76  
    77  	if fs.IsFile(dirent.Inode.StableAttr) {
    78  		// If cache policy is "remote revalidating", then we must
    79  		// ensure that we have a host FD. Otherwise, the
    80  		// sentry-internal page cache will be used, and we can end up
    81  		// in an inconsistent state if the remote file changes.
    82  		cp := dirent.Inode.InodeOperations.(*inodeOperations).session().cachePolicy
    83  		if cp == cacheRemoteRevalidating && handles.Host == nil {
    84  			panic(fmt.Sprintf("remote-revalidating cache policy requires gofer to donate host FD, but file %q did not have host FD", name))
    85  		}
    86  	}
    87  
    88  	f := &fileOperations{
    89  		inodeOperations: i,
    90  		handles:         handles,
    91  		flags:           flags,
    92  	}
    93  	if flags.Write {
    94  		if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Execute: true}); err == nil {
    95  			metric.SuspiciousOperationsMetric.Increment("opened_write_execute_file")
    96  			log.Warningf("Opened a writable executable: %q", name)
    97  		}
    98  	}
    99  	if handles.Host != nil {
   100  		fsmetric.GoferOpensHost.Increment()
   101  	} else {
   102  		fsmetric.GoferOpens9P.Increment()
   103  	}
   104  	return fs.NewFile(ctx, dirent, flags, f)
   105  }
   106  
// Release implements fs.FileOperations.Release.
//
// It drops the reference this file holds on its handles. The context
// argument is unused.
func (f *fileOperations) Release(context.Context) {
	f.handles.DecRef()
}
   111  
   112  // Readdir implements fs.FileOperations.Readdir.
   113  func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
   114  	root := fs.RootFromContext(ctx)
   115  	if root != nil {
   116  		defer root.DecRef(ctx)
   117  	}
   118  
   119  	dirCtx := &fs.DirCtx{
   120  		Serializer: serializer,
   121  		DirCursor:  &f.dirCursor,
   122  	}
   123  	n, err := fs.DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
   124  	if f.inodeOperations.session().cachePolicy.cacheUAttrs(file.Dirent.Inode) {
   125  		f.inodeOperations.cachingInodeOps.TouchAccessTime(ctx, file.Dirent.Inode)
   126  	}
   127  	return n, err
   128  }
   129  
   130  // IterateDir implements fs.DirIterator.IterateDir.
   131  func (f *fileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
   132  	f.inodeOperations.readdirMu.Lock()
   133  	defer f.inodeOperations.readdirMu.Unlock()
   134  
   135  	// Fetch directory entries if needed.
   136  	if !f.inodeOperations.session().cachePolicy.cacheReaddir() || f.inodeOperations.readdirCache == nil {
   137  		entries, err := f.readdirAll(ctx)
   138  		if err != nil {
   139  			return offset, err
   140  		}
   141  
   142  		// Cache the readdir result.
   143  		f.inodeOperations.readdirCache = fs.NewSortedDentryMap(entries)
   144  	}
   145  
   146  	// Serialize the entries.
   147  	n, err := fs.GenericReaddir(dirCtx, f.inodeOperations.readdirCache)
   148  	return offset + n, err
   149  }
   150  
   151  // readdirAll fetches fs.DentAttrs for f, using the attributes of g.
   152  func (f *fileOperations) readdirAll(ctx context.Context) (map[string]fs.DentAttr, error) {
   153  	entries := make(map[string]fs.DentAttr)
   154  	var readOffset uint64
   155  	for {
   156  		// We choose some arbitrary high number of directory entries (64k) and call
   157  		// Readdir until we've exhausted them all.
   158  		dirents, err := f.handles.File.readdir(ctx, readOffset, 64*1024)
   159  		if err != nil {
   160  			return nil, err
   161  		}
   162  		if len(dirents) == 0 {
   163  			// We're done, we reached EOF.
   164  			break
   165  		}
   166  
   167  		// The last dirent contains the offset into the next set of dirents.  The gofer
   168  		// returns the offset as an index into directories, not as a byte offset, because
   169  		// converting a byte offset to an index into directories entries is a huge pain.
   170  		// But everything is fine if we're consistent.
   171  		readOffset = dirents[len(dirents)-1].Offset
   172  
   173  		for _, dirent := range dirents {
   174  			if dirent.Name == "." || dirent.Name == ".." {
   175  				// These must not be included in Readdir results.
   176  				continue
   177  			}
   178  
   179  			// Find a best approximation of the type.
   180  			var nt fs.InodeType
   181  			switch dirent.Type {
   182  			case p9.TypeDir:
   183  				nt = fs.Directory
   184  			case p9.TypeSymlink:
   185  				nt = fs.Symlink
   186  			default:
   187  				nt = fs.RegularFile
   188  			}
   189  
   190  			// Install the DentAttr.
   191  			entries[dirent.Name] = fs.DentAttr{
   192  				Type: nt,
   193  				// Construct the key to find the virtual inode.
   194  				// Directory entries reside on the same Device
   195  				// and SecondaryDevice as their parent.
   196  				InodeID: goferDevice.Map(device.MultiDeviceKey{
   197  					Device:          f.inodeOperations.fileState.key.Device,
   198  					SecondaryDevice: f.inodeOperations.fileState.key.SecondaryDevice,
   199  					Inode:           dirent.QID.Path,
   200  				}),
   201  			}
   202  		}
   203  	}
   204  
   205  	return entries, nil
   206  }
   207  
   208  // maybeSync will call FSync on the file if the file flags require it.
   209  func (f *fileOperations) maybeSync(ctx context.Context, file *fs.File, offset, n int64) error {
   210  	flags := file.Flags()
   211  	var syncType fs.SyncType
   212  	switch {
   213  	case flags.Direct || flags.Sync:
   214  		syncType = fs.SyncAll
   215  	case flags.DSync:
   216  		syncType = fs.SyncData
   217  	default:
   218  		// No need to sync.
   219  		return nil
   220  	}
   221  
   222  	return f.Fsync(ctx, file, offset, offset+n, syncType)
   223  }
   224  
   225  // Write implements fs.FileOperations.Write.
   226  func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
   227  	if fs.IsDir(file.Dirent.Inode.StableAttr) {
   228  		// Not all remote file systems enforce this so this client does.
   229  		return 0, syserror.EISDIR
   230  	}
   231  
   232  	var (
   233  		n   int64
   234  		err error
   235  	)
   236  	// The write is handled in different ways depending on the cache policy
   237  	// and availability of a host-mappable FD.
   238  	if f.inodeOperations.session().cachePolicy.useCachingInodeOps(file.Dirent.Inode) {
   239  		n, err = f.inodeOperations.cachingInodeOps.Write(ctx, src, offset)
   240  	} else {
   241  		uattr, e := f.UnstableAttr(ctx, file)
   242  		if e != nil {
   243  			return 0, e
   244  		}
   245  		if f.inodeOperations.fileState.hostMappable != nil {
   246  			n, err = f.inodeOperations.fileState.hostMappable.Write(ctx, src, offset, uattr)
   247  		} else {
   248  			n, err = src.CopyInTo(ctx, f.handles.readWriterAt(ctx, offset))
   249  			if n > 0 && uattr.Perms.HasSetUIDOrGID() {
   250  				uattr.Perms.DropSetUIDAndMaybeGID()
   251  				f.inodeOperations.SetPermissions(ctx, file.Dirent.Inode, uattr.Perms)
   252  			}
   253  		}
   254  	}
   255  
   256  	if n == 0 {
   257  		// Nothing written. We are done.
   258  		return 0, err
   259  	}
   260  
   261  	// Write the dirty pages and attributes if cache policy tells us to.
   262  	if f.inodeOperations.session().cachePolicy.writeThrough(file.Dirent.Inode) {
   263  		if werr := f.inodeOperations.cachingInodeOps.WriteDirtyPagesAndAttrs(ctx, file.Dirent.Inode); werr != nil {
   264  			// Report no bytes written since the write faild.
   265  			return 0, werr
   266  		}
   267  	}
   268  
   269  	// We may need to sync the written bytes.
   270  	if syncErr := f.maybeSync(ctx, file, offset, n); syncErr != nil {
   271  		// Sync failed. Report 0 bytes written, since none of them are
   272  		// guaranteed to have been synced.
   273  		return 0, syncErr
   274  	}
   275  
   276  	return n, err
   277  }
   278  
   279  // incrementReadCounters increments the read counters for the read starting at the given time. We
   280  // use this function rather than using a defer in Read() to avoid the performance hit of defer.
   281  func (f *fileOperations) incrementReadCounters(start time.Time) {
   282  	if f.handles.Host != nil {
   283  		fsmetric.GoferReadsHost.Increment()
   284  		fsmetric.FinishReadWait(fsmetric.GoferReadWaitHost, start)
   285  	} else {
   286  		fsmetric.GoferReads9P.Increment()
   287  		fsmetric.FinishReadWait(fsmetric.GoferReadWait9P, start)
   288  	}
   289  }
   290  
   291  // Read implements fs.FileOperations.Read.
   292  func (f *fileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
   293  	start := fsmetric.StartReadWait()
   294  	if fs.IsDir(file.Dirent.Inode.StableAttr) {
   295  		// Not all remote file systems enforce this so this client does.
   296  		f.incrementReadCounters(start)
   297  		return 0, syserror.EISDIR
   298  	}
   299  
   300  	if f.inodeOperations.session().cachePolicy.useCachingInodeOps(file.Dirent.Inode) {
   301  		n, err := f.inodeOperations.cachingInodeOps.Read(ctx, file, dst, offset)
   302  		f.incrementReadCounters(start)
   303  		return n, err
   304  	}
   305  	n, err := dst.CopyOutFrom(ctx, f.handles.readWriterAt(ctx, offset))
   306  	f.incrementReadCounters(start)
   307  	return n, err
   308  }
   309  
   310  // Fsync implements fs.FileOperations.Fsync.
   311  func (f *fileOperations) Fsync(ctx context.Context, file *fs.File, start, end int64, syncType fs.SyncType) error {
   312  	switch syncType {
   313  	case fs.SyncAll, fs.SyncData:
   314  		if err := file.Dirent.Inode.WriteOut(ctx); err != nil {
   315  			return err
   316  		}
   317  		fallthrough
   318  	case fs.SyncBackingStorage:
   319  		// Sync remote caches.
   320  		if f.handles.Host != nil {
   321  			// Sync the host fd directly.
   322  			return unix.Fsync(f.handles.Host.FD())
   323  		}
   324  		// Otherwise sync on the p9.File handle.
   325  		return f.handles.File.fsync(ctx)
   326  	}
   327  	panic("invalid sync type")
   328  }
   329  
   330  // Flush implements fs.FileOperations.Flush.
   331  func (f *fileOperations) Flush(ctx context.Context, file *fs.File) error {
   332  	// If this file is not opened writable then there is nothing to flush.
   333  	// We do this because some p9 server implementations of Flush are
   334  	// over-zealous.
   335  	//
   336  	// FIXME(edahlgren): weaken these implementations and remove this check.
   337  	if !file.Flags().Write {
   338  		return nil
   339  	}
   340  	// Execute the flush.
   341  	return f.handles.File.flush(ctx)
   342  }
   343  
// ConfigureMMap implements fs.FileOperations.ConfigureMMap.
//
// It delegates to the backing inodeOperations. The ctx argument is not used.
func (f *fileOperations) ConfigureMMap(ctx context.Context, file *fs.File, opts *memmap.MMapOpts) error {
	return f.inodeOperations.configureMMap(file, opts)
}
   348  
   349  // UnstableAttr implements fs.FileOperations.UnstableAttr.
   350  func (f *fileOperations) UnstableAttr(ctx context.Context, file *fs.File) (fs.UnstableAttr, error) {
   351  	s := f.inodeOperations.session()
   352  	if s.cachePolicy.cacheUAttrs(file.Dirent.Inode) {
   353  		return f.inodeOperations.cachingInodeOps.UnstableAttr(ctx, file.Dirent.Inode)
   354  	}
   355  	// Use f.handles.File, which represents 9P fids that have been opened,
   356  	// instead of inodeFileState.file, which represents 9P fids that have not.
   357  	// This may be significantly more efficient in some implementations.
   358  	_, valid, pattr, err := getattr(ctx, f.handles.File)
   359  	if err != nil {
   360  		return fs.UnstableAttr{}, err
   361  	}
   362  	return unstable(ctx, valid, pattr, s.mounter, s.client), nil
   363  }
   364  
// Seek implements fs.FileOperations.Seek.
//
// It delegates to fsutil.SeekWithDirCursor, passing a pointer to this file's
// directory cursor.
func (f *fileOperations) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
	return fsutil.SeekWithDirCursor(ctx, file, whence, offset, &f.dirCursor)
}