github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/host/inode.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package host
    16  
    17  import (
    18  	"golang.org/x/sys/unix"
    19  	"github.com/SagerNet/gvisor/pkg/context"
    20  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    21  	"github.com/SagerNet/gvisor/pkg/fd"
    22  	"github.com/SagerNet/gvisor/pkg/safemem"
    23  	"github.com/SagerNet/gvisor/pkg/secio"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    28  	"github.com/SagerNet/gvisor/pkg/sync"
    29  	"github.com/SagerNet/gvisor/pkg/syserror"
    30  	"github.com/SagerNet/gvisor/pkg/waiter"
    31  )
    32  
    33  // inodeOperations implements fs.InodeOperations for an fs.Inodes backed
    34  // by a host file descriptor.
    35  //
    36  // +stateify savable
    37  type inodeOperations struct {
    38  	fsutil.InodeNotVirtual           `state:"nosave"`
    39  	fsutil.InodeNoExtendedAttributes `state:"nosave"`
    40  
    41  	// fileState implements fs.CachedFileObject. It exists
    42  	// to break a circular load dependency between inodeOperations
    43  	// and cachingInodeOps (below).
    44  	fileState *inodeFileState `state:"wait"`
    45  
    46  	// cachedInodeOps implements memmap.Mappable.
    47  	cachingInodeOps *fsutil.CachingInodeOperations
    48  
    49  	// readdirMu protects the file offset on the host FD. This is needed
    50  	// for readdir because getdents must use the kernel offset, so
    51  	// concurrent readdirs must be exclusive.
    52  	//
    53  	// All read/write functions pass the offset directly to the kernel and
    54  	// thus don't need a lock.
    55  	readdirMu sync.Mutex `state:"nosave"`
    56  }
    57  
    58  // inodeFileState implements fs.CachedFileObject and otherwise fully
    59  // encapsulates state that needs to be manually loaded on restore for
    60  // this file object.
    61  //
    62  // This unfortunate structure exists because fs.CachingInodeOperations
    63  // defines afterLoad and therefore cannot be lazily loaded (to break a
    64  // circular load dependency between it and inodeOperations). Even with
    65  // lazy loading, this approach defines the dependencies between objects
    66  // and the expected load behavior more concretely.
    67  //
    68  // +stateify savable
    69  type inodeFileState struct {
    70  	// descriptor is the backing host FD.
    71  	descriptor *descriptor `state:"wait"`
    72  
    73  	// Event queue for blocking operations.
    74  	queue waiter.Queue `state:"zerovalue"`
    75  
    76  	// sattr is used to restore the inodeOperations.
    77  	sattr fs.StableAttr `state:"wait"`
    78  
    79  	// savedUAttr is only allocated during S/R. It points to the save-time
    80  	// unstable attributes and is used to validate restore-time ones.
    81  	//
    82  	// Note that these unstable attributes are only used to detect cross-S/R
    83  	// external file system metadata changes. They may differ from the
    84  	// cached unstable attributes in cachingInodeOps, as that might differ
    85  	// from the external file system attributes if there had been WriteOut
    86  	// failures. S/R is transparent to Sentry and the latter will continue
    87  	// using its cached values after restore.
    88  	savedUAttr *fs.UnstableAttr
    89  }
    90  
    91  // ReadToBlocksAt implements fsutil.CachedFileObject.ReadToBlocksAt.
    92  func (i *inodeFileState) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) {
    93  	// TODO(jamieliu): Using safemem.FromIOReader here is wasteful for two
    94  	// reasons:
    95  	//
    96  	// - Using preadv instead of iterated preads saves on host system calls.
    97  	//
    98  	// - Host system calls can handle destination memory that would fault in
    99  	// gr3 (i.e. they can accept safemem.Blocks with NeedSafecopy() == true),
   100  	// so the buffering performed by FromIOReader is unnecessary.
   101  	//
   102  	// This also applies to the write path below.
   103  	return safemem.FromIOReader{secio.NewOffsetReader(fd.NewReadWriter(i.FD()), int64(offset))}.ReadToBlocks(dsts)
   104  }
   105  
   106  // WriteFromBlocksAt implements fsutil.CachedFileObject.WriteFromBlocksAt.
   107  func (i *inodeFileState) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) {
   108  	return safemem.FromIOWriter{secio.NewOffsetWriter(fd.NewReadWriter(i.FD()), int64(offset))}.WriteFromBlocks(srcs)
   109  }
   110  
   111  // SetMaskedAttributes implements fsutil.CachedFileObject.SetMaskedAttributes.
   112  func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr, _ bool) error {
   113  	if mask.Empty() {
   114  		return nil
   115  	}
   116  	if mask.UID || mask.GID {
   117  		return linuxerr.EPERM
   118  	}
   119  	if mask.Perms {
   120  		if err := unix.Fchmod(i.FD(), uint32(attr.Perms.LinuxMode())); err != nil {
   121  			return err
   122  		}
   123  	}
   124  	if mask.Size {
   125  		if err := unix.Ftruncate(i.FD(), attr.Size); err != nil {
   126  			return err
   127  		}
   128  	}
   129  	if mask.AccessTime || mask.ModificationTime {
   130  		ts := fs.TimeSpec{
   131  			ATime:     attr.AccessTime,
   132  			ATimeOmit: !mask.AccessTime,
   133  			MTime:     attr.ModificationTime,
   134  			MTimeOmit: !mask.ModificationTime,
   135  		}
   136  		if err := setTimestamps(i.FD(), ts); err != nil {
   137  			return err
   138  		}
   139  	}
   140  	return nil
   141  }
   142  
   143  // Sync implements fsutil.CachedFileObject.Sync.
   144  func (i *inodeFileState) Sync(ctx context.Context) error {
   145  	return unix.Fsync(i.FD())
   146  }
   147  
   148  // FD implements fsutil.CachedFileObject.FD.
   149  func (i *inodeFileState) FD() int {
   150  	return i.descriptor.value
   151  }
   152  
   153  func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, error) {
   154  	var s unix.Stat_t
   155  	if err := unix.Fstat(i.FD(), &s); err != nil {
   156  		return fs.UnstableAttr{}, err
   157  	}
   158  	return unstableAttr(&s), nil
   159  }
   160  
   161  // Allocate implements fsutil.CachedFileObject.Allocate.
   162  func (i *inodeFileState) Allocate(_ context.Context, offset, length int64) error {
   163  	return unix.Fallocate(i.FD(), 0, offset, length)
   164  }
   165  
   166  // inodeOperations implements fs.InodeOperations.
   167  var _ fs.InodeOperations = (*inodeOperations)(nil)
   168  
   169  // newInode returns a new fs.Inode backed by the host FD.
   170  func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool) (*fs.Inode, error) {
   171  	// Retrieve metadata.
   172  	var s unix.Stat_t
   173  	err := unix.Fstat(fd, &s)
   174  	if err != nil {
   175  		return nil, err
   176  	}
   177  
   178  	fileState := &inodeFileState{
   179  		sattr: stableAttr(&s),
   180  	}
   181  
   182  	// Initialize the wrapped host file descriptor.
   183  	fileState.descriptor, err = newDescriptor(fd, saveable, wouldBlock(&s), &fileState.queue)
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  
   188  	// Build the fs.InodeOperations.
   189  	uattr := unstableAttr(&s)
   190  	iops := &inodeOperations{
   191  		fileState: fileState,
   192  		cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, fsutil.CachingInodeOperationsOptions{
   193  			ForcePageCache: msrc.Flags.ForcePageCache,
   194  		}),
   195  	}
   196  
   197  	// Return the fs.Inode.
   198  	return fs.NewInode(ctx, iops, msrc, fileState.sattr), nil
   199  }
   200  
   201  // Mappable implements fs.InodeOperations.Mappable.
   202  func (i *inodeOperations) Mappable(inode *fs.Inode) memmap.Mappable {
   203  	if !canMap(inode) {
   204  		return nil
   205  	}
   206  	return i.cachingInodeOps
   207  }
   208  
   209  // ReturnsWouldBlock returns true if this host FD can return EWOULDBLOCK for
   210  // operations that would block.
   211  func (i *inodeOperations) ReturnsWouldBlock() bool {
   212  	return i.fileState.descriptor.wouldBlock
   213  }
   214  
   215  // Release implements fs.InodeOperations.Release.
   216  func (i *inodeOperations) Release(context.Context) {
   217  	i.fileState.descriptor.Release()
   218  	i.cachingInodeOps.Release()
   219  }
   220  
   221  // Lookup implements fs.InodeOperations.Lookup.
   222  func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) {
   223  	return nil, syserror.ENOENT
   224  }
   225  
   226  // Create implements fs.InodeOperations.Create.
   227  func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) {
   228  	return nil, linuxerr.EPERM
   229  
   230  }
   231  
   232  // CreateDirectory implements fs.InodeOperations.CreateDirectory.
   233  func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error {
   234  	return linuxerr.EPERM
   235  }
   236  
   237  // CreateLink implements fs.InodeOperations.CreateLink.
   238  func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname string, newname string) error {
   239  	return linuxerr.EPERM
   240  }
   241  
   242  // CreateHardLink implements fs.InodeOperations.CreateHardLink.
   243  func (*inodeOperations) CreateHardLink(context.Context, *fs.Inode, *fs.Inode, string) error {
   244  	return linuxerr.EPERM
   245  }
   246  
   247  // CreateFifo implements fs.InodeOperations.CreateFifo.
   248  func (*inodeOperations) CreateFifo(context.Context, *fs.Inode, string, fs.FilePermissions) error {
   249  	return linuxerr.EPERM
   250  }
   251  
   252  // Remove implements fs.InodeOperations.Remove.
   253  func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error {
   254  	return linuxerr.EPERM
   255  }
   256  
   257  // RemoveDirectory implements fs.InodeOperations.RemoveDirectory.
   258  func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error {
   259  	return linuxerr.EPERM
   260  }
   261  
   262  // Rename implements fs.InodeOperations.Rename.
   263  func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error {
   264  	return linuxerr.EPERM
   265  }
   266  
   267  // Bind implements fs.InodeOperations.Bind.
   268  func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data transport.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) {
   269  	return nil, syserror.EOPNOTSUPP
   270  }
   271  
   272  // BoundEndpoint implements fs.InodeOperations.BoundEndpoint.
   273  func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport.BoundEndpoint {
   274  	return nil
   275  }
   276  
   277  // GetFile implements fs.InodeOperations.GetFile.
   278  func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
   279  	if fs.IsSocket(d.Inode.StableAttr) {
   280  		return nil, linuxerr.ENXIO
   281  	}
   282  
   283  	return newFile(ctx, d, flags, i), nil
   284  }
   285  
   286  // canMap returns true if this fs.Inode can be memory mapped.
   287  func canMap(inode *fs.Inode) bool {
   288  	// FIXME(b/38213152): Some obscure character devices can be mapped.
   289  	return fs.IsFile(inode.StableAttr)
   290  }
   291  
   292  // UnstableAttr implements fs.InodeOperations.UnstableAttr.
   293  func (i *inodeOperations) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
   294  	// When the kernel supports mapping host FDs, we do so to take
   295  	// advantage of the host page cache. We forego updating fs.Inodes
   296  	// because the host manages consistency of its own inode structures.
   297  	//
   298  	// For fs.Inodes that can never be mapped we take advantage of
   299  	// synchronizing metadata updates through host caches.
   300  	//
   301  	// So can we use host kernel metadata caches?
   302  	if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) {
   303  		// Then just obtain the attributes.
   304  		return i.fileState.unstableAttr(ctx)
   305  	}
   306  	// No, we're maintaining consistency of metadata ourselves.
   307  	return i.cachingInodeOps.UnstableAttr(ctx, inode)
   308  }
   309  
   310  // Check implements fs.InodeOperations.Check.
   311  func (i *inodeOperations) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
   312  	return fs.ContextCanAccessFile(ctx, inode, p)
   313  }
   314  
   315  // SetOwner implements fs.InodeOperations.SetOwner.
   316  func (i *inodeOperations) SetOwner(context.Context, *fs.Inode, fs.FileOwner) error {
   317  	return linuxerr.EPERM
   318  }
   319  
   320  // SetPermissions implements fs.InodeOperations.SetPermissions.
   321  func (i *inodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, f fs.FilePermissions) bool {
   322  	// Can we use host kernel metadata caches?
   323  	if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) {
   324  		// Then just change the timestamps on the FD, the host
   325  		// will synchronize the metadata update with any host
   326  		// inode and page cache.
   327  		return unix.Fchmod(i.fileState.FD(), uint32(f.LinuxMode())) == nil
   328  	}
   329  	// Otherwise update our cached metadata.
   330  	return i.cachingInodeOps.SetPermissions(ctx, inode, f)
   331  }
   332  
   333  // SetTimestamps implements fs.InodeOperations.SetTimestamps.
   334  func (i *inodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts fs.TimeSpec) error {
   335  	// Can we use host kernel metadata caches?
   336  	if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) {
   337  		// Then just change the timestamps on the FD, the host
   338  		// will synchronize the metadata update with any host
   339  		// inode and page cache.
   340  		return setTimestamps(i.fileState.FD(), ts)
   341  	}
   342  	// Otherwise update our cached metadata.
   343  	return i.cachingInodeOps.SetTimestamps(ctx, inode, ts)
   344  }
   345  
   346  // Truncate implements fs.InodeOperations.Truncate.
   347  func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, size int64) error {
   348  	// Is the file not memory-mappable?
   349  	if !canMap(inode) {
   350  		// Then just change the file size on the FD, the host
   351  		// will synchronize the metadata update with any host
   352  		// inode and page cache.
   353  		return unix.Ftruncate(i.fileState.FD(), size)
   354  	}
   355  	// Otherwise we need to go through cachingInodeOps, even if the host page
   356  	// cache is in use, to invalidate private copies of truncated pages.
   357  	return i.cachingInodeOps.Truncate(ctx, inode, size)
   358  }
   359  
   360  // Allocate implements fs.InodeOperations.Allocate.
   361  func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset, length int64) error {
   362  	// Is the file not memory-mappable?
   363  	if !canMap(inode) {
   364  		// Then just send the call to the FD, the host will synchronize the metadata
   365  		// update with any host inode and page cache.
   366  		return i.fileState.Allocate(ctx, offset, length)
   367  	}
   368  	// Otherwise we need to go through cachingInodeOps, even if the host page
   369  	// cache is in use, to invalidate private copies of truncated pages.
   370  	return i.cachingInodeOps.Allocate(ctx, offset, length)
   371  }
   372  
   373  // WriteOut implements fs.InodeOperations.WriteOut.
   374  func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error {
   375  	if inode.MountSource.Flags.ReadOnly {
   376  		return nil
   377  	}
   378  	// Have we been using host kernel metadata caches?
   379  	if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) {
   380  		// Then the metadata is already up to date on the host.
   381  		return nil
   382  	}
   383  	// Otherwise we need to write out cached pages and attributes
   384  	// that are dirty.
   385  	return i.cachingInodeOps.WriteOut(ctx, inode)
   386  }
   387  
   388  // Readlink implements fs.InodeOperations.Readlink.
   389  func (i *inodeOperations) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
   390  	return readLink(i.fileState.FD())
   391  }
   392  
   393  // Getlink implements fs.InodeOperations.Getlink.
   394  func (i *inodeOperations) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) {
   395  	if !fs.IsSymlink(i.fileState.sattr) {
   396  		return nil, linuxerr.ENOLINK
   397  	}
   398  	return nil, fs.ErrResolveViaReadlink
   399  }
   400  
   401  // StatFS implements fs.InodeOperations.StatFS.
   402  func (i *inodeOperations) StatFS(context.Context) (fs.Info, error) {
   403  	return fs.Info{}, syserror.ENOSYS
   404  }
   405  
   406  // AddLink implements fs.InodeOperations.AddLink.
   407  func (i *inodeOperations) AddLink() {}
   408  
   409  // DropLink implements fs.InodeOperations.DropLink.
   410  func (i *inodeOperations) DropLink() {}
   411  
   412  // NotifyStatusChange implements fs.InodeOperations.NotifyStatusChange.
   413  func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {}