github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/inode.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fs
    16  
    17  import (
    18  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    19  	"github.com/SagerNet/gvisor/pkg/context"
    20  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    21  	"github.com/SagerNet/gvisor/pkg/log"
    22  	"github.com/SagerNet/gvisor/pkg/refs"
    23  	"github.com/SagerNet/gvisor/pkg/sentry/fs/lock"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    28  	"github.com/SagerNet/gvisor/pkg/sync"
    29  	"github.com/SagerNet/gvisor/pkg/syserror"
    30  )
    31  
// Inode is a file system object that can be simultaneously referenced by different
// components of the VFS (Dirent, fs.File, etc).
//
// Most methods on Inode dispatch on the overlay field: when it is non-nil the
// corresponding overlay* helper is called, otherwise the call is forwarded to
// InodeOperations.
//
// +stateify savable
type Inode struct {
	// AtomicRefCount is our reference count. When it drops to zero via
	// DecRef, destroy is run.
	refs.AtomicRefCount

	// InodeOperations is the file system specific behavior of the Inode.
	InodeOperations InodeOperations

	// StableAttr are stable cached attributes of the Inode.
	StableAttr StableAttr

	// LockCtx is the file lock context. It manages its own synchronization and tracks
	// regions of the Inode that have locks held.
	LockCtx LockCtx

	// Watches is the set of inotify watches for this inode.
	Watches *Watches

	// MountSource is the mount source this Inode is a part of. NewInode
	// takes a reference on it and destroy drops that reference.
	MountSource *MountSource

	// overlay is the overlay entry for this Inode. It is nil for Inodes
	// that are not part of an overlay.
	overlay *overlayEntry

	// appendMu is used to synchronize write operations into files which
	// have been opened with O_APPEND. Operations which change a file size
	// have to take this lock for read. Write operations to files with
	// O_APPEND have to take this lock for write.
	//
	// The mutex is excluded from save/restore by the nosave tag.
	appendMu sync.RWMutex `state:"nosave"`
}
    65  
// LockCtx is an Inode's lock context and contains different personalities of locks; both
// Posix and BSD style locks are supported.
//
// Note that in Linux fcntl(2) and flock(2) locks are _not_ cooperative, because race and
// deadlock conditions make merging them prohibitive. We do the same and keep them oblivious
// to each other but provide a "context" as a convenient container: the two lock sets
// below are independent fields and nothing in this type relates one to the other.
//
// +stateify savable
type LockCtx struct {
	// Posix is a set of POSIX-style regional advisory locks, see fcntl(2).
	Posix lock.Locks

	// BSD is a set of BSD-style advisory file wide locks, see flock(2).
	BSD lock.Locks
}
    81  
    82  // NewInode constructs an Inode from InodeOperations, a MountSource, and stable attributes.
    83  //
    84  // NewInode takes a reference on msrc.
    85  func NewInode(ctx context.Context, iops InodeOperations, msrc *MountSource, sattr StableAttr) *Inode {
    86  	msrc.IncRef()
    87  	i := Inode{
    88  		InodeOperations: iops,
    89  		StableAttr:      sattr,
    90  		Watches:         newWatches(),
    91  		MountSource:     msrc,
    92  	}
    93  	i.EnableLeakCheck("fs.Inode")
    94  	return &i
    95  }
    96  
    97  // DecRef drops a reference on the Inode.
    98  func (i *Inode) DecRef(ctx context.Context) {
    99  	i.DecRefWithDestructor(ctx, i.destroy)
   100  }
   101  
   102  // destroy releases the Inode and releases the msrc reference taken.
   103  func (i *Inode) destroy(ctx context.Context) {
   104  	if err := i.WriteOut(ctx); err != nil {
   105  		// FIXME(b/65209558): Mark as warning again once noatime is
   106  		// properly supported.
   107  		log.Debugf("Inode %+v, failed to sync all metadata: %v", i.StableAttr, err)
   108  	}
   109  
   110  	// If this inode is being destroyed because it was unlinked, queue a
   111  	// deletion event. This may not be the case for inodes being revalidated.
   112  	if i.Watches.unlinked {
   113  		i.Watches.Notify("", linux.IN_DELETE_SELF, 0)
   114  	}
   115  
   116  	// Remove references from the watch owners to the watches on this inode,
   117  	// since the watches are about to be GCed. Note that we don't need to worry
   118  	// about the watch pins since if there were any active pins, this inode
   119  	// wouldn't be in the destructor.
   120  	i.Watches.targetDestroyed()
   121  
   122  	if i.overlay != nil {
   123  		i.overlay.release(ctx)
   124  	} else {
   125  		i.InodeOperations.Release(ctx)
   126  	}
   127  
   128  	i.MountSource.DecRef(ctx)
   129  }
   130  
   131  // Mappable calls i.InodeOperations.Mappable.
   132  func (i *Inode) Mappable() memmap.Mappable {
   133  	if i.overlay != nil {
   134  		// In an overlay, Mappable is always implemented by
   135  		// the overlayEntry metadata to synchronize memory
   136  		// access of files with copy up. But first check if
   137  		// the Inodes involved would be mappable in the first
   138  		// place.
   139  		i.overlay.copyMu.RLock()
   140  		ok := i.overlay.isMappableLocked()
   141  		i.overlay.copyMu.RUnlock()
   142  		if !ok {
   143  			return nil
   144  		}
   145  		return i.overlay
   146  	}
   147  	return i.InodeOperations.Mappable(i)
   148  }
   149  
   150  // WriteOut calls i.InodeOperations.WriteOut with i as the Inode.
   151  func (i *Inode) WriteOut(ctx context.Context) error {
   152  	if i.overlay != nil {
   153  		return overlayWriteOut(ctx, i.overlay)
   154  	}
   155  	return i.InodeOperations.WriteOut(ctx, i)
   156  }
   157  
   158  // Lookup calls i.InodeOperations.Lookup with i as the directory.
   159  func (i *Inode) Lookup(ctx context.Context, name string) (*Dirent, error) {
   160  	if i.overlay != nil {
   161  		d, _, err := overlayLookup(ctx, i.overlay, i, name)
   162  		return d, err
   163  	}
   164  	return i.InodeOperations.Lookup(ctx, i, name)
   165  }
   166  
   167  // Create calls i.InodeOperations.Create with i as the directory.
   168  func (i *Inode) Create(ctx context.Context, d *Dirent, name string, flags FileFlags, perm FilePermissions) (*File, error) {
   169  	if i.overlay != nil {
   170  		return overlayCreate(ctx, i.overlay, d, name, flags, perm)
   171  	}
   172  	return i.InodeOperations.Create(ctx, i, name, flags, perm)
   173  }
   174  
   175  // CreateDirectory calls i.InodeOperations.CreateDirectory with i as the directory.
   176  func (i *Inode) CreateDirectory(ctx context.Context, d *Dirent, name string, perm FilePermissions) error {
   177  	if i.overlay != nil {
   178  		return overlayCreateDirectory(ctx, i.overlay, d, name, perm)
   179  	}
   180  	return i.InodeOperations.CreateDirectory(ctx, i, name, perm)
   181  }
   182  
   183  // CreateLink calls i.InodeOperations.CreateLink with i as the directory.
   184  func (i *Inode) CreateLink(ctx context.Context, d *Dirent, oldname string, newname string) error {
   185  	if i.overlay != nil {
   186  		return overlayCreateLink(ctx, i.overlay, d, oldname, newname)
   187  	}
   188  	return i.InodeOperations.CreateLink(ctx, i, oldname, newname)
   189  }
   190  
   191  // CreateHardLink calls i.InodeOperations.CreateHardLink with i as the directory.
   192  func (i *Inode) CreateHardLink(ctx context.Context, d *Dirent, target *Dirent, name string) error {
   193  	if i.overlay != nil {
   194  		return overlayCreateHardLink(ctx, i.overlay, d, target, name)
   195  	}
   196  	return i.InodeOperations.CreateHardLink(ctx, i, target.Inode, name)
   197  }
   198  
   199  // CreateFifo calls i.InodeOperations.CreateFifo with i as the directory.
   200  func (i *Inode) CreateFifo(ctx context.Context, d *Dirent, name string, perm FilePermissions) error {
   201  	if i.overlay != nil {
   202  		return overlayCreateFifo(ctx, i.overlay, d, name, perm)
   203  	}
   204  	return i.InodeOperations.CreateFifo(ctx, i, name, perm)
   205  }
   206  
   207  // Remove calls i.InodeOperations.Remove/RemoveDirectory with i as the directory.
   208  func (i *Inode) Remove(ctx context.Context, d *Dirent, remove *Dirent) error {
   209  	if i.overlay != nil {
   210  		return overlayRemove(ctx, i.overlay, d, remove)
   211  	}
   212  	switch remove.Inode.StableAttr.Type {
   213  	case Directory, SpecialDirectory:
   214  		return i.InodeOperations.RemoveDirectory(ctx, i, remove.name)
   215  	default:
   216  		return i.InodeOperations.Remove(ctx, i, remove.name)
   217  	}
   218  }
   219  
   220  // Rename calls i.InodeOperations.Rename with the given arguments.
   221  func (i *Inode) Rename(ctx context.Context, oldParent *Dirent, renamed *Dirent, newParent *Dirent, newName string, replacement bool) error {
   222  	if i.overlay != nil {
   223  		return overlayRename(ctx, i.overlay, oldParent, renamed, newParent, newName, replacement)
   224  	}
   225  	return i.InodeOperations.Rename(ctx, renamed.Inode, oldParent.Inode, renamed.name, newParent.Inode, newName, replacement)
   226  }
   227  
   228  // Bind calls i.InodeOperations.Bind with i as the directory.
   229  func (i *Inode) Bind(ctx context.Context, parent *Dirent, name string, data transport.BoundEndpoint, perm FilePermissions) (*Dirent, error) {
   230  	if i.overlay != nil {
   231  		return overlayBind(ctx, i.overlay, parent, name, data, perm)
   232  	}
   233  	return i.InodeOperations.Bind(ctx, i, name, data, perm)
   234  }
   235  
   236  // BoundEndpoint calls i.InodeOperations.BoundEndpoint with i as the Inode.
   237  func (i *Inode) BoundEndpoint(path string) transport.BoundEndpoint {
   238  	if i.overlay != nil {
   239  		return overlayBoundEndpoint(i.overlay, path)
   240  	}
   241  	return i.InodeOperations.BoundEndpoint(i, path)
   242  }
   243  
   244  // GetFile calls i.InodeOperations.GetFile with the given arguments.
   245  func (i *Inode) GetFile(ctx context.Context, d *Dirent, flags FileFlags) (*File, error) {
   246  	if i.overlay != nil {
   247  		return overlayGetFile(ctx, i.overlay, d, flags)
   248  	}
   249  	fsmetric.Opens.Increment()
   250  	return i.InodeOperations.GetFile(ctx, d, flags)
   251  }
   252  
   253  // UnstableAttr calls i.InodeOperations.UnstableAttr with i as the Inode.
   254  func (i *Inode) UnstableAttr(ctx context.Context) (UnstableAttr, error) {
   255  	if i.overlay != nil {
   256  		return overlayUnstableAttr(ctx, i.overlay)
   257  	}
   258  	return i.InodeOperations.UnstableAttr(ctx, i)
   259  }
   260  
   261  // GetXattr calls i.InodeOperations.GetXattr with i as the Inode.
   262  func (i *Inode) GetXattr(ctx context.Context, name string, size uint64) (string, error) {
   263  	if i.overlay != nil {
   264  		return overlayGetXattr(ctx, i.overlay, name, size)
   265  	}
   266  	return i.InodeOperations.GetXattr(ctx, i, name, size)
   267  }
   268  
   269  // SetXattr calls i.InodeOperations.SetXattr with i as the Inode.
   270  func (i *Inode) SetXattr(ctx context.Context, d *Dirent, name, value string, flags uint32) error {
   271  	if i.overlay != nil {
   272  		return overlaySetXattr(ctx, i.overlay, d, name, value, flags)
   273  	}
   274  	return i.InodeOperations.SetXattr(ctx, i, name, value, flags)
   275  }
   276  
   277  // ListXattr calls i.InodeOperations.ListXattr with i as the Inode.
   278  func (i *Inode) ListXattr(ctx context.Context, size uint64) (map[string]struct{}, error) {
   279  	if i.overlay != nil {
   280  		return overlayListXattr(ctx, i.overlay, size)
   281  	}
   282  	return i.InodeOperations.ListXattr(ctx, i, size)
   283  }
   284  
   285  // RemoveXattr calls i.InodeOperations.RemoveXattr with i as the Inode.
   286  func (i *Inode) RemoveXattr(ctx context.Context, d *Dirent, name string) error {
   287  	if i.overlay != nil {
   288  		return overlayRemoveXattr(ctx, i.overlay, d, name)
   289  	}
   290  	return i.InodeOperations.RemoveXattr(ctx, i, name)
   291  }
   292  
   293  // CheckPermission will check if the caller may access this file in the
   294  // requested way for reading, writing, or executing.
   295  //
   296  // CheckPermission is like Linux's fs/namei.c:inode_permission. It
   297  // - checks file system mount flags,
   298  // - and utilizes InodeOperations.Check to check capabilities and modes.
   299  func (i *Inode) CheckPermission(ctx context.Context, p PermMask) error {
   300  	// First check the outer-most mounted filesystem.
   301  	if p.Write && i.MountSource.Flags.ReadOnly {
   302  		return linuxerr.EROFS
   303  	}
   304  
   305  	if i.overlay != nil {
   306  		// CheckPermission requires some special handling for
   307  		// an overlay.
   308  		//
   309  		// Writes will always be redirected to an upper filesystem,
   310  		// so ignore all lower layers being read-only.
   311  		//
   312  		// But still honor the upper-most filesystem's mount flags;
   313  		// we should not attempt to modify the writable layer if it
   314  		// is mounted read-only.
   315  		if p.Write && overlayUpperMountSource(i.MountSource).Flags.ReadOnly {
   316  			return linuxerr.EROFS
   317  		}
   318  	}
   319  
   320  	return i.check(ctx, p)
   321  }
   322  
   323  func (i *Inode) check(ctx context.Context, p PermMask) error {
   324  	if i.overlay != nil {
   325  		return overlayCheck(ctx, i.overlay, p)
   326  	}
   327  	if !i.InodeOperations.Check(ctx, i, p) {
   328  		return linuxerr.EACCES
   329  	}
   330  	return nil
   331  }
   332  
   333  // SetPermissions calls i.InodeOperations.SetPermissions with i as the Inode.
   334  func (i *Inode) SetPermissions(ctx context.Context, d *Dirent, f FilePermissions) bool {
   335  	if i.overlay != nil {
   336  		return overlaySetPermissions(ctx, i.overlay, d, f)
   337  	}
   338  	return i.InodeOperations.SetPermissions(ctx, i, f)
   339  }
   340  
   341  // SetOwner calls i.InodeOperations.SetOwner with i as the Inode.
   342  func (i *Inode) SetOwner(ctx context.Context, d *Dirent, o FileOwner) error {
   343  	if i.overlay != nil {
   344  		return overlaySetOwner(ctx, i.overlay, d, o)
   345  	}
   346  	return i.InodeOperations.SetOwner(ctx, i, o)
   347  }
   348  
   349  // SetTimestamps calls i.InodeOperations.SetTimestamps with i as the Inode.
   350  func (i *Inode) SetTimestamps(ctx context.Context, d *Dirent, ts TimeSpec) error {
   351  	if i.overlay != nil {
   352  		return overlaySetTimestamps(ctx, i.overlay, d, ts)
   353  	}
   354  	return i.InodeOperations.SetTimestamps(ctx, i, ts)
   355  }
   356  
   357  // Truncate calls i.InodeOperations.Truncate with i as the Inode.
   358  func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error {
   359  	if IsDir(i.StableAttr) {
   360  		return syserror.EISDIR
   361  	}
   362  
   363  	if i.overlay != nil {
   364  		return overlayTruncate(ctx, i.overlay, d, size)
   365  	}
   366  	i.appendMu.RLock()
   367  	defer i.appendMu.RUnlock()
   368  	return i.InodeOperations.Truncate(ctx, i, size)
   369  }
   370  
   371  // Allocate calls i.InodeOperations.Allocate with i as the Inode.
   372  func (i *Inode) Allocate(ctx context.Context, d *Dirent, offset int64, length int64) error {
   373  	if i.overlay != nil {
   374  		return overlayAllocate(ctx, i.overlay, d, offset, length)
   375  	}
   376  	return i.InodeOperations.Allocate(ctx, i, offset, length)
   377  }
   378  
   379  // Readlink calls i.InodeOperations.Readlnk with i as the Inode.
   380  func (i *Inode) Readlink(ctx context.Context) (string, error) {
   381  	if i.overlay != nil {
   382  		return overlayReadlink(ctx, i.overlay)
   383  	}
   384  	return i.InodeOperations.Readlink(ctx, i)
   385  }
   386  
   387  // Getlink calls i.InodeOperations.Getlink.
   388  func (i *Inode) Getlink(ctx context.Context) (*Dirent, error) {
   389  	if i.overlay != nil {
   390  		return overlayGetlink(ctx, i.overlay)
   391  	}
   392  	return i.InodeOperations.Getlink(ctx, i)
   393  }
   394  
   395  // AddLink calls i.InodeOperations.AddLink.
   396  func (i *Inode) AddLink() {
   397  	if i.overlay != nil {
   398  		// This interface is only used by ramfs to update metadata of
   399  		// children. These filesystems should _never_ have overlay
   400  		// Inodes cached as children. So explicitly disallow this
   401  		// scenario and avoid plumbing Dirents through to do copy up.
   402  		panic("overlay Inodes cached in ramfs directories are not supported")
   403  	}
   404  	i.InodeOperations.AddLink()
   405  }
   406  
   407  // DropLink calls i.InodeOperations.DropLink.
   408  func (i *Inode) DropLink() {
   409  	if i.overlay != nil {
   410  		// Same as AddLink.
   411  		panic("overlay Inodes cached in ramfs directories are not supported")
   412  	}
   413  	i.InodeOperations.DropLink()
   414  }
   415  
   416  // IsVirtual calls i.InodeOperations.IsVirtual.
   417  func (i *Inode) IsVirtual() bool {
   418  	if i.overlay != nil {
   419  		// An overlay configuration does not support virtual files.
   420  		return false
   421  	}
   422  	return i.InodeOperations.IsVirtual()
   423  }
   424  
   425  // StatFS calls i.InodeOperations.StatFS.
   426  func (i *Inode) StatFS(ctx context.Context) (Info, error) {
   427  	if i.overlay != nil {
   428  		return overlayStatFS(ctx, i.overlay)
   429  	}
   430  	return i.InodeOperations.StatFS(ctx)
   431  }
   432  
   433  // CheckOwnership checks whether `ctx` owns this Inode or may act as its owner.
   434  // Compare Linux's fs/inode.c:inode_owner_or_capable().
   435  func (i *Inode) CheckOwnership(ctx context.Context) bool {
   436  	uattr, err := i.UnstableAttr(ctx)
   437  	if err != nil {
   438  		return false
   439  	}
   440  	creds := auth.CredentialsFromContext(ctx)
   441  	if uattr.Owner.UID == creds.EffectiveKUID {
   442  		return true
   443  	}
   444  	if creds.HasCapability(linux.CAP_FOWNER) && creds.UserNamespace.MapFromKUID(uattr.Owner.UID).Ok() {
   445  		return true
   446  	}
   447  	return false
   448  }
   449  
   450  // CheckCapability checks whether `ctx` has capability `cp` with respect to
   451  // operations on this Inode.
   452  //
   453  // Compare Linux's kernel/capability.c:capable_wrt_inode_uidgid().
   454  func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool {
   455  	uattr, err := i.UnstableAttr(ctx)
   456  	if err != nil {
   457  		return false
   458  	}
   459  	creds := auth.CredentialsFromContext(ctx)
   460  	if !creds.UserNamespace.MapFromKUID(uattr.Owner.UID).Ok() {
   461  		return false
   462  	}
   463  	if !creds.UserNamespace.MapFromKGID(uattr.Owner.GID).Ok() {
   464  		return false
   465  	}
   466  	return creds.HasCapability(cp)
   467  }
   468  
   469  func (i *Inode) lockAppendMu(appendMode bool) func() {
   470  	if appendMode {
   471  		i.appendMu.Lock()
   472  		return i.appendMu.Unlock
   473  	}
   474  	i.appendMu.RLock()
   475  	return i.appendMu.RUnlock
   476  }