github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/vfs/dentry.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package vfs
    16  
    17  import (
    18  	"sync/atomic"
    19  
    20  	"github.com/SagerNet/gvisor/pkg/context"
    21  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    22  	"github.com/SagerNet/gvisor/pkg/sync"
    23  )
    24  
    25  // Dentry represents a node in a Filesystem tree at which a file exists.
    26  //
    27  // Dentries are reference-counted. Unless otherwise specified, all Dentry
    28  // methods require that a reference is held.
    29  //
    30  // Dentry is loosely analogous to Linux's struct dentry, but:
    31  //
    32  // - VFS does not associate Dentries with inodes. gVisor interacts primarily
    33  // with filesystems that are accessed through filesystem APIs (as opposed to
    34  // raw block devices); many such APIs support only paths and file descriptors,
    35  // and not inodes. Furthermore, when parties outside the scope of VFS can
    36  // rename inodes on such filesystems, VFS generally cannot "follow" the rename,
    37  // both due to synchronization issues and because it may not even be able to
    38  // name the destination path; this implies that it would in fact be incorrect
    39  // for Dentries to be associated with inodes on such filesystems. Consequently,
    40  // operations that are inode operations in Linux are FilesystemImpl methods
    41  // and/or FileDescriptionImpl methods in gVisor's VFS. Filesystems that do
    42  // support inodes may store appropriate state in implementations of DentryImpl.
    43  //
    44  // - VFS does not require that Dentries are instantiated for all paths accessed
    45  // through VFS, only those that are tracked beyond the scope of a single
    46  // Filesystem operation. This includes file descriptions, mount points, mount
    47  // roots, process working directories, and chroots. This avoids instantiation
    48  // of Dentries for operations on mutable remote filesystems that can't actually
    49  // cache any state in the Dentry.
    50  //
    51  // - VFS does not track filesystem structure (i.e. relationships between
    52  // Dentries), since both the relevant state and synchronization are
    53  // filesystem-specific.
    54  //
    55  // - For the reasons above, VFS is not directly responsible for managing Dentry
    56  // lifetime. Dentry reference counts only indicate the extent to which VFS
    57  // requires Dentries to exist; Filesystems may elect to cache or discard
    58  // Dentries with zero references.
    59  //
    60  // +stateify savable
type Dentry struct {
	// mu synchronizes deletion/invalidation and mounting over this Dentry.
	// PrepareDeleteDentry and PrepareRenameDentry return with mu locked on
	// success; the matching Abort*/Commit* methods unlock it. mu carries the
	// state:"nosave" tag, so it is presumably skipped by the code generated
	// for "+stateify savable" (confirm against the stateify tool).
	mu sync.Mutex `state:"nosave"`

	// dead is true if the file represented by this Dentry has been deleted (by
	// CommitDeleteDentry or CommitRenameReplaceDentry) or invalidated (by
	// InvalidateDentry). dead is protected by mu. Within this file it is only
	// ever set to true, and it is read through IsDead.
	dead bool

	// mounts is the number of Mounts for which this Dentry is Mount.point.
	// mounts is accessed using atomic memory operations; isMounted loads it
	// without holding mu.
	mounts uint32

	// impl is the DentryImpl associated with this Dentry. impl is immutable.
	// This should be the last field in Dentry.
	//
	// impl is assigned by Init and returned by Impl; the reference-counting
	// and inotify methods of Dentry all forward to it.
	impl DentryImpl
}
    78  
// Init must be called before first use of d. It records impl as the
// DentryImpl to which d's methods (IncRef, TryIncRef, DecRef,
// InotifyWithParent, Watches, and OnZeroWatches) forward.
//
// Init takes no locks and does not check whether d already has an impl.
func (d *Dentry) Init(impl DentryImpl) {
	d.impl = impl
}
    83  
// Impl returns the DentryImpl associated with d, i.e. the value stored by
// Init. For a zero-value Dentry on which Init has not been called, the result
// is nil.
func (d *Dentry) Impl() DentryImpl {
	return d.impl
}
    88  
// DentryImpl contains implementation details for a Dentry. Implementations of
// DentryImpl should contain their associated Dentry by value as their first
// field.
//
// Each method below is the target of the Dentry method of the same name;
// Dentry forwards its arguments unchanged and adds no logic of its own around
// the call.
//
// +stateify savable
type DentryImpl interface {
	// IncRef increments the Dentry's reference count. A Dentry with a non-zero
	// reference count must remain coherent with the state of the filesystem.
	IncRef()

	// TryIncRef increments the Dentry's reference count and returns true. If
	// the Dentry's reference count is zero, TryIncRef may do nothing and
	// return false. (It is also permitted to succeed if it can restore the
	// guarantee that the Dentry is coherent with the state of the filesystem.)
	//
	// TryIncRef does not require that a reference is held on the Dentry.
	TryIncRef() bool

	// DecRef decrements the Dentry's reference count.
	DecRef(ctx context.Context)

	// InotifyWithParent notifies all watches on the targets represented by this
	// dentry and its parent. The parent's watches are notified first, followed
	// by this dentry's.
	//
	// InotifyWithParent automatically adds the IN_ISDIR flag for dentries
	// representing directories.
	//
	// Note that the events may not actually propagate up to the user, depending
	// on the event masks.
	InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType)

	// Watches returns the set of inotify watches for the file corresponding to
	// the Dentry. Dentries that are hard links to the same underlying file
	// share the same watches.
	//
	// Watches may return nil if the dentry belongs to a FilesystemImpl that
	// does not support inotify. If an implementation returns a non-nil watch
	// set, it must always return a non-nil watch set. Likewise, if an
	// implementation returns a nil watch set, it must always return a nil watch
	// set.
	//
	// The caller does not need to hold a reference on the dentry.
	Watches() *Watches

	// OnZeroWatches is called whenever the number of watches on a dentry drops
	// to zero. This is needed by some FilesystemImpls (e.g. gofer) to manage
	// dentry lifetime.
	//
	// The caller does not need to hold a reference on the dentry. OnZeroWatches
	// may acquire inotify locks, so to prevent deadlock, no inotify locks should
	// be held by the caller.
	OnZeroWatches(ctx context.Context)
}
   143  
   144  // IncRef increments d's reference count.
   145  func (d *Dentry) IncRef() {
   146  	d.impl.IncRef()
   147  }
   148  
   149  // TryIncRef increments d's reference count and returns true. If d's reference
   150  // count is zero, TryIncRef may instead do nothing and return false.
   151  func (d *Dentry) TryIncRef() bool {
   152  	return d.impl.TryIncRef()
   153  }
   154  
   155  // DecRef decrements d's reference count.
   156  func (d *Dentry) DecRef(ctx context.Context) {
   157  	d.impl.DecRef(ctx)
   158  }
   159  
   160  // IsDead returns true if d has been deleted or invalidated by its owning
   161  // filesystem.
   162  func (d *Dentry) IsDead() bool {
   163  	d.mu.Lock()
   164  	defer d.mu.Unlock()
   165  	return d.dead
   166  }
   167  
   168  func (d *Dentry) isMounted() bool {
   169  	return atomic.LoadUint32(&d.mounts) != 0
   170  }
   171  
   172  // InotifyWithParent notifies all watches on the targets represented by d and
   173  // its parent of events.
   174  func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) {
   175  	d.impl.InotifyWithParent(ctx, events, cookie, et)
   176  }
   177  
   178  // Watches returns the set of inotify watches associated with d.
   179  //
   180  // Watches will return nil if d belongs to a FilesystemImpl that does not
   181  // support inotify.
   182  func (d *Dentry) Watches() *Watches {
   183  	return d.impl.Watches()
   184  }
   185  
   186  // OnZeroWatches performs cleanup tasks whenever the number of watches on a
   187  // dentry drops to zero.
   188  func (d *Dentry) OnZeroWatches(ctx context.Context) {
   189  	d.impl.OnZeroWatches(ctx)
   190  }
   191  
   192  // The following functions are exported so that filesystem implementations can
   193  // use them. The vfs package, and users of VFS, should not call these
   194  // functions.
   195  
// PrepareDeleteDentry must be called before attempting to delete the file
// represented by d. If PrepareDeleteDentry succeeds, the caller must call
// AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome.
//
// It returns EBUSY, leaving d.mu unlocked, if mntns.mountpoints holds a
// non-zero entry for d. Otherwise it returns nil with d.mu locked.
// +checklocksacquire:d.mu
func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error {
	// mountMu is held across both the mountpoints check and the acquisition
	// of d.mu, so d.mu is taken before mountMu is released.
	vfs.mountMu.Lock()
	if mntns.mountpoints[d] != 0 {
		vfs.mountMu.Unlock()
		return linuxerr.EBUSY // +checklocksforce: inconsistent return.
	}
	d.mu.Lock()
	vfs.mountMu.Unlock()
	// Return with d.mu locked to block attempts to mount over it; it will be
	// unlocked by AbortDeleteDentry or CommitDeleteDentry.
	return nil
}
   212  
// AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion
// fails. It only releases d.mu, which PrepareDeleteDentry left locked; d's
// dead flag is not touched.
// +checklocksrelease:d.mu
func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) {
	d.mu.Unlock()
}
   219  
   220  // CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion
   221  // succeeds.
   222  // +checklocksrelease:d.mu
   223  func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) {
   224  	d.dead = true
   225  	d.mu.Unlock()
   226  	if d.isMounted() {
   227  		vfs.forgetDeadMountpoint(ctx, d)
   228  	}
   229  }
   230  
   231  // InvalidateDentry is called when d ceases to represent the file it formerly
   232  // did for reasons outside of VFS' control (e.g. d represents the local state
   233  // of a file on a remote filesystem on which the file has already been
   234  // deleted).
   235  func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) {
   236  	d.mu.Lock()
   237  	d.dead = true
   238  	d.mu.Unlock()
   239  	if d.isMounted() {
   240  		vfs.forgetDeadMountpoint(ctx, d)
   241  	}
   242  }
   243  
   244  // PrepareRenameDentry must be called before attempting to rename the file
   245  // represented by from. If to is not nil, it represents the file that will be
   246  // replaced or exchanged by the rename. If PrepareRenameDentry succeeds, the
   247  // caller must call AbortRenameDentry, CommitRenameReplaceDentry, or
   248  // CommitRenameExchangeDentry depending on the rename's outcome.
   249  //
   250  // Preconditions:
   251  // * If to is not nil, it must be a child Dentry from the same Filesystem.
   252  // * from != to.
   253  // +checklocksacquire:from.mu
   254  // +checklocksacquire:to.mu
   255  func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error {
   256  	vfs.mountMu.Lock()
   257  	if mntns.mountpoints[from] != 0 {
   258  		vfs.mountMu.Unlock()
   259  		return linuxerr.EBUSY // +checklocksforce: no locks acquired.
   260  	}
   261  	if to != nil {
   262  		if mntns.mountpoints[to] != 0 {
   263  			vfs.mountMu.Unlock()
   264  			return linuxerr.EBUSY // +checklocksforce: no locks acquired.
   265  		}
   266  		to.mu.Lock()
   267  	}
   268  	from.mu.Lock()
   269  	vfs.mountMu.Unlock()
   270  	// Return with from.mu and to.mu locked, which will be unlocked by
   271  	// AbortRenameDentry, CommitRenameReplaceDentry, or
   272  	// CommitRenameExchangeDentry.
   273  	return nil // +checklocksforce: to may not be acquired.
   274  }
   275  
// AbortRenameDentry must be called after PrepareRenameDentry if the rename
// fails. It unlocks from.mu and, when to is not nil, to.mu; neither Dentry's
// dead flag is changed.
// +checklocksrelease:from.mu
// +checklocksrelease:to.mu
func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) {
	from.mu.Unlock()
	// PrepareRenameDentry only locks to.mu when to is non-nil, so there is
	// nothing to release for a nil to.
	if to != nil {
		to.mu.Unlock()
	}
}
   286  
// CommitRenameReplaceDentry must be called after the file represented by from
// is renamed without RENAME_EXCHANGE. If to is not nil, it represents the file
// that was replaced by from.
//
// from is only unlocked. A non-nil to is marked dead and unlocked, and is
// then passed to forgetDeadMountpoint if it is still in use as a mount point.
//
// Preconditions: PrepareRenameDentry was previously called on from and to.
// +checklocksrelease:from.mu
// +checklocksrelease:to.mu
func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) {
	from.mu.Unlock()
	if to != nil {
		// to.mu is still held from PrepareRenameDentry, which is what
		// protects this write to to.dead.
		to.dead = true
		to.mu.Unlock()
		// The mount check runs only after to.mu has been released.
		if to.isMounted() {
			vfs.forgetDeadMountpoint(ctx, to)
		}
	}
}
   304  
// CommitRenameExchangeDentry must be called after the files represented by
// from and to are exchanged by rename(RENAME_EXCHANGE).
//
// Both Dentries are only unlocked; neither is marked dead. Unlike
// AbortRenameDentry and CommitRenameReplaceDentry, to.mu is unlocked
// unconditionally, so to must not be nil here.
//
// Preconditions: PrepareRenameDentry was previously called on from and to.
// +checklocksrelease:from.mu
// +checklocksrelease:to.mu
func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) {
	from.mu.Unlock()
	to.mu.Unlock()
}
   315  
   316  // forgetDeadMountpoint is called when a mount point is deleted or invalidated
   317  // to umount all mounts using it in all other mount namespaces.
   318  //
   319  // forgetDeadMountpoint is analogous to Linux's
   320  // fs/namespace.c:__detach_mounts().
   321  func (vfs *VirtualFilesystem) forgetDeadMountpoint(ctx context.Context, d *Dentry) {
   322  	var (
   323  		vdsToDecRef    []VirtualDentry
   324  		mountsToDecRef []*Mount
   325  	)
   326  	vfs.mountMu.Lock()
   327  	vfs.mounts.seq.BeginWrite()
   328  	for mnt := range vfs.mountpoints[d] {
   329  		vdsToDecRef, mountsToDecRef = vfs.umountRecursiveLocked(mnt, &umountRecursiveOptions{}, vdsToDecRef, mountsToDecRef)
   330  	}
   331  	vfs.mounts.seq.EndWrite()
   332  	vfs.mountMu.Unlock()
   333  	for _, vd := range vdsToDecRef {
   334  		vd.DecRef(ctx)
   335  	}
   336  	for _, mnt := range mountsToDecRef {
   337  		mnt.DecRef(ctx)
   338  	}
   339  }