gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/vfs/dentry.go

gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/vfs/dentry.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package vfs
    16  
    17  import (
    18  	"gvisor.dev/gvisor/pkg/atomicbitops"
    19  	"gvisor.dev/gvisor/pkg/context"
    20  	"gvisor.dev/gvisor/pkg/errors/linuxerr"
    21  	"gvisor.dev/gvisor/pkg/refs"
    22  	"gvisor.dev/gvisor/pkg/sync"
    23  )
    24  
// Dentry represents a node in a Filesystem tree at which a file exists.
//
// Dentries are reference-counted. Unless otherwise specified, all Dentry
// methods require that a reference is held.
//
// Dentry is loosely analogous to Linux's struct dentry, but:
//
//   - VFS does not associate Dentries with inodes. gVisor interacts primarily
//     with filesystems that are accessed through filesystem APIs (as opposed to
//     raw block devices); many such APIs support only paths and file descriptors,
//     and not inodes. Furthermore, when parties outside the scope of VFS can
//     rename inodes on such filesystems, VFS generally cannot "follow" the rename,
//     both due to synchronization issues and because it may not even be able to
//     name the destination path; this implies that it would in fact be incorrect
//     for Dentries to be associated with inodes on such filesystems. Consequently,
//     operations that are inode operations in Linux are FilesystemImpl methods
//     and/or FileDescriptionImpl methods in gVisor's VFS. Filesystems that do
//     support inodes may store appropriate state in implementations of DentryImpl.
//
//   - VFS does not require that Dentries are instantiated for all paths accessed
//     through VFS, only those that are tracked beyond the scope of a single
//     Filesystem operation. This includes file descriptions, mount points, mount
//     roots, process working directories, and chroots. This avoids instantiation
//     of Dentries for operations on mutable remote filesystems that can't actually
//     cache any state in the Dentry.
//
//   - VFS does not track filesystem structure (i.e. relationships between
//     Dentries), since both the relevant state and synchronization are
//     filesystem-specific.
//
//   - For the reasons above, VFS is not directly responsible for managing Dentry
//     lifetime. Dentry reference counts only indicate the extent to which VFS
//     requires Dentries to exist; Filesystems may elect to cache or discard
//     Dentries with zero references.
//
// +stateify savable
type Dentry struct {
	// mu synchronizes deletion/invalidation and mounting over this Dentry.
	// It also guards the dead and evictable fields below. mu is excluded from
	// saved state (state:"nosave").
	mu sync.Mutex `state:"nosave"`

	// dead is true if the file represented by this Dentry has been deleted (by
	// CommitDeleteDentry or CommitRenameReplaceDentry) or invalidated (by
	// InvalidateDentry). dead is protected by mu.
	dead bool

	// evictable is set by the VFS layer or filesystems like overlayfs as a hint
	// that this dentry will not be accessed henceforth. Filesystems that cache
	// dentries locally can use this hint to release the dentry when all
	// references are dropped. evictable is protected by mu.
	evictable bool

	// mounts is the number of Mounts for which this Dentry is Mount.point.
	// It is an atomic counter; isMounted reads it with Load and without
	// holding mu.
	mounts atomicbitops.Uint32

	// impl is the DentryImpl associated with this Dentry. impl is immutable.
	// This should be the last field in Dentry.
	impl DentryImpl
}
    83  
// Init must be called before first use of d. It records impl as the
// DentryImpl to which d's reference-counting and inotify methods delegate.
func (d *Dentry) Init(impl DentryImpl) {
	d.impl = impl
}
    88  
// Impl returns the DentryImpl associated with d, i.e. the value previously
// passed to Init.
func (d *Dentry) Impl() DentryImpl {
	return d.impl
}
    93  
// DentryImpl contains implementation details for a Dentry. Implementations of
// DentryImpl should contain their associated Dentry by value as their first
// field.
//
// The Dentry methods of the same names (IncRef, TryIncRef, DecRef,
// InotifyWithParent, Watches, OnZeroWatches) forward directly to the
// corresponding DentryImpl methods.
//
// +stateify savable
type DentryImpl interface {
	// IncRef increments the Dentry's reference count. A Dentry with a non-zero
	// reference count must remain coherent with the state of the filesystem.
	IncRef()

	// TryIncRef increments the Dentry's reference count and returns true. If
	// the Dentry's reference count is zero, TryIncRef may do nothing and
	// return false. (It is also permitted to succeed if it can restore the
	// guarantee that the Dentry is coherent with the state of the filesystem.)
	//
	// TryIncRef does not require that a reference is held on the Dentry.
	TryIncRef() bool

	// DecRef decrements the Dentry's reference count.
	DecRef(ctx context.Context)

	// InotifyWithParent notifies all watches on the targets represented by this
	// dentry and its parent. The parent's watches are notified first, followed
	// by this dentry's.
	//
	// InotifyWithParent automatically adds the IN_ISDIR flag for dentries
	// representing directories.
	//
	// Note that the events may not actually propagate up to the user, depending
	// on the event masks.
	InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType)

	// Watches returns the set of inotify watches for the file corresponding to
	// the Dentry. Dentries that are hard links to the same underlying file
	// share the same watches.
	//
	// The caller does not need to hold a reference on the dentry.
	Watches() *Watches

	// OnZeroWatches is called whenever the number of watches on a dentry drops
	// to zero. This is needed by some FilesystemImpls (e.g. gofer) to manage
	// dentry lifetime.
	//
	// The caller does not need to hold a reference on the dentry. OnZeroWatches
	// may acquire inotify locks, so to prevent deadlock, no inotify locks should
	// be held by the caller.
	OnZeroWatches(ctx context.Context)
}
   142  
// IncRef increments d's reference count by delegating to the DentryImpl
// installed by Init.
func (d *Dentry) IncRef() {
	d.impl.IncRef()
}
   147  
// TryIncRef increments d's reference count and returns true. If d's reference
// count is zero, TryIncRef may instead do nothing and return false. The
// decision is made entirely by the DentryImpl, whose result is returned
// unchanged.
func (d *Dentry) TryIncRef() bool {
	return d.impl.TryIncRef()
}
   153  
// DecRef decrements d's reference count by delegating to the DentryImpl,
// passing ctx through unchanged.
func (d *Dentry) DecRef(ctx context.Context) {
	d.impl.DecRef(ctx)
}
   158  
   159  // IsDead returns true if d has been deleted or invalidated by its owning
   160  // filesystem.
   161  func (d *Dentry) IsDead() bool {
   162  	d.mu.Lock()
   163  	defer d.mu.Unlock()
   164  	return d.dead
   165  }
   166  
   167  // IsEvictable returns true if d is evictable from filesystem dentry cache.
   168  func (d *Dentry) IsEvictable() bool {
   169  	d.mu.Lock()
   170  	defer d.mu.Unlock()
   171  	return d.evictable
   172  }
   173  
   174  // MarkEvictable marks d as evictable.
   175  func (d *Dentry) MarkEvictable() {
   176  	d.mu.Lock()
   177  	defer d.mu.Unlock()
   178  	d.evictable = true
   179  }
   180  
   181  func (d *Dentry) isMounted() bool {
   182  	return d.mounts.Load() != 0
   183  }
   184  
// InotifyWithParent notifies all watches on the targets represented by d and
// its parent of events. All arguments are forwarded unchanged to the
// DentryImpl, which defines the notification order and flag handling.
func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) {
	d.impl.InotifyWithParent(ctx, events, cookie, et)
}
   190  
// Watches returns the set of inotify watches associated with d, as reported
// by the DentryImpl.
func (d *Dentry) Watches() *Watches {
	return d.impl.Watches()
}
   195  
// OnZeroWatches performs cleanup tasks whenever the number of watches on a
// dentry drops to zero. The work is done by the DentryImpl; per the
// DentryImpl contract, the caller should not hold any inotify locks.
func (d *Dentry) OnZeroWatches(ctx context.Context) {
	d.impl.OnZeroWatches(ctx)
}
   201  
   202  // The following functions are exported so that filesystem implementations can
   203  // use them. The vfs package, and users of VFS, should not call these
   204  // functions.
   205  
// PrepareDeleteDentry must be called before attempting to delete the file
// represented by d. If PrepareDeleteDentry succeeds, the caller must call
// AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome.
//
// The mount-point check and the acquisition of d.mu both happen while the
// mounts lock (vfs.lockMounts) is held. If mntns.mountpoints[d] is non-zero,
// EBUSY is returned and d.mu is NOT locked; otherwise nil is returned with
// d.mu locked.
// +checklocksacquire:d.mu
func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error {
	vfs.lockMounts()
	defer vfs.unlockMounts(context.Background())
	// NOTE(review): presumably mntns.mountpoints[d] counts the mounts in mntns
	// that use d as their mount point -- confirm against MountNamespace.
	if mntns.mountpoints[d] != 0 {
		return linuxerr.EBUSY // +checklocksforce: inconsistent return.
	}
	d.mu.Lock()
	// Return with d.mu locked to block attempts to mount over it; it will be
	// unlocked by AbortDeleteDentry or CommitDeleteDentry.
	return nil
}
   221  
// AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion
// fails. It releases d.mu, which a successful PrepareDeleteDentry left
// locked, and leaves d's dead flag untouched.
// +checklocksrelease:d.mu
func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) {
	d.mu.Unlock()
}
   228  
   229  // CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion
   230  // succeeds. If d is mounted, the method returns a list of Virtual Dentries
   231  // mounted on d that the caller is responsible for DecRefing.
   232  // +checklocksrelease:d.mu
   233  func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) []refs.RefCounter {
   234  	d.dead = true
   235  	d.mu.Unlock()
   236  	if d.isMounted() {
   237  		return vfs.forgetDeadMountpoint(ctx, d)
   238  	}
   239  	return nil
   240  }
   241  
   242  // InvalidateDentry is called when d ceases to represent the file it formerly
   243  // did for reasons outside of VFS' control (e.g. d represents the local state
   244  // of a file on a remote filesystem on which the file has already been
   245  // deleted). If d is mounted, the method returns a list of Virtual Dentries
   246  // mounted on d that the caller is responsible for DecRefing.
   247  func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) []refs.RefCounter {
   248  	d.mu.Lock()
   249  	d.dead = true
   250  	d.mu.Unlock()
   251  	if d.isMounted() {
   252  		return vfs.forgetDeadMountpoint(ctx, d)
   253  	}
   254  	return nil
   255  }
   256  
// PrepareRenameDentry must be called before attempting to rename the file
// represented by from. If to is not nil, it represents the file that will be
// replaced or exchanged by the rename. If PrepareRenameDentry succeeds, the
// caller must call AbortRenameDentry, CommitRenameReplaceDentry, or
// CommitRenameExchangeDentry depending on the rename's outcome.
//
// The mount-point checks and lock acquisitions happen while the mounts lock
// (vfs.lockMounts) is held. EBUSY is returned, with neither Dentry mutex
// held, if mntns.mountpoints is non-zero for from or for a non-nil to. On
// success, to.mu (when to is non-nil) is acquired before from.mu.
//
// Preconditions:
//   - If to is not nil, it must be a child Dentry from the same Filesystem.
//   - from != to.
//
// +checklocksacquire:from.mu
// +checklocksacquire:to.mu
func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error {
	vfs.lockMounts()
	defer vfs.unlockMounts(context.Background())
	if mntns.mountpoints[from] != 0 {
		return linuxerr.EBUSY // +checklocksforce: no locks acquired.
	}
	if to != nil {
		// Both checks complete before any Dentry mutex is taken, so the
		// EBUSY paths never need to unlock anything.
		if mntns.mountpoints[to] != 0 {
			return linuxerr.EBUSY // +checklocksforce: no locks acquired.
		}
		to.mu.Lock()
	}
	from.mu.Lock()
	// Return with from.mu and to.mu locked, which will be unlocked by
	// AbortRenameDentry, CommitRenameReplaceDentry, or
	// CommitRenameExchangeDentry.
	return nil // +checklocksforce: to may not be acquired.
}
   287  
// AbortRenameDentry must be called after PrepareRenameDentry if the rename
// fails. It releases from.mu and, when to is non-nil, to.mu; neither Dentry's
// dead flag is modified.
// +checklocksrelease:from.mu
// +checklocksrelease:to.mu
func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) {
	from.mu.Unlock()
	// to is nil when the rename had no existing target, in which case
	// PrepareRenameDentry never locked it.
	if to != nil {
		to.mu.Unlock()
	}
}
   298  
   299  // CommitRenameReplaceDentry must be called after the file represented by from
   300  // is renamed without RENAME_EXCHANGE. If to is not nil, it represents the file
   301  // that was replaced by from. If to is mounted, the method returns a list of
   302  // Virtual Dentries mounted on to that the caller is responsible for DecRefing.
   303  //
   304  // Preconditions: PrepareRenameDentry was previously called on from and to.
   305  // +checklocksrelease:from.mu
   306  // +checklocksrelease:to.mu
   307  func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) []refs.RefCounter {
   308  	from.mu.Unlock()
   309  	if to != nil {
   310  		to.dead = true
   311  		to.mu.Unlock()
   312  		if to.isMounted() {
   313  			return vfs.forgetDeadMountpoint(ctx, to)
   314  		}
   315  	}
   316  	return nil
   317  }
   318  
// CommitRenameExchangeDentry must be called after the files represented by
// from and to are exchanged by rename(RENAME_EXCHANGE). It releases from.mu
// and to.mu and marks neither Dentry dead. Unlike the other rename
// completion functions, to must be non-nil here because to.mu is unlocked
// unconditionally.
//
// Preconditions: PrepareRenameDentry was previously called on from and to.
// +checklocksrelease:from.mu
// +checklocksrelease:to.mu
func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) {
	from.mu.Unlock()
	to.mu.Unlock()
}
   329  
// forgetDeadMountpoint is called when a mount point is deleted or invalidated
// to umount all mounts using it in all other mount namespaces. It runs with
// the mounts lock held and returns the result of vfs.PopDelayedDecRefs();
// the exported callers in this file document that the caller is responsible
// for DecRefing the returned objects.
//
// forgetDeadMountpoint is analogous to Linux's
// fs/namespace.c:__detach_mounts().
func (vfs *VirtualFilesystem) forgetDeadMountpoint(ctx context.Context, d *Dentry) []refs.RefCounter {
	vfs.lockMounts()
	defer vfs.unlockMounts(ctx)
	for mnt := range vfs.mountpoints[d] {
		// If umounted is true, the mount point has already been decrefed by umount
		// so we don't need to release the reference again here.
		if mnt.umounted {
			// Only disconnect the mount and queue it for a delayed DecRef,
			// inside a write critical section on vfs.mounts.seq.
			vfs.mounts.seq.BeginWrite()
			vfs.disconnectLocked(mnt)
			vfs.delayDecRef(mnt)
			vfs.mounts.seq.EndWrite()
		} else {
			// Still mounted: unmount via umountTreeLocked with zero-valued
			// options.
			vfs.umountTreeLocked(mnt, &umountRecursiveOptions{})
		}
	}
	// The return value is evaluated before the deferred unlockMounts runs.
	return vfs.PopDelayedDecRefs()
}