github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/sentry/vfs/dentry.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package vfs
    16  
    17  import (
    18  	"github.com/ttpreport/gvisor-ligolo/pkg/atomicbitops"
    19  	"github.com/ttpreport/gvisor-ligolo/pkg/context"
    20  	"github.com/ttpreport/gvisor-ligolo/pkg/errors/linuxerr"
    21  	"github.com/ttpreport/gvisor-ligolo/pkg/refs"
    22  	"github.com/ttpreport/gvisor-ligolo/pkg/sync"
    23  )
    24  
    25  // Dentry represents a node in a Filesystem tree at which a file exists.
    26  //
    27  // Dentries are reference-counted. Unless otherwise specified, all Dentry
    28  // methods require that a reference is held.
    29  //
    30  // Dentry is loosely analogous to Linux's struct dentry, but:
    31  //
    32  //   - VFS does not associate Dentries with inodes. gVisor interacts primarily
    33  //     with filesystems that are accessed through filesystem APIs (as opposed to
    34  //     raw block devices); many such APIs support only paths and file descriptors,
    35  //     and not inodes. Furthermore, when parties outside the scope of VFS can
    36  //     rename inodes on such filesystems, VFS generally cannot "follow" the rename,
    37  //     both due to synchronization issues and because it may not even be able to
    38  //     name the destination path; this implies that it would in fact be incorrect
    39  //     for Dentries to be associated with inodes on such filesystems. Consequently,
    40  //     operations that are inode operations in Linux are FilesystemImpl methods
    41  //     and/or FileDescriptionImpl methods in gVisor's VFS. Filesystems that do
    42  //     support inodes may store appropriate state in implementations of DentryImpl.
    43  //
    44  //   - VFS does not require that Dentries are instantiated for all paths accessed
    45  //     through VFS, only those that are tracked beyond the scope of a single
    46  //     Filesystem operation. This includes file descriptions, mount points, mount
    47  //     roots, process working directories, and chroots. This avoids instantiation
    48  //     of Dentries for operations on mutable remote filesystems that can't actually
    49  //     cache any state in the Dentry.
    50  //
    51  //   - VFS does not track filesystem structure (i.e. relationships between
    52  //     Dentries), since both the relevant state and synchronization are
    53  //     filesystem-specific.
    54  //
    55  //   - For the reasons above, VFS is not directly responsible for managing Dentry
    56  //     lifetime. Dentry reference counts only indicate the extent to which VFS
    57  //     requires Dentries to exist; Filesystems may elect to cache or discard
    58  //     Dentries with zero references.
    59  //
    60  // +stateify savable
    61  type Dentry struct {
    62  	// mu synchronizes deletion/invalidation and mounting over this Dentry.
    63  	mu sync.Mutex `state:"nosave"`
    64  
    65  	// dead is true if the file represented by this Dentry has been deleted (by
    66  	// CommitDeleteDentry or CommitRenameReplaceDentry) or invalidated (by
    67  	// InvalidateDentry). dead is protected by mu.
    68  	dead bool
    69  
    70  	// evictable is set by the VFS layer or filesystems like overlayfs as a hint
    71  	// that this dentry will not be accessed hence forth. So filesystems that
    72  	// cache dentries locally can use this hint to release the dentry when all
    73  	// references are dropped. evictable is protected by mu.
    74  	evictable bool
    75  
    76  	// mounts is the number of Mounts for which this Dentry is Mount.point.
    77  	mounts atomicbitops.Uint32
    78  
    79  	// impl is the DentryImpl associated with this Dentry. impl is immutable.
    80  	// This should be the last field in Dentry.
    81  	impl DentryImpl
    82  }
    83  
    84  // Init must be called before first use of d.
    85  func (d *Dentry) Init(impl DentryImpl) {
    86  	d.impl = impl
    87  }
    88  
    89  // Impl returns the DentryImpl associated with d.
    90  func (d *Dentry) Impl() DentryImpl {
    91  	return d.impl
    92  }
    93  
    94  // DentryImpl contains implementation details for a Dentry. Implementations of
    95  // DentryImpl should contain their associated Dentry by value as their first
    96  // field.
    97  //
    98  // +stateify savable
    99  type DentryImpl interface {
   100  	// IncRef increments the Dentry's reference count. A Dentry with a non-zero
   101  	// reference count must remain coherent with the state of the filesystem.
   102  	IncRef()
   103  
   104  	// TryIncRef increments the Dentry's reference count and returns true. If
   105  	// the Dentry's reference count is zero, TryIncRef may do nothing and
   106  	// return false. (It is also permitted to succeed if it can restore the
   107  	// guarantee that the Dentry is coherent with the state of the filesystem.)
   108  	//
   109  	// TryIncRef does not require that a reference is held on the Dentry.
   110  	TryIncRef() bool
   111  
   112  	// DecRef decrements the Dentry's reference count.
   113  	DecRef(ctx context.Context)
   114  
   115  	// InotifyWithParent notifies all watches on the targets represented by this
   116  	// dentry and its parent. The parent's watches are notified first, followed
   117  	// by this dentry's.
   118  	//
   119  	// InotifyWithParent automatically adds the IN_ISDIR flag for dentries
   120  	// representing directories.
   121  	//
   122  	// Note that the events may not actually propagate up to the user, depending
   123  	// on the event masks.
   124  	InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType)
   125  
   126  	// Watches returns the set of inotify watches for the file corresponding to
   127  	// the Dentry. Dentries that are hard links to the same underlying file
   128  	// share the same watches.
   129  	//
   130  	// The caller does not need to hold a reference on the dentry.
   131  	Watches() *Watches
   132  
   133  	// OnZeroWatches is called whenever the number of watches on a dentry drops
   134  	// to zero. This is needed by some FilesystemImpls (e.g. gofer) to manage
   135  	// dentry lifetime.
   136  	//
   137  	// The caller does not need to hold a reference on the dentry. OnZeroWatches
   138  	// may acquire inotify locks, so to prevent deadlock, no inotify locks should
   139  	// be held by the caller.
   140  	OnZeroWatches(ctx context.Context)
   141  }
   142  
   143  // IncRef increments d's reference count.
   144  func (d *Dentry) IncRef() {
   145  	d.impl.IncRef()
   146  }
   147  
   148  // TryIncRef increments d's reference count and returns true. If d's reference
   149  // count is zero, TryIncRef may instead do nothing and return false.
   150  func (d *Dentry) TryIncRef() bool {
   151  	return d.impl.TryIncRef()
   152  }
   153  
   154  // DecRef decrements d's reference count.
   155  func (d *Dentry) DecRef(ctx context.Context) {
   156  	d.impl.DecRef(ctx)
   157  }
   158  
   159  // IsDead returns true if d has been deleted or invalidated by its owning
   160  // filesystem.
   161  func (d *Dentry) IsDead() bool {
   162  	d.mu.Lock()
   163  	defer d.mu.Unlock()
   164  	return d.dead
   165  }
   166  
   167  // IsEvictable returns true if d is evictable from filesystem dentry cache.
   168  func (d *Dentry) IsEvictable() bool {
   169  	d.mu.Lock()
   170  	defer d.mu.Unlock()
   171  	return d.evictable
   172  }
   173  
   174  // MarkEvictable marks d as evictable.
   175  func (d *Dentry) MarkEvictable() {
   176  	d.mu.Lock()
   177  	defer d.mu.Unlock()
   178  	d.evictable = true
   179  }
   180  
   181  func (d *Dentry) isMounted() bool {
   182  	return d.mounts.Load() != 0
   183  }
   184  
   185  // InotifyWithParent notifies all watches on the targets represented by d and
   186  // its parent of events.
   187  func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) {
   188  	d.impl.InotifyWithParent(ctx, events, cookie, et)
   189  }
   190  
   191  // Watches returns the set of inotify watches associated with d.
   192  func (d *Dentry) Watches() *Watches {
   193  	return d.impl.Watches()
   194  }
   195  
   196  // OnZeroWatches performs cleanup tasks whenever the number of watches on a
   197  // dentry drops to zero.
   198  func (d *Dentry) OnZeroWatches(ctx context.Context) {
   199  	d.impl.OnZeroWatches(ctx)
   200  }
   201  
   202  // The following functions are exported so that filesystem implementations can
   203  // use them. The vfs package, and users of VFS, should not call these
   204  // functions.
   205  
   206  // PrepareDeleteDentry must be called before attempting to delete the file
   207  // represented by d. If PrepareDeleteDentry succeeds, the caller must call
   208  // AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome.
   209  // +checklocksacquire:d.mu
   210  func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error {
   211  	vfs.mountMu.Lock()
   212  	if mntns.mountpoints[d] != 0 {
   213  		vfs.mountMu.Unlock()
   214  		return linuxerr.EBUSY // +checklocksforce: inconsistent return.
   215  	}
   216  	d.mu.Lock()
   217  	vfs.mountMu.Unlock()
   218  	// Return with d.mu locked to block attempts to mount over it; it will be
   219  	// unlocked by AbortDeleteDentry or CommitDeleteDentry.
   220  	return nil
   221  }
   222  
   223  // AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion
   224  // fails.
   225  // +checklocksrelease:d.mu
   226  func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) {
   227  	d.mu.Unlock()
   228  }
   229  
   230  // CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion
   231  // succeeds.
   232  // +checklocksrelease:d.mu
   233  func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) {
   234  	d.dead = true
   235  	d.mu.Unlock()
   236  	if d.isMounted() {
   237  		vfs.forgetDeadMountpoint(ctx, d, false /*skipDecRef*/)
   238  	}
   239  }
   240  
   241  // InvalidateDentry is called when d ceases to represent the file it formerly
   242  // did for reasons outside of VFS' control (e.g. d represents the local state
   243  // of a file on a remote filesystem on which the file has already been
   244  // deleted). If d is mounted, the method returns a list of Virtual Dentries
   245  // mounted on d that the caller is responsible for DecRefing.
   246  func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) []refs.RefCounter {
   247  	d.mu.Lock()
   248  	d.dead = true
   249  	d.mu.Unlock()
   250  	if d.isMounted() {
   251  		return vfs.forgetDeadMountpoint(ctx, d, true /*skipDecRef*/)
   252  	}
   253  	return nil
   254  }
   255  
   256  // PrepareRenameDentry must be called before attempting to rename the file
   257  // represented by from. If to is not nil, it represents the file that will be
   258  // replaced or exchanged by the rename. If PrepareRenameDentry succeeds, the
   259  // caller must call AbortRenameDentry, CommitRenameReplaceDentry, or
   260  // CommitRenameExchangeDentry depending on the rename's outcome.
   261  //
   262  // Preconditions:
   263  //   - If to is not nil, it must be a child Dentry from the same Filesystem.
   264  //   - from != to.
   265  //
   266  // +checklocksacquire:from.mu
   267  // +checklocksacquire:to.mu
   268  func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error {
   269  	vfs.mountMu.Lock()
   270  	if mntns.mountpoints[from] != 0 {
   271  		vfs.mountMu.Unlock()
   272  		return linuxerr.EBUSY // +checklocksforce: no locks acquired.
   273  	}
   274  	if to != nil {
   275  		if mntns.mountpoints[to] != 0 {
   276  			vfs.mountMu.Unlock()
   277  			return linuxerr.EBUSY // +checklocksforce: no locks acquired.
   278  		}
   279  		to.mu.Lock()
   280  	}
   281  	from.mu.Lock()
   282  	vfs.mountMu.Unlock()
   283  	// Return with from.mu and to.mu locked, which will be unlocked by
   284  	// AbortRenameDentry, CommitRenameReplaceDentry, or
   285  	// CommitRenameExchangeDentry.
   286  	return nil // +checklocksforce: to may not be acquired.
   287  }
   288  
   289  // AbortRenameDentry must be called after PrepareRenameDentry if the rename
   290  // fails.
   291  // +checklocksrelease:from.mu
   292  // +checklocksrelease:to.mu
   293  func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) {
   294  	from.mu.Unlock()
   295  	if to != nil {
   296  		to.mu.Unlock()
   297  	}
   298  }
   299  
   300  // CommitRenameReplaceDentry must be called after the file represented by from
   301  // is renamed without RENAME_EXCHANGE. If to is not nil, it represents the file
   302  // that was replaced by from.
   303  //
   304  // Preconditions: PrepareRenameDentry was previously called on from and to.
   305  // +checklocksrelease:from.mu
   306  // +checklocksrelease:to.mu
   307  func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) {
   308  	from.mu.Unlock()
   309  	if to != nil {
   310  		to.dead = true
   311  		to.mu.Unlock()
   312  		if to.isMounted() {
   313  			vfs.forgetDeadMountpoint(ctx, to, false /*skipDecRef*/)
   314  		}
   315  	}
   316  }
   317  
   318  // CommitRenameExchangeDentry must be called after the files represented by
   319  // from and to are exchanged by rename(RENAME_EXCHANGE).
   320  //
   321  // Preconditions: PrepareRenameDentry was previously called on from and to.
   322  // +checklocksrelease:from.mu
   323  // +checklocksrelease:to.mu
   324  func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) {
   325  	from.mu.Unlock()
   326  	to.mu.Unlock()
   327  }
   328  
   329  // forgetDeadMountpoint is called when a mount point is deleted or invalidated
   330  // to umount all mounts using it in all other mount namespaces. If skipDecRef
   331  // is true, the method returns a list of reference counted objects with an
   332  // an extra reference.
   333  //
   334  // forgetDeadMountpoint is analogous to Linux's
   335  // fs/namespace.c:__detach_mounts().
   336  func (vfs *VirtualFilesystem) forgetDeadMountpoint(ctx context.Context, d *Dentry, skipDecRef bool) []refs.RefCounter {
   337  	var (
   338  		vdsToDecRef    []VirtualDentry
   339  		mountsToDecRef []*Mount
   340  	)
   341  	vfs.mountMu.Lock()
   342  	vfs.mounts.seq.BeginWrite()
   343  	for mnt := range vfs.mountpoints[d] {
   344  		vdsToDecRef, mountsToDecRef = vfs.umountRecursiveLocked(mnt, &umountRecursiveOptions{}, vdsToDecRef, mountsToDecRef)
   345  	}
   346  	vfs.mounts.seq.EndWrite()
   347  	vfs.mountMu.Unlock()
   348  	rcs := make([]refs.RefCounter, 0, len(vdsToDecRef)+len(mountsToDecRef))
   349  	for _, vd := range vdsToDecRef {
   350  		rcs = append(rcs, vd)
   351  	}
   352  	for _, mnt := range mountsToDecRef {
   353  		rcs = append(rcs, mnt)
   354  	}
   355  	if skipDecRef {
   356  		return rcs
   357  	}
   358  	for _, rc := range rcs {
   359  		rc.DecRef(ctx)
   360  	}
   361  	return nil
   362  }