github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/kernfs/filesystem.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernfs
    16  
    17  // This file implements vfs.FilesystemImpl for kernfs.
    18  
    19  import (
    20  	"fmt"
    21  
    22  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    23  	"github.com/SagerNet/gvisor/pkg/context"
    24  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    25  	"github.com/SagerNet/gvisor/pkg/fspath"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    29  	"github.com/SagerNet/gvisor/pkg/syserror"
    30  )
    31  
    32  // stepExistingLocked resolves rp.Component() in parent directory vfsd.
    33  //
    34  // stepExistingLocked is loosely analogous to fs/namei.c:walk_component().
    35  //
    36  // Preconditions:
    37  // * Filesystem.mu must be locked for at least reading.
    38  // * !rp.Done().
    39  //
    40  // Postcondition: Caller must call fs.processDeferredDecRefs*.
    41  func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, mayFollowSymlinks bool) (*Dentry, error) {
    42  	if !d.isDir() {
    43  		return nil, syserror.ENOTDIR
    44  	}
    45  	// Directory searchable?
    46  	if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
    47  		return nil, err
    48  	}
    49  afterSymlink:
    50  	name := rp.Component()
    51  	// Revalidation must be skipped if name is "." or ".."; d or its parent
    52  	// respectively can't be expected to transition from invalidated back to
    53  	// valid, so detecting invalidation and retrying would loop forever. This
    54  	// is consistent with Linux: fs/namei.c:walk_component() => lookup_fast()
    55  	// calls d_revalidate(), but walk_component() => handle_dots() does not.
    56  	if name == "." {
    57  		rp.Advance()
    58  		return d, nil
    59  	}
    60  	if name == ".." {
    61  		if isRoot, err := rp.CheckRoot(ctx, d.VFSDentry()); err != nil {
    62  			return nil, err
    63  		} else if isRoot || d.parent == nil {
    64  			rp.Advance()
    65  			return d, nil
    66  		}
    67  		if err := rp.CheckMount(ctx, d.parent.VFSDentry()); err != nil {
    68  			return nil, err
    69  		}
    70  		rp.Advance()
    71  		return d.parent, nil
    72  	}
    73  	if len(name) > linux.NAME_MAX {
    74  		return nil, linuxerr.ENAMETOOLONG
    75  	}
    76  	d.dirMu.Lock()
    77  	next, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, d.children[name])
    78  	d.dirMu.Unlock()
    79  	if err != nil {
    80  		return nil, err
    81  	}
    82  	if err := rp.CheckMount(ctx, next.VFSDentry()); err != nil {
    83  		return nil, err
    84  	}
    85  	// Resolve any symlink at current path component.
    86  	if mayFollowSymlinks && rp.ShouldFollowSymlink() && next.isSymlink() {
    87  		targetVD, targetPathname, err := next.inode.Getlink(ctx, rp.Mount())
    88  		if err != nil {
    89  			return nil, err
    90  		}
    91  		if targetVD.Ok() {
    92  			err := rp.HandleJump(targetVD)
    93  			fs.deferDecRefVD(ctx, targetVD)
    94  			if err != nil {
    95  				return nil, err
    96  			}
    97  		} else {
    98  			if err := rp.HandleSymlink(targetPathname); err != nil {
    99  				return nil, err
   100  			}
   101  		}
   102  		goto afterSymlink
   103  	}
   104  	rp.Advance()
   105  	return next, nil
   106  }
   107  
   108  // revalidateChildLocked must be called after a call to parent.vfsd.Child(name)
   109  // or vfs.ResolvingPath.ResolveChild(name) returns childVFSD (which may be
   110  // nil) to verify that the returned child (or lack thereof) is correct.
   111  //
   112  // Preconditions:
   113  // * Filesystem.mu must be locked for at least reading.
   114  // * parent.dirMu must be locked.
   115  // * parent.isDir().
   116  // * name is not "." or "..".
   117  //
   118  // Postconditions: Caller must call fs.processDeferredDecRefs*.
   119  func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *Dentry, name string, child *Dentry) (*Dentry, error) {
   120  	if child != nil {
   121  		// Cached dentry exists, revalidate.
   122  		if !child.inode.Valid(ctx) {
   123  			delete(parent.children, name)
   124  			if child.inode.Keep() {
   125  				// Drop the ref owned by kernfs.
   126  				fs.deferDecRef(child)
   127  			}
   128  			vfsObj.InvalidateDentry(ctx, child.VFSDentry())
   129  			child = nil
   130  		}
   131  	}
   132  	if child == nil {
   133  		// Dentry isn't cached; it either doesn't exist or failed revalidation.
   134  		// Attempt to resolve it via Lookup.
   135  		childInode, err := parent.inode.Lookup(ctx, name)
   136  		if err != nil {
   137  			return nil, err
   138  		}
   139  		var newChild Dentry
   140  		newChild.Init(fs, childInode) // childInode's ref is transferred to newChild.
   141  		parent.insertChildLocked(name, &newChild)
   142  		child = &newChild
   143  
   144  		// Drop the ref on newChild. This will cause the dentry to get pruned
   145  		// from the dentry tree by the end of current filesystem operation
   146  		// (before returning to the VFS layer) if another ref is not picked on
   147  		// this dentry.
   148  		if !childInode.Keep() {
   149  			fs.deferDecRef(&newChild)
   150  		}
   151  	}
   152  	return child, nil
   153  }
   154  
   155  // walkExistingLocked resolves rp to an existing file.
   156  //
   157  // walkExistingLocked is loosely analogous to Linux's
   158  // fs/namei.c:path_lookupat().
   159  //
   160  // Preconditions: Filesystem.mu must be locked for at least reading.
   161  //
   162  // Postconditions: Caller must call fs.processDeferredDecRefs*.
   163  func (fs *Filesystem) walkExistingLocked(ctx context.Context, rp *vfs.ResolvingPath) (*Dentry, error) {
   164  	d := rp.Start().Impl().(*Dentry)
   165  	for !rp.Done() {
   166  		var err error
   167  		d, err = fs.stepExistingLocked(ctx, rp, d, true /* mayFollowSymlinks */)
   168  		if err != nil {
   169  			return nil, err
   170  		}
   171  	}
   172  	if rp.MustBeDir() && !d.isDir() {
   173  		return nil, syserror.ENOTDIR
   174  	}
   175  	return d, nil
   176  }
   177  
   178  // walkParentDirLocked resolves all but the last path component of rp to an
   179  // existing directory. It does not check that the returned directory is
   180  // searchable by the provider of rp.
   181  //
   182  // walkParentDirLocked is loosely analogous to Linux's
   183  // fs/namei.c:path_parentat().
   184  //
   185  // Preconditions:
   186  // * Filesystem.mu must be locked for at least reading.
   187  // * !rp.Done().
   188  //
   189  // Postconditions: Caller must call fs.processDeferredDecRefs*.
   190  func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath) (*Dentry, error) {
   191  	d := rp.Start().Impl().(*Dentry)
   192  	for !rp.Final() {
   193  		var err error
   194  		d, err = fs.stepExistingLocked(ctx, rp, d, true /* mayFollowSymlinks */)
   195  		if err != nil {
   196  			return nil, err
   197  		}
   198  	}
   199  	if !d.isDir() {
   200  		return nil, syserror.ENOTDIR
   201  	}
   202  	return d, nil
   203  }
   204  
   205  // checkCreateLocked checks that a file named rp.Component() may be created in
   206  // directory parent, then returns rp.Component().
   207  //
   208  // Preconditions:
   209  // * Filesystem.mu must be locked for at least reading.
   210  // * isDir(parentInode) == true.
   211  func checkCreateLocked(ctx context.Context, creds *auth.Credentials, name string, parent *Dentry) error {
   212  	// Order of checks is important. First check if parent directory can be
   213  	// executed, then check for existence, and lastly check if mount is writable.
   214  	if err := parent.inode.CheckPermissions(ctx, creds, vfs.MayExec); err != nil {
   215  		return err
   216  	}
   217  	if name == "." || name == ".." {
   218  		return syserror.EEXIST
   219  	}
   220  	if len(name) > linux.NAME_MAX {
   221  		return linuxerr.ENAMETOOLONG
   222  	}
   223  	if _, ok := parent.children[name]; ok {
   224  		return syserror.EEXIST
   225  	}
   226  	if parent.VFSDentry().IsDead() {
   227  		return syserror.ENOENT
   228  	}
   229  	if err := parent.inode.CheckPermissions(ctx, creds, vfs.MayWrite); err != nil {
   230  		return err
   231  	}
   232  	return nil
   233  }
   234  
   235  // checkDeleteLocked checks that the file represented by vfsd may be deleted.
   236  //
   237  // Preconditions: Filesystem.mu must be locked for at least reading.
   238  func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry) error {
   239  	parent := d.parent
   240  	if parent == nil {
   241  		return linuxerr.EBUSY
   242  	}
   243  	if parent.vfsd.IsDead() {
   244  		return syserror.ENOENT
   245  	}
   246  	if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
   247  		return err
   248  	}
   249  	return nil
   250  }
   251  
   252  // Release implements vfs.FilesystemImpl.Release.
   253  func (fs *Filesystem) Release(ctx context.Context) {
   254  	root := fs.root
   255  	if root == nil {
   256  		return
   257  	}
   258  	fs.mu.Lock()
   259  	root.releaseKeptDentriesLocked(ctx)
   260  	for fs.cachedDentriesLen != 0 {
   261  		fs.evictCachedDentryLocked(ctx)
   262  	}
   263  	fs.mu.Unlock()
   264  	// Drop ref acquired in Dentry.InitRoot().
   265  	root.DecRef(ctx)
   266  }
   267  
   268  // releaseKeptDentriesLocked recursively drops all dentry references created by
   269  // Lookup when Dentry.inode.Keep() is true.
   270  //
   271  // Precondition: Filesystem.mu is held.
   272  func (d *Dentry) releaseKeptDentriesLocked(ctx context.Context) {
   273  	if d.inode.Keep() && d != d.fs.root {
   274  		d.decRefLocked(ctx)
   275  	}
   276  
   277  	if d.isDir() {
   278  		var children []*Dentry
   279  		d.dirMu.Lock()
   280  		for _, child := range d.children {
   281  			children = append(children, child)
   282  		}
   283  		d.dirMu.Unlock()
   284  		for _, child := range children {
   285  			child.releaseKeptDentriesLocked(ctx)
   286  		}
   287  	}
   288  }
   289  
   290  // Sync implements vfs.FilesystemImpl.Sync.
   291  func (fs *Filesystem) Sync(ctx context.Context) error {
   292  	// All filesystem state is in-memory.
   293  	return nil
   294  }
   295  
   296  // AccessAt implements vfs.Filesystem.Impl.AccessAt.
   297  func (fs *Filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
   298  	fs.mu.RLock()
   299  	defer fs.processDeferredDecRefs(ctx)
   300  	defer fs.mu.RUnlock()
   301  
   302  	d, err := fs.walkExistingLocked(ctx, rp)
   303  	if err != nil {
   304  		return err
   305  	}
   306  	return d.inode.CheckPermissions(ctx, creds, ats)
   307  }
   308  
   309  // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
   310  func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
   311  	fs.mu.RLock()
   312  	defer fs.processDeferredDecRefs(ctx)
   313  	defer fs.mu.RUnlock()
   314  	d, err := fs.walkExistingLocked(ctx, rp)
   315  	if err != nil {
   316  		return nil, err
   317  	}
   318  
   319  	if opts.CheckSearchable {
   320  		if !d.isDir() {
   321  			return nil, syserror.ENOTDIR
   322  		}
   323  		if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
   324  			return nil, err
   325  		}
   326  	}
   327  	vfsd := d.VFSDentry()
   328  	vfsd.IncRef() // Ownership transferred to caller.
   329  	return vfsd, nil
   330  }
   331  
   332  // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
   333  func (fs *Filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
   334  	fs.mu.RLock()
   335  	defer fs.processDeferredDecRefs(ctx)
   336  	defer fs.mu.RUnlock()
   337  	d, err := fs.walkParentDirLocked(ctx, rp)
   338  	if err != nil {
   339  		return nil, err
   340  	}
   341  	d.IncRef() // Ownership transferred to caller.
   342  	return d.VFSDentry(), nil
   343  }
   344  
   345  // LinkAt implements vfs.FilesystemImpl.LinkAt.
   346  func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
   347  	if rp.Done() {
   348  		return syserror.EEXIST
   349  	}
   350  	fs.mu.Lock()
   351  	defer fs.processDeferredDecRefs(ctx)
   352  	defer fs.mu.Unlock()
   353  	parent, err := fs.walkParentDirLocked(ctx, rp)
   354  	if err != nil {
   355  		return err
   356  	}
   357  
   358  	parent.dirMu.Lock()
   359  	defer parent.dirMu.Unlock()
   360  	pc := rp.Component()
   361  	if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil {
   362  		return err
   363  	}
   364  	if rp.MustBeDir() {
   365  		return syserror.ENOENT
   366  	}
   367  	if rp.Mount() != vd.Mount() {
   368  		return linuxerr.EXDEV
   369  	}
   370  	if err := rp.Mount().CheckBeginWrite(); err != nil {
   371  		return err
   372  	}
   373  	defer rp.Mount().EndWrite()
   374  
   375  	d := vd.Dentry().Impl().(*Dentry)
   376  	if d.isDir() {
   377  		return linuxerr.EPERM
   378  	}
   379  
   380  	childI, err := parent.inode.NewLink(ctx, pc, d.inode)
   381  	if err != nil {
   382  		return err
   383  	}
   384  	var child Dentry
   385  	child.Init(fs, childI)
   386  	parent.insertChildLocked(pc, &child)
   387  	return nil
   388  }
   389  
   390  // MkdirAt implements vfs.FilesystemImpl.MkdirAt.
   391  func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
   392  	if rp.Done() {
   393  		return syserror.EEXIST
   394  	}
   395  	fs.mu.Lock()
   396  	defer fs.processDeferredDecRefs(ctx)
   397  	defer fs.mu.Unlock()
   398  	parent, err := fs.walkParentDirLocked(ctx, rp)
   399  	if err != nil {
   400  		return err
   401  	}
   402  
   403  	parent.dirMu.Lock()
   404  	defer parent.dirMu.Unlock()
   405  	pc := rp.Component()
   406  	if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil {
   407  		return err
   408  	}
   409  	if err := rp.Mount().CheckBeginWrite(); err != nil {
   410  		return err
   411  	}
   412  	defer rp.Mount().EndWrite()
   413  	childI, err := parent.inode.NewDir(ctx, pc, opts)
   414  	if err != nil {
   415  		if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) {
   416  			return err
   417  		}
   418  		childI = newSyntheticDirectory(ctx, rp.Credentials(), opts.Mode)
   419  	}
   420  	var child Dentry
   421  	child.Init(fs, childI)
   422  	parent.insertChildLocked(pc, &child)
   423  	return nil
   424  }
   425  
   426  // MknodAt implements vfs.FilesystemImpl.MknodAt.
   427  func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
   428  	if rp.Done() {
   429  		return syserror.EEXIST
   430  	}
   431  	fs.mu.Lock()
   432  	defer fs.processDeferredDecRefs(ctx)
   433  	defer fs.mu.Unlock()
   434  	parent, err := fs.walkParentDirLocked(ctx, rp)
   435  	if err != nil {
   436  		return err
   437  	}
   438  
   439  	parent.dirMu.Lock()
   440  	defer parent.dirMu.Unlock()
   441  	pc := rp.Component()
   442  	if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil {
   443  		return err
   444  	}
   445  	if rp.MustBeDir() {
   446  		return syserror.ENOENT
   447  	}
   448  	if err := rp.Mount().CheckBeginWrite(); err != nil {
   449  		return err
   450  	}
   451  	defer rp.Mount().EndWrite()
   452  	newI, err := parent.inode.NewNode(ctx, pc, opts)
   453  	if err != nil {
   454  		return err
   455  	}
   456  	var newD Dentry
   457  	newD.Init(fs, newI)
   458  	parent.insertChildLocked(pc, &newD)
   459  	return nil
   460  }
   461  
   462  // OpenAt implements vfs.FilesystemImpl.OpenAt.
   463  func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
   464  	// Filter out flags that are not supported by kernfs. O_DIRECTORY and
   465  	// O_NOFOLLOW have no effect here (they're handled by VFS by setting
   466  	// appropriate bits in rp), but are returned by
   467  	// FileDescriptionImpl.StatusFlags().
   468  	opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC |
   469  		linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY
   470  	ats := vfs.AccessTypesForOpenFlags(&opts)
   471  
   472  	// Do not create new file.
   473  	if opts.Flags&linux.O_CREAT == 0 {
   474  		fs.mu.RLock()
   475  		defer fs.processDeferredDecRefs(ctx)
   476  		d, err := fs.walkExistingLocked(ctx, rp)
   477  		if err != nil {
   478  			fs.mu.RUnlock()
   479  			return nil, err
   480  		}
   481  		if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
   482  			fs.mu.RUnlock()
   483  			return nil, err
   484  		}
   485  		// Open may block so we need to unlock fs.mu. IncRef d to prevent
   486  		// its destruction while fs.mu is unlocked.
   487  		d.IncRef()
   488  		fs.mu.RUnlock()
   489  		fd, err := d.inode.Open(ctx, rp, d, opts)
   490  		d.DecRef(ctx)
   491  		return fd, err
   492  	}
   493  
   494  	// May create new file.
   495  	mustCreate := opts.Flags&linux.O_EXCL != 0
   496  	d := rp.Start().Impl().(*Dentry)
   497  	fs.mu.Lock()
   498  	unlocked := false
   499  	unlock := func() {
   500  		if !unlocked {
   501  			fs.mu.Unlock()
   502  			unlocked = true
   503  		}
   504  	}
   505  	// Process all to-be-decref'd dentries at the end at once.
   506  	// Since we defer unlock() AFTER this, fs.mu is guaranteed to be unlocked
   507  	// when this is executed.
   508  	defer fs.processDeferredDecRefs(ctx)
   509  	defer unlock()
   510  	if rp.Done() {
   511  		if rp.MustBeDir() {
   512  			return nil, syserror.EISDIR
   513  		}
   514  		if mustCreate {
   515  			return nil, syserror.EEXIST
   516  		}
   517  		if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
   518  			return nil, err
   519  		}
   520  		// Open may block so we need to unlock fs.mu. IncRef d to prevent
   521  		// its destruction while fs.mu is unlocked.
   522  		d.IncRef()
   523  		unlock()
   524  		fd, err := d.inode.Open(ctx, rp, d, opts)
   525  		d.DecRef(ctx)
   526  		return fd, err
   527  	}
   528  afterTrailingSymlink:
   529  	parent, err := fs.walkParentDirLocked(ctx, rp)
   530  	if err != nil {
   531  		return nil, err
   532  	}
   533  	// Check for search permission in the parent directory.
   534  	if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
   535  		return nil, err
   536  	}
   537  	// Reject attempts to open directories with O_CREAT.
   538  	if rp.MustBeDir() {
   539  		return nil, syserror.EISDIR
   540  	}
   541  	pc := rp.Component()
   542  	if pc == "." || pc == ".." {
   543  		return nil, syserror.EISDIR
   544  	}
   545  	if len(pc) > linux.NAME_MAX {
   546  		return nil, linuxerr.ENAMETOOLONG
   547  	}
   548  	// Determine whether or not we need to create a file.
   549  	child, err := fs.stepExistingLocked(ctx, rp, parent, false /* mayFollowSymlinks */)
   550  	if linuxerr.Equals(linuxerr.ENOENT, err) {
   551  		// Already checked for searchability above; now check for writability.
   552  		if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil {
   553  			return nil, err
   554  		}
   555  		if err := rp.Mount().CheckBeginWrite(); err != nil {
   556  			return nil, err
   557  		}
   558  		defer rp.Mount().EndWrite()
   559  		// Create and open the child.
   560  		childI, err := parent.inode.NewFile(ctx, pc, opts)
   561  		if err != nil {
   562  			return nil, err
   563  		}
   564  		var child Dentry
   565  		child.Init(fs, childI)
   566  		parent.insertChild(pc, &child)
   567  		// Open may block so we need to unlock fs.mu. IncRef child to prevent
   568  		// its destruction while fs.mu is unlocked.
   569  		child.IncRef()
   570  		unlock()
   571  		fd, err := child.inode.Open(ctx, rp, &child, opts)
   572  		child.DecRef(ctx)
   573  		return fd, err
   574  	}
   575  	if err != nil {
   576  		return nil, err
   577  	}
   578  	// Open existing file or follow symlink.
   579  	if mustCreate {
   580  		return nil, syserror.EEXIST
   581  	}
   582  	if rp.ShouldFollowSymlink() && child.isSymlink() {
   583  		targetVD, targetPathname, err := child.inode.Getlink(ctx, rp.Mount())
   584  		if err != nil {
   585  			return nil, err
   586  		}
   587  		if targetVD.Ok() {
   588  			err := rp.HandleJump(targetVD)
   589  			fs.deferDecRefVD(ctx, targetVD)
   590  			if err != nil {
   591  				return nil, err
   592  			}
   593  		} else {
   594  			if err := rp.HandleSymlink(targetPathname); err != nil {
   595  				return nil, err
   596  			}
   597  		}
   598  		// rp.Final() may no longer be true since we now need to resolve the
   599  		// symlink target.
   600  		goto afterTrailingSymlink
   601  	}
   602  	if err := child.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
   603  		return nil, err
   604  	}
   605  	// Open may block so we need to unlock fs.mu. IncRef child to prevent
   606  	// its destruction while fs.mu is unlocked.
   607  	child.IncRef()
   608  	unlock()
   609  	fd, err := child.inode.Open(ctx, rp, child, opts)
   610  	child.DecRef(ctx)
   611  	return fd, err
   612  }
   613  
   614  // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
   615  func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
   616  	defer fs.processDeferredDecRefs(ctx)
   617  
   618  	fs.mu.RLock()
   619  	d, err := fs.walkExistingLocked(ctx, rp)
   620  	if err != nil {
   621  		fs.mu.RUnlock()
   622  		return "", err
   623  	}
   624  	if !d.isSymlink() {
   625  		fs.mu.RUnlock()
   626  		return "", linuxerr.EINVAL
   627  	}
   628  
   629  	// Inode.Readlink() cannot be called holding fs locks.
   630  	d.IncRef()
   631  	defer d.DecRef(ctx)
   632  	fs.mu.RUnlock()
   633  
   634  	return d.inode.Readlink(ctx, rp.Mount())
   635  }
   636  
   637  // RenameAt implements vfs.FilesystemImpl.RenameAt.
   638  func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
   639  	fs.mu.Lock()
   640  	defer fs.processDeferredDecRefs(ctx)
   641  	defer fs.mu.Unlock()
   642  
   643  	// Resolve the destination directory first to verify that it's on this
   644  	// Mount.
   645  	dstDir, err := fs.walkParentDirLocked(ctx, rp)
   646  	if err != nil {
   647  		return err
   648  	}
   649  
   650  	// Only RENAME_NOREPLACE is supported.
   651  	if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
   652  		return linuxerr.EINVAL
   653  	}
   654  	noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0
   655  
   656  	mnt := rp.Mount()
   657  	if mnt != oldParentVD.Mount() {
   658  		return linuxerr.EXDEV
   659  	}
   660  	if err := mnt.CheckBeginWrite(); err != nil {
   661  		return err
   662  	}
   663  	defer mnt.EndWrite()
   664  
   665  	srcDirVFSD := oldParentVD.Dentry()
   666  	srcDir := srcDirVFSD.Impl().(*Dentry)
   667  	srcDir.dirMu.Lock()
   668  	src, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), srcDir, oldName, srcDir.children[oldName])
   669  	srcDir.dirMu.Unlock()
   670  	if err != nil {
   671  		return err
   672  	}
   673  
   674  	// Can we remove the src dentry?
   675  	if err := checkDeleteLocked(ctx, rp, src); err != nil {
   676  		return err
   677  	}
   678  
   679  	// Can we create the dst dentry?
   680  	var dst *Dentry
   681  	newName := rp.Component()
   682  	if newName == "." || newName == ".." {
   683  		if noReplace {
   684  			return syserror.EEXIST
   685  		}
   686  		return linuxerr.EBUSY
   687  	}
   688  
   689  	err = checkCreateLocked(ctx, rp.Credentials(), newName, dstDir)
   690  	switch {
   691  	case err == nil:
   692  		// Ok, continue with rename as replacement.
   693  	case linuxerr.Equals(linuxerr.EEXIST, err):
   694  		if noReplace {
   695  			// Won't overwrite existing node since RENAME_NOREPLACE was requested.
   696  			return syserror.EEXIST
   697  		}
   698  		dst = dstDir.children[newName]
   699  		if dst == nil {
   700  			panic(fmt.Sprintf("Child %q for parent Dentry %+v disappeared inside atomic section?", newName, dstDir))
   701  		}
   702  	default:
   703  		return err
   704  	}
   705  
   706  	if srcDir == dstDir && oldName == newName {
   707  		return nil
   708  	}
   709  
   710  	var dstVFSD *vfs.Dentry
   711  	if dst != nil {
   712  		dstVFSD = dst.VFSDentry()
   713  	}
   714  
   715  	mntns := vfs.MountNamespaceFromContext(ctx)
   716  	defer mntns.DecRef(ctx)
   717  	virtfs := rp.VirtualFilesystem()
   718  
   719  	// We can't deadlock here due to lock ordering because we're protected from
   720  	// concurrent renames by fs.mu held for writing.
   721  	srcDir.dirMu.Lock()
   722  	defer srcDir.dirMu.Unlock()
   723  	if srcDir != dstDir {
   724  		dstDir.dirMu.Lock()
   725  		defer dstDir.dirMu.Unlock()
   726  	}
   727  
   728  	srcVFSD := src.VFSDentry()
   729  	if err := virtfs.PrepareRenameDentry(mntns, srcVFSD, dstVFSD); err != nil {
   730  		return err
   731  	}
   732  	err = srcDir.inode.Rename(ctx, src.name, newName, src.inode, dstDir.inode)
   733  	if err != nil {
   734  		virtfs.AbortRenameDentry(srcVFSD, dstVFSD)
   735  		return err
   736  	}
   737  	delete(srcDir.children, src.name)
   738  	if srcDir != dstDir {
   739  		fs.deferDecRef(srcDir) // child (src) drops ref on old parent.
   740  		dstDir.IncRef()        // child (src) takes a ref on the new parent.
   741  	}
   742  	src.parent = dstDir
   743  	src.name = newName
   744  	if dstDir.children == nil {
   745  		dstDir.children = make(map[string]*Dentry)
   746  	}
   747  	replaced := dstDir.children[newName]
   748  	dstDir.children[newName] = src
   749  	var replaceVFSD *vfs.Dentry
   750  	if replaced != nil {
   751  		// deferDecRef so that fs.mu and dstDir.mu are unlocked by then.
   752  		fs.deferDecRef(replaced)
   753  		replaceVFSD = replaced.VFSDentry()
   754  	}
   755  	virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) // +checklocksforce: to may be nil, that's okay.
   756  	return nil
   757  }
   758  
   759  // RmdirAt implements vfs.FilesystemImpl.RmdirAt.
   760  func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
   761  	fs.mu.Lock()
   762  	defer fs.processDeferredDecRefs(ctx)
   763  	defer fs.mu.Unlock()
   764  
   765  	d, err := fs.walkExistingLocked(ctx, rp)
   766  	if err != nil {
   767  		return err
   768  	}
   769  	if err := rp.Mount().CheckBeginWrite(); err != nil {
   770  		return err
   771  	}
   772  	defer rp.Mount().EndWrite()
   773  	if err := checkDeleteLocked(ctx, rp, d); err != nil {
   774  		return err
   775  	}
   776  	if !d.isDir() {
   777  		return syserror.ENOTDIR
   778  	}
   779  	if d.inode.HasChildren() {
   780  		return linuxerr.ENOTEMPTY
   781  	}
   782  	virtfs := rp.VirtualFilesystem()
   783  	parentDentry := d.parent
   784  	parentDentry.dirMu.Lock()
   785  	defer parentDentry.dirMu.Unlock()
   786  
   787  	mntns := vfs.MountNamespaceFromContext(ctx)
   788  	defer mntns.DecRef(ctx)
   789  	vfsd := d.VFSDentry()
   790  	if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil {
   791  		return err // +checklocksforce: vfsd is not locked.
   792  	}
   793  
   794  	if err := parentDentry.inode.RmDir(ctx, d.name, d.inode); err != nil {
   795  		virtfs.AbortDeleteDentry(vfsd)
   796  		return err
   797  	}
   798  	delete(parentDentry.children, d.name)
   799  	// Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then.
   800  	fs.deferDecRef(d)
   801  	virtfs.CommitDeleteDentry(ctx, vfsd)
   802  	return nil
   803  }
   804  
   805  // SetStatAt implements vfs.FilesystemImpl.SetStatAt.
   806  func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
   807  	fs.mu.RLock()
   808  	defer fs.processDeferredDecRefs(ctx)
   809  	defer fs.mu.RUnlock()
   810  	d, err := fs.walkExistingLocked(ctx, rp)
   811  	if err != nil {
   812  		return err
   813  	}
   814  	if opts.Stat.Mask == 0 {
   815  		return nil
   816  	}
   817  	return d.inode.SetStat(ctx, fs.VFSFilesystem(), rp.Credentials(), opts)
   818  }
   819  
   820  // StatAt implements vfs.FilesystemImpl.StatAt.
   821  func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
   822  	fs.mu.RLock()
   823  	defer fs.processDeferredDecRefs(ctx)
   824  	defer fs.mu.RUnlock()
   825  	d, err := fs.walkExistingLocked(ctx, rp)
   826  	if err != nil {
   827  		return linux.Statx{}, err
   828  	}
   829  	return d.inode.Stat(ctx, fs.VFSFilesystem(), opts)
   830  }
   831  
   832  // StatFSAt implements vfs.FilesystemImpl.StatFSAt.
   833  func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
   834  	fs.mu.RLock()
   835  	defer fs.processDeferredDecRefs(ctx)
   836  	defer fs.mu.RUnlock()
   837  	d, err := fs.walkExistingLocked(ctx, rp)
   838  	if err != nil {
   839  		return linux.Statfs{}, err
   840  	}
   841  	return d.inode.StatFS(ctx, fs.VFSFilesystem())
   842  }
   843  
   844  // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
   845  func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
   846  	if rp.Done() {
   847  		return syserror.EEXIST
   848  	}
   849  	fs.mu.Lock()
   850  	defer fs.processDeferredDecRefs(ctx)
   851  	defer fs.mu.Unlock()
   852  	parent, err := fs.walkParentDirLocked(ctx, rp)
   853  	if err != nil {
   854  		return err
   855  	}
   856  	parent.dirMu.Lock()
   857  	defer parent.dirMu.Unlock()
   858  
   859  	pc := rp.Component()
   860  	if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil {
   861  		return err
   862  	}
   863  	if rp.MustBeDir() {
   864  		return syserror.ENOENT
   865  	}
   866  	if err := rp.Mount().CheckBeginWrite(); err != nil {
   867  		return err
   868  	}
   869  	defer rp.Mount().EndWrite()
   870  	childI, err := parent.inode.NewSymlink(ctx, pc, target)
   871  	if err != nil {
   872  		return err
   873  	}
   874  	var child Dentry
   875  	child.Init(fs, childI)
   876  	parent.insertChildLocked(pc, &child)
   877  	return nil
   878  }
   879  
   880  // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
   881  func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
   882  	fs.mu.Lock()
   883  	defer fs.processDeferredDecRefs(ctx)
   884  	defer fs.mu.Unlock()
   885  
   886  	d, err := fs.walkExistingLocked(ctx, rp)
   887  	if err != nil {
   888  		return err
   889  	}
   890  	if err := rp.Mount().CheckBeginWrite(); err != nil {
   891  		return err
   892  	}
   893  	defer rp.Mount().EndWrite()
   894  	if err := checkDeleteLocked(ctx, rp, d); err != nil {
   895  		return err
   896  	}
   897  	if d.isDir() {
   898  		return syserror.EISDIR
   899  	}
   900  	virtfs := rp.VirtualFilesystem()
   901  	parentDentry := d.parent
   902  	parentDentry.dirMu.Lock()
   903  	defer parentDentry.dirMu.Unlock()
   904  	mntns := vfs.MountNamespaceFromContext(ctx)
   905  	defer mntns.DecRef(ctx)
   906  	vfsd := d.VFSDentry()
   907  	if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil {
   908  		return err
   909  	}
   910  	if err := parentDentry.inode.Unlink(ctx, d.name, d.inode); err != nil {
   911  		virtfs.AbortDeleteDentry(vfsd)
   912  		return err
   913  	}
   914  	delete(parentDentry.children, d.name)
   915  	// Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then.
   916  	fs.deferDecRef(d)
   917  	virtfs.CommitDeleteDentry(ctx, vfsd)
   918  	return nil
   919  }
   920  
   921  // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
   922  func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
   923  	fs.mu.RLock()
   924  	defer fs.processDeferredDecRefs(ctx)
   925  	defer fs.mu.RUnlock()
   926  	d, err := fs.walkExistingLocked(ctx, rp)
   927  	if err != nil {
   928  		return nil, err
   929  	}
   930  	if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil {
   931  		return nil, err
   932  	}
   933  	return nil, linuxerr.ECONNREFUSED
   934  }
   935  
   936  // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
   937  func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
   938  	fs.mu.RLock()
   939  	defer fs.processDeferredDecRefs(ctx)
   940  	defer fs.mu.RUnlock()
   941  	_, err := fs.walkExistingLocked(ctx, rp)
   942  	if err != nil {
   943  		return nil, err
   944  	}
   945  	// kernfs currently does not support extended attributes.
   946  	return nil, linuxerr.ENOTSUP
   947  }
   948  
   949  // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
   950  func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
   951  	fs.mu.RLock()
   952  	defer fs.processDeferredDecRefs(ctx)
   953  	defer fs.mu.RUnlock()
   954  	_, err := fs.walkExistingLocked(ctx, rp)
   955  	if err != nil {
   956  		return "", err
   957  	}
   958  	// kernfs currently does not support extended attributes.
   959  	return "", linuxerr.ENOTSUP
   960  }
   961  
   962  // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
   963  func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
   964  	fs.mu.RLock()
   965  	defer fs.processDeferredDecRefs(ctx)
   966  	defer fs.mu.RUnlock()
   967  	_, err := fs.walkExistingLocked(ctx, rp)
   968  	if err != nil {
   969  		return err
   970  	}
   971  	// kernfs currently does not support extended attributes.
   972  	return linuxerr.ENOTSUP
   973  }
   974  
   975  // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
   976  func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
   977  	fs.mu.RLock()
   978  	defer fs.processDeferredDecRefs(ctx)
   979  	defer fs.mu.RUnlock()
   980  	_, err := fs.walkExistingLocked(ctx, rp)
   981  	if err != nil {
   982  		return err
   983  	}
   984  	// kernfs currently does not support extended attributes.
   985  	return linuxerr.ENOTSUP
   986  }
   987  
   988  // PrependPath implements vfs.FilesystemImpl.PrependPath.
   989  func (fs *Filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
   990  	fs.mu.RLock()
   991  	defer fs.mu.RUnlock()
   992  	return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b)
   993  }
   994  
   995  func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) {
   996  	if d, ok := vd.Dentry().Impl().(*Dentry); ok && d.fs == fs {
   997  		// The following is equivalent to vd.DecRef(ctx). This is needed
   998  		// because if d belongs to this filesystem, we can not DecRef it right
   999  		// away as we may be holding fs.mu. d.DecRef may acquire fs.mu. So we
  1000  		// defer the DecRef to when locks are dropped.
  1001  		vd.Mount().DecRef(ctx)
  1002  		fs.deferDecRef(d)
  1003  	} else {
  1004  		vd.DecRef(ctx)
  1005  	}
  1006  }