github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/fsimpl/gofer/filesystem.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gofer
    16  
    17  import (
    18  	"fmt"
    19  	"math"
    20  	"strings"
    21  	"sync"
    22  
    23  	"golang.org/x/sys/unix"
    24  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    25  	"github.com/MerlinKodo/gvisor/pkg/atomicbitops"
    26  	"github.com/MerlinKodo/gvisor/pkg/context"
    27  	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
    28  	"github.com/MerlinKodo/gvisor/pkg/fspath"
    29  	"github.com/MerlinKodo/gvisor/pkg/sentry/fsimpl/host"
    30  	"github.com/MerlinKodo/gvisor/pkg/sentry/fsmetric"
    31  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel"
    32  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth"
    33  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel/pipe"
    34  	"github.com/MerlinKodo/gvisor/pkg/sentry/socket/unix/transport"
    35  	"github.com/MerlinKodo/gvisor/pkg/sentry/vfs"
    36  )
    37  
    38  // Sync implements vfs.FilesystemImpl.Sync.
    39  func (fs *filesystem) Sync(ctx context.Context) error {
    40  	// Snapshot current syncable dentries and special file FDs.
    41  	fs.syncMu.Lock()
    42  	ds := make([]*dentry, 0, fs.syncableDentries.Len())
    43  	for elem := fs.syncableDentries.Front(); elem != nil; elem = elem.Next() {
    44  		ds = append(ds, elem.d)
    45  	}
    46  	sffds := make([]*specialFileFD, 0, fs.specialFileFDs.Len())
    47  	for sffd := fs.specialFileFDs.Front(); sffd != nil; sffd = sffd.Next() {
    48  		sffds = append(sffds, sffd)
    49  	}
    50  	fs.syncMu.Unlock()
    51  
    52  	// Return the first error we encounter, but sync everything we can
    53  	// regardless.
    54  	var retErr error
    55  
    56  	// Note that lisafs is capable of batching FSync RPCs. However, we can not
    57  	// batch all the FDIDs to be synced from ds and sffds. Because the error
    58  	// handling varies based on file type. FSync errors are only considered for
    59  	// regular file FDIDs that were opened for writing. We could do individual
    60  	// RPCs for such FDIDs and batch the rest, but it increases code complexity
    61  	// substantially. We could implement it in the future if need be.
    62  
    63  	// Sync syncable dentries.
    64  	for _, d := range ds {
    65  		if err := d.syncCachedFile(ctx, true /* forFilesystemSync */); err != nil {
    66  			ctx.Infof("gofer.filesystem.Sync: dentry.syncCachedFile failed: %v", err)
    67  			if retErr == nil {
    68  				retErr = err
    69  			}
    70  		}
    71  	}
    72  
    73  	// Sync special files, which may be writable but do not use dentry shared
    74  	// handles (so they won't be synced by the above).
    75  	for _, sffd := range sffds {
    76  		if err := sffd.sync(ctx, true /* forFilesystemSync */); err != nil {
    77  			ctx.Infof("gofer.filesystem.Sync: specialFileFD.sync failed: %v", err)
    78  			if retErr == nil {
    79  				retErr = err
    80  			}
    81  		}
    82  	}
    83  
    84  	return retErr
    85  }
    86  
    87  // MaxFilenameLen is the maximum length of a filename. This is dictated by 9P's
    88  // encoding of strings, which uses 2 bytes for the length prefix.
    89  const MaxFilenameLen = (1 << 16) - 1
    90  
    91  // dentrySlicePool is a pool of *[]*dentry used to store dentries for which
    92  // dentry.checkCachingLocked() must be called. The pool holds pointers to
    93  // slices because Go lacks generics, so sync.Pool operates on any, so
    94  // every call to (what should be) sync.Pool<[]*dentry>.Put() allocates a copy
    95  // of the slice header on the heap.
    96  var dentrySlicePool = sync.Pool{
    97  	New: func() any {
    98  		ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity
    99  		return &ds
   100  	},
   101  }
   102  
   103  func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry {
   104  	if ds == nil {
   105  		ds = dentrySlicePool.Get().(*[]*dentry)
   106  	}
   107  	*ds = append(*ds, d)
   108  	return ds
   109  }
   110  
   111  // Precondition: !parent.isSynthetic() && !child.isSynthetic().
   112  func appendNewChildDentry(ds **[]*dentry, parent *dentry, child *dentry) {
   113  	// The new child was added to parent and took a ref on the parent (hence
   114  	// parent can be removed from cache). A new child has 0 refs for now. So
   115  	// checkCachingLocked() should be called on both. Call it first on the parent
   116  	// as it may create space in the cache for child to be inserted - hence
   117  	// avoiding a cache eviction.
   118  	*ds = appendDentry(*ds, parent)
   119  	*ds = appendDentry(*ds, child)
   120  }
   121  
   122  // Preconditions: ds != nil.
   123  func putDentrySlice(ds *[]*dentry) {
   124  	// Allow dentries to be GC'd.
   125  	for i := range *ds {
   126  		(*ds)[i] = nil
   127  	}
   128  	*ds = (*ds)[:0]
   129  	dentrySlicePool.Put(ds)
   130  }
   131  
   132  // renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls
   133  // dentry.checkCachingLocked on all dentries in *dsp with fs.renameMu locked
   134  // for writing.
   135  //
   136  // dsp is a pointer-to-pointer since defer evaluates its arguments immediately,
   137  // but dentry slices are allocated lazily, and it's much easier to say "defer
   138  // fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
   139  // fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
   140  // +checklocksreleaseread:fs.renameMu
   141  func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **[]*dentry) {
   142  	fs.renameMu.RUnlock()
   143  	if *dsp == nil {
   144  		return
   145  	}
   146  	ds := **dsp
   147  	for _, d := range ds {
   148  		d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
   149  	}
   150  	putDentrySlice(*dsp)
   151  }
   152  
   153  // +checklocksrelease:fs.renameMu
   154  func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
   155  	if *ds == nil {
   156  		fs.renameMu.Unlock()
   157  		return
   158  	}
   159  	for _, d := range **ds {
   160  		d.checkCachingLocked(ctx, true /* renameMuWriteLocked */)
   161  	}
   162  	fs.renameMu.Unlock()
   163  	putDentrySlice(*ds)
   164  }
   165  
   166  // stepLocked resolves rp.Component() to an existing file, starting from the
   167  // given directory.
   168  //
   169  // Dentries which may become cached as a result of the traversal are appended
   170  // to *ds.
   171  //
   172  // Preconditions:
   173  //   - fs.renameMu must be locked.
   174  //   - d.opMu must be locked for reading.
   175  //   - !rp.Done().
   176  //   - If !d.cachedMetadataAuthoritative(), then d and all children that are
   177  //     part of rp must have been revalidated.
   178  //
   179  // +checklocksread:d.opMu
   180  func (fs *filesystem) stepLocked(ctx context.Context, rp resolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, bool, error) {
   181  	if !d.isDir() {
   182  		return nil, false, linuxerr.ENOTDIR
   183  	}
   184  	if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
   185  		return nil, false, err
   186  	}
   187  	name := rp.Component()
   188  	if name == "." {
   189  		rp.Advance()
   190  		return d, false, nil
   191  	}
   192  	if name == ".." {
   193  		if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil {
   194  			return nil, false, err
   195  		} else if isRoot || d.parent == nil {
   196  			rp.Advance()
   197  			return d, false, nil
   198  		}
   199  		if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
   200  			return nil, false, err
   201  		}
   202  		rp.Advance()
   203  		return d.parent, false, nil
   204  	}
   205  	child, err := fs.getChildAndWalkPathLocked(ctx, d, rp, ds)
   206  	if err != nil {
   207  		return nil, false, err
   208  	}
   209  	if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
   210  		return nil, false, err
   211  	}
   212  	if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() {
   213  		target, err := child.readlink(ctx, rp.Mount())
   214  		if err != nil {
   215  			return nil, false, err
   216  		}
   217  		followedSymlink, err := rp.HandleSymlink(target)
   218  		return d, followedSymlink, err
   219  	}
   220  	rp.Advance()
   221  	return child, false, nil
   222  }
   223  
   224  // getChildLocked returns a dentry representing the child of parent with the
   225  // given name. Returns ENOENT if the child doesn't exist.
   226  //
   227  // Preconditions:
   228  //   - fs.renameMu must be locked.
   229  //   - parent.opMu must be locked.
   230  //   - parent.isDir().
   231  //   - name is not "." or "..".
   232  //   - parent and the dentry at name have been revalidated.
   233  //
   234  // +checklocks:parent.opMu
   235  func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
   236  	if child, err := parent.getCachedChildLocked(name); child != nil || err != nil {
   237  		return child, err
   238  	}
   239  	// We don't need to check for race here because parent.opMu is held for
   240  	// writing.
   241  	return fs.getRemoteChildLocked(ctx, parent, name, false /* checkForRace */, ds)
   242  }
   243  
   244  // getRemoteChildLocked is similar to getChildLocked, with the additional
   245  // precondition that the child identified by name does not exist in cache.
   246  //
   247  // If checkForRace argument is true, then this method will check to see if the
   248  // call has raced with another getRemoteChild call, and will handle the race if
   249  // so.
   250  //
   251  // Preconditions:
   252  //   - If checkForRace is false, then parent.opMu must be held for writing.
   253  //   - Otherwise, parent.opMu must be held for reading.
   254  //
   255  // Postcondition: The returned dentry is already cached appropriately.
   256  //
   257  // +checklocksread:parent.opMu
   258  func (fs *filesystem) getRemoteChildLocked(ctx context.Context, parent *dentry, name string, checkForRace bool, ds **[]*dentry) (*dentry, error) {
   259  	child, err := parent.getRemoteChild(ctx, name)
   260  	// Cache the result appropriately in the dentry tree.
   261  	if err != nil {
   262  		if linuxerr.Equals(linuxerr.ENOENT, err) {
   263  			parent.childrenMu.Lock()
   264  			defer parent.childrenMu.Unlock()
   265  			parent.cacheNegativeLookupLocked(name)
   266  		}
   267  		return nil, err
   268  	}
   269  
   270  	parent.childrenMu.Lock()
   271  	defer parent.childrenMu.Unlock()
   272  
   273  	if checkForRace {
   274  		// See if we raced with anoter getRemoteChild call that added
   275  		// to the cache.
   276  		if cachedChild, ok := parent.children[name]; ok && cachedChild != nil {
   277  			// We raced. Destroy our child and return the cached
   278  			// one. This child has no handles, no data, and has not
   279  			// been cached, so destruction is quick and painless.
   280  			child.destroyDisconnected(ctx)
   281  
   282  			// All good. Return the cached child.
   283  			return cachedChild, nil
   284  		}
   285  		// No race, continue with the child we got.
   286  	}
   287  	parent.cacheNewChildLocked(child, name)
   288  	appendNewChildDentry(ds, parent, child)
   289  	return child, nil
   290  }
   291  
   292  // getChildAndWalkPathLocked is the same as getChildLocked, except that it
   293  // may prefetch the entire path represented by rp.
   294  //
   295  // +checklocksread:parent.opMu
   296  func (fs *filesystem) getChildAndWalkPathLocked(ctx context.Context, parent *dentry, rp resolvingPath, ds **[]*dentry) (*dentry, error) {
   297  	if child, err := parent.getCachedChildLocked(rp.Component()); child != nil || err != nil {
   298  		return child, err
   299  	}
   300  	// dentry.getRemoteChildAndWalkPathLocked already handles dentry caching.
   301  	return parent.getRemoteChildAndWalkPathLocked(ctx, rp, ds)
   302  }
   303  
   304  // getCachedChildLocked returns a child dentry if it was cached earlier. If no
   305  // cached child dentry exists, (nil, nil) is returned.
   306  //
   307  // Preconditions:
   308  //   - fs.renameMu must be locked.
   309  //   - d.opMu must be locked for reading.
   310  //   - d.isDir().
   311  //   - name is not "." or "..".
   312  //   - d and the dentry at name have been revalidated.
   313  //
   314  // +checklocksread:d.opMu
   315  func (d *dentry) getCachedChildLocked(name string) (*dentry, error) {
   316  	if len(name) > MaxFilenameLen {
   317  		return nil, linuxerr.ENAMETOOLONG
   318  	}
   319  	d.childrenMu.Lock()
   320  	defer d.childrenMu.Unlock()
   321  	if child, ok := d.children[name]; ok || d.isSynthetic() {
   322  		if child == nil {
   323  			return nil, linuxerr.ENOENT
   324  		}
   325  		return child, nil
   326  	}
   327  
   328  	if d.childrenSet != nil {
   329  		// Is the child even there? Don't make RPC if not.
   330  		if _, ok := d.childrenSet[name]; !ok {
   331  			return nil, linuxerr.ENOENT
   332  		}
   333  	}
   334  	return nil, nil
   335  }
   336  
   337  // walkParentDirLocked resolves all but the last path component of rp to an
   338  // existing directory, starting from the given directory (which is usually
   339  // rp.Start().Impl().(*dentry)). It does not check that the returned directory
   340  // is searchable by the provider of rp.
   341  //
   342  // Preconditions:
   343  //   - fs.renameMu must be locked.
   344  //   - !rp.Done().
   345  //   - If !d.cachedMetadataAuthoritative(), then d's cached metadata must be up
   346  //     to date.
   347  func (fs *filesystem) walkParentDirLocked(ctx context.Context, vfsRP *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
   348  	rp := resolvingPathParent(vfsRP)
   349  	if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
   350  		return nil, err
   351  	}
   352  	for !rp.done() {
   353  		d.opMu.RLock()
   354  		next, followedSymlink, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
   355  		d.opMu.RUnlock()
   356  		if err != nil {
   357  			return nil, err
   358  		}
   359  		d = next
   360  		if followedSymlink {
   361  			if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
   362  				return nil, err
   363  			}
   364  		}
   365  	}
   366  	if !d.isDir() {
   367  		return nil, linuxerr.ENOTDIR
   368  	}
   369  	return d, nil
   370  }
   371  
   372  // resolveLocked resolves rp to an existing file.
   373  //
   374  // Preconditions: fs.renameMu must be locked.
   375  func (fs *filesystem) resolveLocked(ctx context.Context, vfsRP *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
   376  	rp := resolvingPathFull(vfsRP)
   377  	d := rp.Start().Impl().(*dentry)
   378  	if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
   379  		return nil, err
   380  	}
   381  	for !rp.done() {
   382  		d.opMu.RLock()
   383  		next, followedSymlink, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
   384  		d.opMu.RUnlock()
   385  		if err != nil {
   386  			return nil, err
   387  		}
   388  		d = next
   389  		if followedSymlink {
   390  			if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
   391  				return nil, err
   392  			}
   393  		}
   394  	}
   395  	if rp.MustBeDir() && !d.isDir() {
   396  		return nil, linuxerr.ENOTDIR
   397  	}
   398  	return d, nil
   399  }
   400  
// doCreateAt checks that creating a file at rp is permitted, then invokes
// createInRemoteDir (if the parent directory is a real remote directory) or
// createInSyntheticDir (if the parent directory is synthetic) to do so.
//
// dir indicates that the file being created is a directory; it affects the
// handling of rp.MustBeDir() and adds IN_ISDIR to the generated inotify event.
// createInSyntheticDir may be nil, in which case creation in a synthetic
// parent fails with EPERM.
//
// On success the new child is cached in the parent, the dentries that need a
// caching check are queued (and processed when fs.renameMu is released), and
// an IN_CREATE event is delivered to the parent's watches.
//
// Preconditions:
//   - !rp.Done().
//   - For the final path component in rp, !rp.ShouldFollowSymlink().
func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, createInRemoteDir func(parent *dentry, name string, ds **[]*dentry) (*dentry, error), createInSyntheticDir func(parent *dentry, name string) (*dentry, error)) error {
	var ds *[]*dentry
	fs.renameMu.RLock()
	// &ds is passed (rather than ds) because ds is allocated lazily below.
	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	start := rp.Start().Impl().(*dentry)
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return err
	}

	// Order of checks is important. First check if parent directory can be
	// executed, then check for existence, and lastly check if mount is writable.
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return err
	}
	name := rp.Component()
	if name == "." || name == ".." {
		return linuxerr.EEXIST
	}
	if parent.isDeleted() {
		return linuxerr.ENOENT
	}
	if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, name, &ds); err != nil {
		return err
	}

	// parent.opMu is held for writing for the remainder of the function.
	parent.opMu.Lock()
	defer parent.opMu.Unlock()

	if len(name) > MaxFilenameLen {
		return linuxerr.ENAMETOOLONG
	}
	// Check for existence only if caching information is available. Otherwise,
	// don't check for existence just yet. We will check for existence if the
	// checks for writability fail below. Existence check is done by the creation
	// RPCs themselves.
	parent.childrenMu.Lock()
	if child, ok := parent.children[name]; ok && child != nil {
		parent.childrenMu.Unlock()
		return linuxerr.EEXIST
	}
	if parent.childrenSet != nil {
		if _, ok := parent.childrenSet[name]; ok {
			parent.childrenMu.Unlock()
			return linuxerr.EEXIST
		}
	}
	parent.childrenMu.Unlock()
	// checkExistence looks the child up (from cache or remotely) and reports
	// EEXIST if it exists. ENOENT from the lookup is not an error here; any
	// other lookup error is returned as is.
	checkExistence := func() error {
		if child, err := fs.getChildLocked(ctx, parent, name, &ds); err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
			return err
		} else if child != nil {
			return linuxerr.EEXIST
		}
		return nil
	}

	mnt := rp.Mount()
	if err := mnt.CheckBeginWrite(); err != nil {
		// Existence check takes precedence.
		if existenceErr := checkExistence(); existenceErr != nil {
			return existenceErr
		}
		return err
	}
	defer mnt.EndWrite()

	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
		// Existence check takes precedence.
		if existenceErr := checkExistence(); existenceErr != nil {
			return existenceErr
		}
		return err
	}
	// A path that must name a directory cannot be used to create a
	// non-directory file.
	if !dir && rp.MustBeDir() {
		return linuxerr.ENOENT
	}
	if parent.isSynthetic() {
		if createInSyntheticDir == nil {
			return linuxerr.EPERM
		}
		child, err := createInSyntheticDir(parent, name)
		if err != nil {
			return err
		}
		// Record the new synthetic child in the parent and drop the parent's
		// cached dirents.
		parent.childrenMu.Lock()
		parent.cacheNewChildLocked(child, name)
		parent.syntheticChildren++
		parent.clearDirentsLocked()
		parent.childrenMu.Unlock()
		parent.touchCMtime()
		ev := linux.IN_CREATE
		if dir {
			ev |= linux.IN_ISDIR
		}
		parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
		return nil
	}
	// No cached dentry exists; however, in InteropModeShared there might still be
	// an existing file at name. Just attempt the file creation RPC anyways. If a
	// file does exist, the RPC will fail with EEXIST like we would have.
	child, err := createInRemoteDir(parent, name, &ds)
	if err != nil {
		return err
	}
	parent.childrenMu.Lock()
	parent.cacheNewChildLocked(child, name)
	if child.isSynthetic() {
		// createInRemoteDir may hand back a synthetic dentry; in that case only
		// the parent is queued for a caching check.
		parent.syntheticChildren++
		ds = appendDentry(ds, parent)
	} else {
		appendNewChildDentry(&ds, parent, child)
	}
	if fs.opts.interop != InteropModeShared {
		if child, ok := parent.children[name]; ok && child == nil {
			// Delete the now-stale negative dentry.
			delete(parent.children, name)
			parent.negativeChildren--
		}
		parent.clearDirentsLocked()
		parent.touchCMtime()
	}
	parent.childrenMu.Unlock()
	ev := linux.IN_CREATE
	if dir {
		ev |= linux.IN_ISDIR
	}
	parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
	return nil
}
   538  
// unlinkAt removes the file named by the final component of rp from its
// parent directory. If dir is true the target is removed as a directory
// (AT_REMOVEDIR is passed to the remote unlink); otherwise it is removed as a
// non-directory file.
//
// On success the removal is committed in VFS for any cached child dentry, a
// negative lookup is cached for the name, and inotify events for the removal
// are generated.
//
// Preconditions: !rp.Done().
func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool) error {
	var ds *[]*dentry
	fs.renameMu.RLock()
	// &ds is passed (rather than ds) because ds is allocated lazily below.
	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	start := rp.Start().Impl().(*dentry)
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return err
	}
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}
	if err := rp.Mount().CheckBeginWrite(); err != nil {
		return err
	}
	defer rp.Mount().EndWrite()

	// "." and ".." can never be removed; the error depends on whether a
	// directory or a non-directory removal was requested.
	name := rp.Component()
	if dir {
		if name == "." {
			return linuxerr.EINVAL
		}
		if name == ".." {
			return linuxerr.ENOTEMPTY
		}
	} else {
		if name == "." || name == ".." {
			return linuxerr.EISDIR
		}
	}

	vfsObj := rp.VirtualFilesystem()
	if err := fs.revalidateOne(ctx, vfsObj, parent, rp.Component(), &ds); err != nil {
		return err
	}

	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)

	// parent.opMu is held for writing for the remainder of the function.
	parent.opMu.Lock()
	defer parent.opMu.Unlock()

	// If the set of children is known, a name missing from it does not exist.
	parent.childrenMu.Lock()
	if parent.childrenSet != nil {
		if _, ok := parent.childrenSet[name]; !ok {
			parent.childrenMu.Unlock()
			return linuxerr.ENOENT
		}
	}
	parent.childrenMu.Unlock()

	// Load child if sticky bit is set because we need to determine whether
	// deletion is allowed. Without the sticky bit, only the cache is consulted
	// and child may remain nil.
	var child *dentry
	if parent.mode.Load()&linux.ModeSticky == 0 {
		var ok bool
		parent.childrenMu.Lock()
		child, ok = parent.children[name]
		parent.childrenMu.Unlock()
		if ok && child == nil {
			// Hit a negative cached entry, child doesn't exist.
			return linuxerr.ENOENT
		}
	} else {
		child, _, err = fs.stepLocked(ctx, resolvingPathFull(rp), parent, false /* mayFollowSymlinks */, &ds)
		if err != nil {
			return err
		}
		if err := parent.mayDelete(rp.Credentials(), child); err != nil {
			return err
		}
	}

	// If a child dentry exists, prepare to delete it. This should fail if it is
	// a mount point. We detect mount points by speculatively calling
	// PrepareDeleteDentry, which fails if child is a mount point.
	//
	// Also note that if child is nil, then it can't be a mount point.
	if child != nil {
		// Hold child.childrenMu so we can check child.children and
		// child.syntheticChildren. We don't access these fields until a bit later,
		// but locking child.childrenMu after calling vfs.PrepareDeleteDentry() would
		// create an inconsistent lock ordering between dentry.childrenMu and
		// vfs.Dentry.mu (in the VFS lock order, it would make dentry.childrenMu both
		// "a FilesystemImpl lock" and "a lock acquired by a FilesystemImpl between
		// PrepareDeleteDentry and CommitDeleteDentry"). To avoid this, lock
		// child.childrenMu before calling PrepareDeleteDentry.
		child.childrenMu.Lock()
		defer child.childrenMu.Unlock()
		if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
			return err
		}
	}
	// From here on, every error return with child != nil must first call
	// AbortDeleteDentry to undo the PrepareDeleteDentry above.
	flags := uint32(0)
	// If a dentry exists, use it for best-effort checks on its deletability.
	if dir {
		if child != nil {
			// child must be an empty directory.
			if child.syntheticChildren != 0 { // +checklocksforce: child.childrenMu is held if child != nil.
				// This is definitely not an empty directory, irrespective of
				// fs.opts.interop.
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: PrepareDeleteDentry called if child != nil.
				return linuxerr.ENOTEMPTY
			}
			// If InteropModeShared is in effect and the first call to
			// PrepareDeleteDentry above succeeded, then child wasn't
			// revalidated (so we can't expect its file type to be correct) and
			// individually revalidating its children (to confirm that they
			// still exist) would be a waste of time.
			if child.cachedMetadataAuthoritative() {
				if !child.isDir() {
					vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
					return linuxerr.ENOTDIR
				}
				for _, grandchild := range child.children { // +checklocksforce: child.childrenMu is held if child != nil.
					if grandchild != nil {
						vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
						return linuxerr.ENOTEMPTY
					}
				}
			}
		}
		flags = linux.AT_REMOVEDIR
	} else {
		// child must be a non-directory file.
		if child != nil && child.isDir() {
			vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			return linuxerr.EISDIR
		}
		if rp.MustBeDir() {
			if child != nil {
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			}
			return linuxerr.ENOTDIR
		}
	}
	// The remote unlink is only issued when the parent is not synthetic and the
	// child is either not cached or not synthetic. In a synthetic parent, a
	// missing child dentry means the file does not exist.
	if parent.isSynthetic() {
		if child == nil {
			return linuxerr.ENOENT
		}
	} else if child == nil || !child.isSynthetic() {
		if err := parent.unlink(ctx, name, flags); err != nil {
			if child != nil {
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			}
			return err
		}
	}

	// Generate inotify events for rmdir or unlink.
	if dir {
		parent.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */)
	} else {
		var cw *vfs.Watches
		if child != nil {
			cw = &child.watches
		}
		vfs.InotifyRemoveChild(ctx, cw, &parent.watches, name)
	}

	parent.childrenMu.Lock()
	defer parent.childrenMu.Unlock()

	if child != nil {
		vfsObj.CommitDeleteDentry(ctx, &child.vfsd) // +checklocksforce: see above.
		child.setDeleted()
		if child.isSynthetic() {
			parent.syntheticChildren--
			child.decRefNoCaching()
		}
		// Queue child for a caching check once fs.renameMu is released.
		ds = appendDentry(ds, child)
	}
	parent.cacheNegativeLookupLocked(name)
	if parent.cachedMetadataAuthoritative() {
		// Keep the parent's cached metadata in step with the removal.
		parent.clearDirentsLocked()
		parent.touchCMtime()
		if dir {
			parent.decLinks()
		}
	}
	return nil
}
   722  
   723  // AccessAt implements vfs.Filesystem.Impl.AccessAt.
   724  func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
   725  	var ds *[]*dentry
   726  	fs.renameMu.RLock()
   727  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   728  	d, err := fs.resolveLocked(ctx, rp, &ds)
   729  	if err != nil {
   730  		return err
   731  	}
   732  	if err := d.checkPermissions(creds, ats); err != nil {
   733  		return err
   734  	}
   735  	if ats.MayWrite() && rp.Mount().ReadOnly() {
   736  		return linuxerr.EROFS
   737  	}
   738  	return nil
   739  }
   740  
   741  // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
   742  func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
   743  	var ds *[]*dentry
   744  	fs.renameMu.RLock()
   745  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   746  	d, err := fs.resolveLocked(ctx, rp, &ds)
   747  	if err != nil {
   748  		return nil, err
   749  	}
   750  	if opts.CheckSearchable {
   751  		if !d.isDir() {
   752  			return nil, linuxerr.ENOTDIR
   753  		}
   754  		if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
   755  			return nil, err
   756  		}
   757  	}
   758  	d.IncRef()
   759  	// Call d.checkCachingLocked() so it can be removed from the cache if needed.
   760  	ds = appendDentry(ds, d)
   761  	return &d.vfsd, nil
   762  }
   763  
   764  // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
   765  func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
   766  	var ds *[]*dentry
   767  	fs.renameMu.RLock()
   768  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   769  	start := rp.Start().Impl().(*dentry)
   770  	d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
   771  	if err != nil {
   772  		return nil, err
   773  	}
   774  	d.IncRef()
   775  	// Call d.checkCachingLocked() so it can be removed from the cache if needed.
   776  	ds = appendDentry(ds, d)
   777  	return &d.vfsd, nil
   778  }
   779  
   780  // LinkAt implements vfs.FilesystemImpl.LinkAt.
   781  func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
   782  	err := fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
   783  		if rp.Mount() != vd.Mount() {
   784  			return nil, linuxerr.EXDEV
   785  		}
   786  		d := vd.Dentry().Impl().(*dentry)
   787  		if d.isDir() {
   788  			return nil, linuxerr.EPERM
   789  		}
   790  		gid := auth.KGID(d.gid.Load())
   791  		uid := auth.KUID(d.uid.Load())
   792  		mode := linux.FileMode(d.mode.Load())
   793  		if err := vfs.MayLink(rp.Credentials(), mode, uid, gid); err != nil {
   794  			return nil, err
   795  		}
   796  		if d.nlink.Load() == 0 {
   797  			return nil, linuxerr.ENOENT
   798  		}
   799  		if d.nlink.Load() == math.MaxUint32 {
   800  			return nil, linuxerr.EMLINK
   801  		}
   802  		return parent.link(ctx, d, name)
   803  	}, nil)
   804  
   805  	if err == nil {
   806  		// Success!
   807  		vd.Dentry().Impl().(*dentry).incLinks()
   808  	}
   809  	return err
   810  }
   811  
   812  // MkdirAt implements vfs.FilesystemImpl.MkdirAt.
   813  func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
   814  	creds := rp.Credentials()
   815  	return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
   816  		// If the parent is a setgid directory, use the parent's GID
   817  		// rather than the caller's and enable setgid.
   818  		kgid := creds.EffectiveKGID
   819  		mode := opts.Mode
   820  		if parent.mode.Load()&linux.S_ISGID != 0 {
   821  			kgid = auth.KGID(parent.gid.Load())
   822  			mode |= linux.S_ISGID
   823  		}
   824  
   825  		child, err := parent.mkdir(ctx, name, mode, creds.EffectiveKUID, kgid)
   826  		if err == nil {
   827  			if fs.opts.interop != InteropModeShared {
   828  				parent.incLinks()
   829  			}
   830  			return child, nil
   831  		}
   832  
   833  		if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) {
   834  			return nil, err
   835  		}
   836  		ctx.Infof("Failed to create remote directory %q: %v; falling back to synthetic directory", name, err)
   837  		child = fs.newSyntheticDentry(&createSyntheticOpts{
   838  			name: name,
   839  			mode: linux.S_IFDIR | opts.Mode,
   840  			kuid: creds.EffectiveKUID,
   841  			kgid: creds.EffectiveKGID,
   842  		})
   843  		if fs.opts.interop != InteropModeShared {
   844  			parent.incLinks()
   845  		}
   846  		return child, nil
   847  	}, func(parent *dentry, name string) (*dentry, error) {
   848  		if !opts.ForSyntheticMountpoint {
   849  			// Can't create non-synthetic files in synthetic directories.
   850  			return nil, linuxerr.EPERM
   851  		}
   852  		child := fs.newSyntheticDentry(&createSyntheticOpts{
   853  			name: name,
   854  			mode: linux.S_IFDIR | opts.Mode,
   855  			kuid: creds.EffectiveKUID,
   856  			kgid: creds.EffectiveKGID,
   857  		})
   858  		parent.incLinks()
   859  		return child, nil
   860  	})
   861  }
   862  
   863  // MknodAt implements vfs.FilesystemImpl.MknodAt.
   864  func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
   865  	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
   866  		creds := rp.Credentials()
   867  		if child, err := parent.mknod(ctx, name, creds, &opts); err == nil {
   868  			return child, nil
   869  		} else if !linuxerr.Equals(linuxerr.EPERM, err) {
   870  			return nil, err
   871  		}
   872  
   873  		// EPERM means that gofer does not allow creating a socket or pipe. Fallback
   874  		// to creating a synthetic one, i.e. one that is kept entirely in memory.
   875  
   876  		// Check that we're not overriding an existing file with a synthetic one.
   877  		_, _, err := fs.stepLocked(ctx, resolvingPathFull(rp), parent, false /* mayFollowSymlinks */, ds) // +checklocksforce: parent.opMu taken by doCreateAt.
   878  		switch {
   879  		case err == nil:
   880  			// Step succeeded, another file exists.
   881  			return nil, linuxerr.EEXIST
   882  		case !linuxerr.Equals(linuxerr.ENOENT, err):
   883  			// Schrödinger. File/Cat may or may not exist.
   884  			return nil, err
   885  		}
   886  
   887  		switch opts.Mode.FileType() {
   888  		case linux.S_IFSOCK:
   889  			return fs.newSyntheticDentry(&createSyntheticOpts{
   890  				name:     name,
   891  				mode:     opts.Mode,
   892  				kuid:     creds.EffectiveKUID,
   893  				kgid:     creds.EffectiveKGID,
   894  				endpoint: opts.Endpoint,
   895  			}), nil
   896  		case linux.S_IFIFO:
   897  			return fs.newSyntheticDentry(&createSyntheticOpts{
   898  				name: name,
   899  				mode: opts.Mode,
   900  				kuid: creds.EffectiveKUID,
   901  				kgid: creds.EffectiveKGID,
   902  				pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize),
   903  			}), nil
   904  		}
   905  		// Retain error from gofer if synthetic file cannot be created internally.
   906  		return nil, linuxerr.EPERM
   907  	}, nil)
   908  }
   909  
// OpenAt implements vfs.FilesystemImpl.OpenAt.
//
// The path is resolved with fs.renameMu read-locked. If the final component
// does not exist and O_CREAT is set, it is created and opened via
// parent.createAndOpenChildLocked(). Otherwise the resolved dentry is opened
// via dentry.open() after renameMu has been released. O_TMPFILE is rejected
// with EOPNOTSUPP.
func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
	// Reject O_TMPFILE, which is not supported; supporting it correctly in the
	// presence of other remote filesystem users requires remote filesystem
	// support, and it isn't clear that there's any way to implement this in
	// 9P.
	if opts.Flags&linux.O_TMPFILE != 0 {
		return nil, linuxerr.EOPNOTSUPP
	}
	// mayCreate: O_CREAT is set. mustCreate: both O_CREAT and O_EXCL are set.
	mayCreate := opts.Flags&linux.O_CREAT != 0
	mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL)

	var ds *[]*dentry
	fs.renameMu.RLock()
	// unlock releases renameMu at most once. It is deferred to cover every
	// error return, and called explicitly right before dentry.open(), which
	// must run without locks held.
	unlocked := false
	unlock := func() {
		if !unlocked {
			fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
			unlocked = true
		}
	}
	defer unlock()

	start := rp.Start().Impl().(*dentry)
	if rp.Done() {
		// In this branch the dentry being opened is start itself.

		// Reject attempts to open mount root directory with O_CREAT.
		if mayCreate && rp.MustBeDir() {
			return nil, linuxerr.EISDIR
		}
		if mustCreate {
			return nil, linuxerr.EEXIST
		}
		if !start.cachedMetadataAuthoritative() {
			// Refresh dentry's attributes before opening.
			if err := start.updateMetadata(ctx); err != nil {
				return nil, err
			}
		}
		// Hold a reference on start across open(), which runs after
		// renameMu has been dropped.
		start.IncRef()
		defer start.DecRef(ctx)
		unlock()
		// start is intentionally not added to ds (which would remove it from the
		// cache) because doing so regresses performance in practice.
		return start.open(ctx, rp, &opts)
	}

	// Resolution restarts from here each time a trailing symlink has been
	// followed.
afterTrailingSymlink:
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return nil, err
	}
	// Check for search permission in the parent directory.
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return nil, err
	}
	// Reject attempts to open directories with O_CREAT.
	if mayCreate && rp.MustBeDir() {
		return nil, linuxerr.EISDIR
	}
	if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, rp.Component(), &ds); err != nil {
		return nil, err
	}
	// Determine whether or not we need to create a file.
	// NOTE(b/263297063): Don't hold opMu for writing here, to avoid
	// serializing OpenAt calls in the same directory in the common case
	// that the file exists.
	parent.opMu.RLock()
	child, followedSymlink, err := fs.stepLocked(ctx, resolvingPathFull(rp), parent, true /* mayFollowSymlinks */, &ds)
	parent.opMu.RUnlock()
	if followedSymlink {
		if mustCreate {
			// EEXIST must be returned if an existing symlink is opened with O_EXCL.
			return nil, linuxerr.EEXIST
		}
		if err != nil {
			// If followedSymlink && err != nil, then this symlink resolution error
			// must be handled by the VFS layer.
			return nil, err
		}
		// Continue resolution from the directory that contained the symlink.
		start = parent
		goto afterTrailingSymlink
	}
	if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate {
		// The file does not exist and creation was requested. Files cannot
		// be created under a synthetic parent.
		if parent.isSynthetic() {
			return nil, linuxerr.EPERM
		}

		// Take opMu for writing, but note that the file may have been
		// created by another goroutine since we checked for existence
		// a few lines ago. We must handle that case.
		parent.opMu.Lock()
		fd, createErr := parent.createAndOpenChildLocked(ctx, rp, &opts, &ds)
		if !linuxerr.Equals(linuxerr.EEXIST, createErr) {
			// Either the creation was a success, or we got an
			// unexpected error. Either way we can return here.
			parent.opMu.Unlock()
			return fd, createErr
		}

		// We raced, and now the file exists.
		if mustCreate {
			parent.opMu.Unlock()
			return nil, linuxerr.EEXIST
		}

		// Step to the file again. Since we still hold opMu for
		// writing, there can't be a race here.
		child, _, err = fs.stepLocked(ctx, resolvingPathFull(rp), parent, false /* mayFollowSymlinks */, &ds)
		parent.opMu.Unlock()
	}
	if err != nil {
		return nil, err
	}
	// From here on child refers to an existing file, which O_EXCL forbids.
	if mustCreate {
		return nil, linuxerr.EEXIST
	}
	if rp.MustBeDir() && !child.isDir() {
		return nil, linuxerr.ENOTDIR
	}
	// Hold a reference on child across open(), which runs after renameMu has
	// been dropped.
	child.IncRef()
	defer child.DecRef(ctx)
	unlock()
	// child is intentionally not added to ds (which would remove it from the
	// cache) because doing so regresses performance in practice.
	return child.open(ctx, rp, &opts)
}
  1036  
// open returns a file description for d opened according to opts.
//
// It first checks rp's credentials against the access types implied by the
// open flags, then dispatches on d's file type. Regular files (unless
// d.fs.opts.regularFilesUseSpecialFileFD is set), directories, symlinks,
// synthetic sockets/pipes and connect-opened sockets are handled inside the
// switch; every other case leaves vfd nil and is opened through
// d.openSpecialFile().
//
// Preconditions: The caller must hold no locks (since opening pipes may block
// indefinitely).
func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
	ats := vfs.AccessTypesForOpenFlags(opts)
	if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
		return nil, err
	}

	if !d.isSynthetic() {
		// renameMu is locked here because it is required by d.openHandle(), which
		// is called by d.ensureSharedHandle() and d.openSpecialFile() below. It is
		// also required by d.connect() which is called by
		// d.openSocketByConnecting(). Note that opening non-synthetic pipes may
		// block, renameMu is unlocked separately in d.openSpecialFile() for pipes.
		d.fs.renameMu.RLock()
		defer d.fs.renameMu.RUnlock()
	}

	// O_TRUNC is only honored for regular files.
	trunc := opts.Flags&linux.O_TRUNC != 0 && d.fileType() == linux.S_IFREG
	if trunc {
		// Lock metadataMu *while* we open a regular file with O_TRUNC because
		// open(2) will change the file size on server.
		d.metadataMu.Lock()
		defer d.metadataMu.Unlock()
	}

	var vfd *vfs.FileDescription
	var err error
	mnt := rp.Mount()
	switch d.fileType() {
	case linux.S_IFREG:
		// When regularFilesUseSpecialFileFD is set, vfd stays nil and the
		// file is opened through openSpecialFile() below.
		if !d.fs.opts.regularFilesUseSpecialFileFD {
			if err := d.ensureSharedHandle(ctx, ats.MayRead(), ats.MayWrite(), trunc); err != nil {
				return nil, err
			}
			fd, err := newRegularFileFD(mnt, d, opts.Flags)
			if err != nil {
				return nil, err
			}
			vfd = &fd.vfsfd
		}
	case linux.S_IFDIR:
		// Can't open directories with O_CREAT.
		if opts.Flags&linux.O_CREAT != 0 {
			return nil, linuxerr.EISDIR
		}
		// Can't open directories writably.
		if ats&vfs.MayWrite != 0 {
			return nil, linuxerr.EISDIR
		}
		if opts.Flags&linux.O_DIRECT != 0 {
			return nil, linuxerr.EINVAL
		}
		// Synthetic directories skip the shared handle.
		if !d.isSynthetic() {
			if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil {
				return nil, err
			}
		}
		fd := &directoryFD{}
		fd.LockFD.Init(&d.locks)
		if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
			return nil, err
		}
		// Count the open against the host-FD metric when d has a non-negative
		// readFD, and against the 9P metric otherwise.
		if d.readFD.Load() >= 0 {
			fsmetric.GoferOpensHost.Increment()
		} else {
			fsmetric.GoferOpens9P.Increment()
		}
		return &fd.vfsfd, nil
	case linux.S_IFLNK:
		// Can't open symlinks without O_PATH, which is handled at the VFS layer.
		return nil, linuxerr.ELOOP
	case linux.S_IFSOCK:
		if d.isSynthetic() {
			return nil, linuxerr.ENXIO
		}
		if d.fs.iopts.OpenSocketsByConnecting {
			return d.openSocketByConnecting(ctx, opts)
		}
		// Otherwise the socket is opened as a special file below.
	case linux.S_IFIFO:
		if d.isSynthetic() {
			return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags, &d.locks)
		}
		if d.fs.opts.disableFifoOpen {
			return nil, linuxerr.EPERM
		}
		// Otherwise the FIFO is opened as a special file below.
	}

	// Any file type that did not produce a file description above is opened
	// as a special file.
	if vfd == nil {
		if vfd, err = d.openSpecialFile(ctx, mnt, opts); err != nil {
			return nil, err
		}
	}

	if trunc {
		// If no errors occurred so far then update file size in memory. This
		// step is required even if !d.cachedMetadataAuthoritative() because
		// d.mappings has to be updated.
		// d.metadataMu has already been acquired if trunc == true.
		d.updateSizeLocked(0)

		if d.cachedMetadataAuthoritative() {
			d.touchCMtimeLocked()
		}
	}
	return vfd, err
}
  1144  
  1145  // Precondition: fs.renameMu is locked.
  1146  func (d *dentry) openSocketByConnecting(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
  1147  	if opts.Flags&linux.O_DIRECT != 0 {
  1148  		return nil, linuxerr.EINVAL
  1149  	}
  1150  	// Note that special value of linux.SockType = 0 is interpreted by lisafs
  1151  	// as "do not care about the socket type". Analogous to p9.AnonymousSocket.
  1152  	sockFD, err := d.connect(ctx, 0 /* sockType */)
  1153  	if err != nil {
  1154  		return nil, err
  1155  	}
  1156  	fd, err := host.NewFD(ctx, kernel.KernelFromContext(ctx).HostMount(), sockFD, &host.NewFDOptions{
  1157  		HaveFlags: true,
  1158  		Flags:     opts.Flags,
  1159  	})
  1160  	if err != nil {
  1161  		unix.Close(sockFD)
  1162  		return nil, err
  1163  	}
  1164  	return fd, nil
  1165  }
  1166  
  1167  // Preconditions:
  1168  //   - !d.isSynthetic().
  1169  //   - fs.renameMu is locked. It may be released temporarily while pipe blocks.
  1170  //   - If d is a pipe, no other locks (other than fs.renameMu) should be held.
  1171  func (d *dentry) openSpecialFile(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
  1172  	ats := vfs.AccessTypesForOpenFlags(opts)
  1173  	if opts.Flags&linux.O_DIRECT != 0 && !d.isRegularFile() {
  1174  		return nil, linuxerr.EINVAL
  1175  	}
  1176  	// We assume that the server silently inserts O_NONBLOCK in the open flags
  1177  	// for all named pipes (because all existing gofers do this).
  1178  	//
  1179  	// NOTE(b/133875563): This makes named pipe opens racy, because the
  1180  	// mechanisms for translating nonblocking to blocking opens can only detect
  1181  	// the instantaneous presence of a peer holding the other end of the pipe
  1182  	// open, not whether the pipe was *previously* opened by a peer that has
  1183  	// since closed its end.
  1184  	isBlockingOpenOfNamedPipe := d.fileType() == linux.S_IFIFO && opts.Flags&linux.O_NONBLOCK == 0
  1185  retry:
  1186  	h, err := d.openHandle(ctx, ats.MayRead(), ats.MayWrite(), opts.Flags&linux.O_TRUNC != 0)
  1187  	if err != nil {
  1188  		if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && linuxerr.Equals(linuxerr.ENXIO, err) {
  1189  			// An attempt to open a named pipe with O_WRONLY|O_NONBLOCK fails
  1190  			// with ENXIO if opening the same named pipe with O_WRONLY would
  1191  			// block because there are no readers of the pipe. Release renameMu
  1192  			// while blocking.
  1193  			d.fs.renameMu.RUnlock()
  1194  			err := sleepBetweenNamedPipeOpenChecks(ctx)
  1195  			d.fs.renameMu.RLock()
  1196  			if err != nil {
  1197  				return nil, err
  1198  			}
  1199  			goto retry
  1200  		}
  1201  		return nil, err
  1202  	}
  1203  	if isBlockingOpenOfNamedPipe && ats == vfs.MayRead && h.fd >= 0 {
  1204  		// Release renameMu while blocking.
  1205  		d.fs.renameMu.RUnlock()
  1206  		err := blockUntilNonblockingPipeHasWriter(ctx, h.fd)
  1207  		d.fs.renameMu.RLock()
  1208  		if err != nil {
  1209  			h.close(ctx)
  1210  			return nil, err
  1211  		}
  1212  	}
  1213  	fd, err := newSpecialFileFD(h, mnt, d, opts.Flags)
  1214  	if err != nil {
  1215  		h.close(ctx)
  1216  		return nil, err
  1217  	}
  1218  	return &fd.vfsfd, nil
  1219  }
  1220  
  1221  // Preconditions:
  1222  //   - d.fs.renameMu must be locked.
  1223  //   - d.opMu must be locked for writing.
  1224  //   - !d.isSynthetic().
  1225  //
  1226  // +checklocks:d.opMu
  1227  func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) {
  1228  	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
  1229  		return nil, err
  1230  	}
  1231  	if d.isDeleted() {
  1232  		return nil, linuxerr.ENOENT
  1233  	}
  1234  	mnt := rp.Mount()
  1235  	if err := mnt.CheckBeginWrite(); err != nil {
  1236  		return nil, err
  1237  	}
  1238  	defer mnt.EndWrite()
  1239  
  1240  	creds := rp.Credentials()
  1241  	name := rp.Component()
  1242  	// If the parent is a setgid directory, use the parent's GID rather
  1243  	// than the caller's.
  1244  	kgid := creds.EffectiveKGID
  1245  	if d.mode.Load()&linux.S_ISGID != 0 {
  1246  		kgid = auth.KGID(d.gid.Load())
  1247  	}
  1248  
  1249  	child, h, err := d.openCreate(ctx, name, opts.Flags&linux.O_ACCMODE, opts.Mode, creds.EffectiveKUID, kgid)
  1250  	if err != nil {
  1251  		return nil, err
  1252  	}
  1253  
  1254  	// Incorporate the fid that was opened by lcreate.
  1255  	useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD
  1256  	if useRegularFileFD {
  1257  		var readable, writable bool
  1258  		child.handleMu.Lock()
  1259  		if vfs.MayReadFileWithOpenFlags(opts.Flags) {
  1260  			readable = true
  1261  			if h.fd != -1 {
  1262  				child.readFD = atomicbitops.FromInt32(h.fd)
  1263  				child.mmapFD = atomicbitops.FromInt32(h.fd)
  1264  			}
  1265  		}
  1266  		if vfs.MayWriteFileWithOpenFlags(opts.Flags) {
  1267  			writable = true
  1268  			child.writeFD = atomicbitops.FromInt32(h.fd)
  1269  		}
  1270  		child.updateHandles(ctx, h, readable, writable)
  1271  		child.handleMu.Unlock()
  1272  	}
  1273  	// Insert the dentry into the tree.
  1274  	d.childrenMu.Lock()
  1275  	// We have d.opMu for writing, so there can not be a cached child with
  1276  	// this name.  We could not have raced.
  1277  	d.cacheNewChildLocked(child, name)
  1278  	appendNewChildDentry(ds, d, child)
  1279  	if d.cachedMetadataAuthoritative() {
  1280  		d.touchCMtime()
  1281  		d.clearDirentsLocked()
  1282  	}
  1283  	d.childrenMu.Unlock()
  1284  
  1285  	// Finally, construct a file description representing the created file.
  1286  	var childVFSFD *vfs.FileDescription
  1287  	if useRegularFileFD {
  1288  		fd, err := newRegularFileFD(mnt, child, opts.Flags)
  1289  		if err != nil {
  1290  			return nil, err
  1291  		}
  1292  		childVFSFD = &fd.vfsfd
  1293  	} else {
  1294  		fd, err := newSpecialFileFD(h, mnt, child, opts.Flags)
  1295  		if err != nil {
  1296  			h.close(ctx)
  1297  			return nil, err
  1298  		}
  1299  		childVFSFD = &fd.vfsfd
  1300  	}
  1301  	d.watches.Notify(ctx, name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */)
  1302  	return childVFSFD, nil
  1303  }
  1304  
  1305  // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
  1306  func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
  1307  	var ds *[]*dentry
  1308  	fs.renameMu.RLock()
  1309  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1310  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1311  	if err != nil {
  1312  		return "", err
  1313  	}
  1314  	if !d.isSymlink() {
  1315  		return "", linuxerr.EINVAL
  1316  	}
  1317  	return d.readlink(ctx, rp.Mount())
  1318  }
  1319  
// RenameAt implements vfs.FilesystemImpl.RenameAt.
//
// It renames the child oldName of oldParentVD to the final component of rp.
// fs.renameMu is held for writing for the whole operation. The only accepted
// flag is RENAME_NOREPLACE, and it is rejected under InteropModeShared.
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
	// Resolve newParent first to verify that it's on this Mount.
	var ds *[]*dentry
	fs.renameMu.Lock()
	defer fs.renameMuUnlockAndCheckCaching(ctx, &ds)
	newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds)
	if err != nil {
		return err
	}

	// Any flag other than RENAME_NOREPLACE is unsupported.
	if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
		return linuxerr.EINVAL
	}
	if fs.opts.interop == InteropModeShared && opts.Flags&linux.RENAME_NOREPLACE != 0 {
		// Requires 9P support to synchronize with other remote filesystem
		// users.
		return linuxerr.EINVAL
	}

	newName := rp.Component()
	if newName == "." || newName == ".." {
		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
			return linuxerr.EEXIST
		}
		return linuxerr.EBUSY
	}
	if len(newName) > MaxFilenameLen {
		return linuxerr.ENAMETOOLONG
	}
	// Source and destination must be on the same mount.
	mnt := rp.Mount()
	if mnt != oldParentVD.Mount() {
		return linuxerr.EXDEV
	}
	if err := mnt.CheckBeginWrite(); err != nil {
		return err
	}
	defer mnt.EndWrite()

	oldParent := oldParentVD.Dentry().Impl().(*dentry)
	// Refresh oldParent's metadata before the permission check if the cached
	// copy is not authoritative.
	if !oldParent.cachedMetadataAuthoritative() {
		if err := oldParent.updateMetadata(ctx); err != nil {
			return err
		}
	}
	creds := rp.Credentials()
	if err := oldParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}

	vfsObj := rp.VirtualFilesystem()
	if err := fs.revalidateOne(ctx, vfsObj, newParent, newName, &ds); err != nil {
		return err
	}
	if err := fs.revalidateOne(ctx, vfsObj, oldParent, oldName, &ds); err != nil {
		return err
	}

	// We need a dentry representing the renamed file since, if it's a
	// directory, we need to check for write permission on it.
	oldParent.opMu.Lock()
	defer oldParent.opMu.Unlock()
	renamed, err := fs.getChildLocked(ctx, oldParent, oldName, &ds)
	if err != nil {
		return err
	}
	if err := oldParent.mayDelete(creds, renamed); err != nil {
		return err
	}
	if renamed.isDir() {
		// A directory cannot be moved into itself or into one of its own
		// descendants.
		if renamed == newParent || genericIsAncestorDentry(renamed, newParent) {
			return linuxerr.EINVAL
		}
		if oldParent != newParent {
			if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil {
				return err
			}
		}
	} else {
		if opts.MustBeDir || rp.MustBeDir() {
			return linuxerr.ENOTDIR
		}
	}

	// When both parents are the same dentry, its permissions were checked
	// and its opMu was locked above.
	if oldParent != newParent {
		if err := newParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
			return err
		}
		newParent.opMu.Lock()
		defer newParent.opMu.Unlock()
	}
	if newParent.isDeleted() {
		return linuxerr.ENOENT
	}
	// ENOENT from the lookup of the destination is not treated as a failure;
	// any other error aborts the rename.
	replaced, err := fs.getChildLocked(ctx, newParent, newName, &ds) // +checklocksforce: newParent.opMu taken if newParent != oldParent.
	if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
		return err
	}
	var replacedVFSD *vfs.Dentry
	if replaced != nil {
		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
			return linuxerr.EEXIST
		}
		replacedVFSD = &replaced.vfsd
		// A directory may only replace a directory, and a non-directory may
		// only replace a non-directory.
		if replaced.isDir() {
			if !renamed.isDir() {
				return linuxerr.EISDIR
			}
			if genericIsAncestorDentry(replaced, renamed) {
				return linuxerr.ENOTEMPTY
			}
		} else {
			if rp.MustBeDir() || renamed.isDir() {
				return linuxerr.ENOTDIR
			}
		}
	}

	// Renaming a file onto its own name in the same directory is a no-op.
	if oldParent == newParent && oldName == newName {
		return nil
	}
	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)
	if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil {
		return err
	}

	// Update the remote filesystem.
	if !renamed.isSynthetic() {
		if err := oldParent.rename(ctx, oldName, newParent, newName); err != nil {
			vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
			return err
		}
	} else if replaced != nil && !replaced.isSynthetic() {
		// We are replacing an existing real file with a synthetic one, so we
		// need to unlink the former.
		flags := uint32(0)
		if replaced.isDir() {
			flags = linux.AT_REMOVEDIR
		}
		if err := newParent.unlink(ctx, newName, flags); err != nil {
			vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
			return err
		}
	}

	// Update the dentry tree.
	newParent.childrenMu.Lock()
	defer newParent.childrenMu.Unlock()
	if oldParent != newParent {
		oldParent.childrenMu.Lock()
		defer oldParent.childrenMu.Unlock()
	}

	vfsObj.CommitRenameReplaceDentry(ctx, &renamed.vfsd, replacedVFSD)
	if replaced != nil {
		replaced.setDeleted()
		if replaced.isSynthetic() {
			newParent.syntheticChildren--
			replaced.decRefNoCaching()
		}
		ds = appendDentry(ds, replaced)
		// Remove the replaced entry from its parent's cache.
		delete(newParent.children, newName)
	}
	oldParent.cacheNegativeLookupLocked(oldName) // +checklocksforce: oldParent.childrenMu is held if oldParent != newParent.
	// Move the synthetic-child accounting from the old parent to the new one.
	if renamed.isSynthetic() {
		oldParent.syntheticChildren--
		newParent.syntheticChildren++
	}
	// We have d.opMu for writing, so no need to check for existence of a
	// child with the given name. We could not have raced.
	newParent.cacheNewChildLocked(renamed, newName)
	// NOTE(review): presumably this drops the reference that renamed held on
	// its previous parent — confirm against cacheNewChildLocked().
	oldParent.decRefNoCaching()
	if oldParent != newParent {
		ds = appendDentry(ds, newParent)
		ds = appendDentry(ds, oldParent)
	}

	// Update metadata.
	if renamed.cachedMetadataAuthoritative() {
		renamed.touchCtime()
	}
	if oldParent.cachedMetadataAuthoritative() {
		oldParent.clearDirentsLocked()
		oldParent.touchCMtime()
		if renamed.isDir() {
			oldParent.decLinks()
		}
	}
	if newParent.cachedMetadataAuthoritative() {
		newParent.clearDirentsLocked()
		newParent.touchCMtime()
		if renamed.isDir() && (replaced == nil || !replaced.isDir()) {
			// Increase the link count if we did not replace another directory.
			newParent.incLinks()
		}
	}
	vfs.InotifyRename(ctx, &renamed.watches, &oldParent.watches, &newParent.watches, oldName, newName, renamed.isDir())
	return nil
}
  1521  
  1522  // RmdirAt implements vfs.FilesystemImpl.RmdirAt.
  1523  func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
  1524  	return fs.unlinkAt(ctx, rp, true /* dir */)
  1525  }
  1526  
  1527  // SetStatAt implements vfs.FilesystemImpl.SetStatAt.
  1528  func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
  1529  	var ds *[]*dentry
  1530  	fs.renameMu.RLock()
  1531  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1532  	if err != nil {
  1533  		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1534  		return err
  1535  	}
  1536  	err = d.setStat(ctx, rp.Credentials(), &opts, rp.Mount())
  1537  	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1538  	if err != nil {
  1539  		return err
  1540  	}
  1541  
  1542  	if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
  1543  		d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
  1544  	}
  1545  	return nil
  1546  }
  1547  
  1548  // StatAt implements vfs.FilesystemImpl.StatAt.
  1549  func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
  1550  	var ds *[]*dentry
  1551  	fs.renameMu.RLock()
  1552  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1553  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1554  	if err != nil {
  1555  		return linux.Statx{}, err
  1556  	}
  1557  	// Since walking updates metadata for all traversed dentries under
  1558  	// InteropModeShared, including the returned one, we can return cached
  1559  	// metadata here regardless of fs.opts.interop.
  1560  	var stat linux.Statx
  1561  	d.statTo(&stat)
  1562  	return stat, nil
  1563  }
  1564  
  1565  // StatFSAt implements vfs.FilesystemImpl.StatFSAt.
  1566  func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
  1567  	var ds *[]*dentry
  1568  	fs.renameMu.RLock()
  1569  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1570  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1571  	if err != nil {
  1572  		return linux.Statfs{}, err
  1573  	}
  1574  	// If d is synthetic, invoke statfs on the first ancestor of d that isn't.
  1575  	for d.isSynthetic() {
  1576  		d = d.parent
  1577  	}
  1578  	statfs, err := d.statfs(ctx)
  1579  	if err != nil {
  1580  		return linux.Statfs{}, err
  1581  	}
  1582  	if statfs.NameLength == 0 || statfs.NameLength > MaxFilenameLen {
  1583  		statfs.NameLength = MaxFilenameLen
  1584  	}
  1585  	// This is primarily for distinguishing a gofer file system in
  1586  	// tests. Testing is important, so instead of defining
  1587  	// something completely random, use a standard value.
  1588  	statfs.Type = linux.V9FS_MAGIC
  1589  	return statfs, nil
  1590  }
  1591  
  1592  // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
  1593  func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
  1594  	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
  1595  		child, err := parent.symlink(ctx, name, target, rp.Credentials())
  1596  		if err != nil {
  1597  			return nil, err
  1598  		}
  1599  		if parent.fs.opts.interop != InteropModeShared {
  1600  			// Cache the symlink target on creation. In practice, this helps avoid a
  1601  			// lot of ReadLink RPCs. Note that when InteropModeShared is in effect,
  1602  			// we are forced to make Readlink RPCs. Because in this mode, we use host
  1603  			// timestamps, not timestamps based on our internal clock. And readlink
  1604  			// updates the atime on the host.
  1605  			child.haveTarget = true
  1606  			child.target = target
  1607  		}
  1608  		return child, nil
  1609  	}, nil)
  1610  }
  1611  
  1612  // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
  1613  func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
  1614  	return fs.unlinkAt(ctx, rp, false /* dir */)
  1615  }
  1616  
  1617  // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
  1618  func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
  1619  	var ds *[]*dentry
  1620  	fs.renameMu.RLock()
  1621  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1622  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1623  	if err != nil {
  1624  		return nil, err
  1625  	}
  1626  	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
  1627  		return nil, err
  1628  	}
  1629  	if !d.isSocket() {
  1630  		return nil, linuxerr.ECONNREFUSED
  1631  	}
  1632  	if d.endpoint != nil {
  1633  		return d.endpoint, nil
  1634  	}
  1635  	if !d.isSynthetic() {
  1636  		d.IncRef()
  1637  		ds = appendDentry(ds, d)
  1638  		return &endpoint{
  1639  			dentry: d,
  1640  			path:   opts.Addr,
  1641  		}, nil
  1642  	}
  1643  	return nil, linuxerr.ECONNREFUSED
  1644  }
  1645  
  1646  // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
  1647  func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
  1648  	var ds *[]*dentry
  1649  	fs.renameMu.RLock()
  1650  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1651  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1652  	if err != nil {
  1653  		return nil, err
  1654  	}
  1655  	return d.listXattr(ctx, size)
  1656  }
  1657  
  1658  // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
  1659  func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
  1660  	var ds *[]*dentry
  1661  	fs.renameMu.RLock()
  1662  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1663  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1664  	if err != nil {
  1665  		return "", err
  1666  	}
  1667  	return d.getXattr(ctx, rp.Credentials(), &opts)
  1668  }
  1669  
  1670  // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
  1671  func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
  1672  	var ds *[]*dentry
  1673  	fs.renameMu.RLock()
  1674  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1675  	if err != nil {
  1676  		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1677  		return err
  1678  	}
  1679  	err = d.setXattr(ctx, rp.Credentials(), &opts)
  1680  	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1681  	if err != nil {
  1682  		return err
  1683  	}
  1684  
  1685  	d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
  1686  	return nil
  1687  }
  1688  
  1689  // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
  1690  func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
  1691  	var ds *[]*dentry
  1692  	fs.renameMu.RLock()
  1693  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1694  	if err != nil {
  1695  		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1696  		return err
  1697  	}
  1698  	err = d.removeXattr(ctx, rp.Credentials(), name)
  1699  	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1700  	if err != nil {
  1701  		return err
  1702  	}
  1703  
  1704  	d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
  1705  	return nil
  1706  }
  1707  
  1708  // PrependPath implements vfs.FilesystemImpl.PrependPath.
  1709  func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
  1710  	fs.renameMu.RLock()
  1711  	defer fs.renameMu.RUnlock()
  1712  	return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
  1713  }
  1714  
  1715  type mopt struct {
  1716  	key   string
  1717  	value any
  1718  }
  1719  
  1720  func (m mopt) String() string {
  1721  	if m.value == nil {
  1722  		return fmt.Sprintf("%s", m.key)
  1723  	}
  1724  	return fmt.Sprintf("%s=%v", m.key, m.value)
  1725  }
  1726  
  1727  // MountOptions implements vfs.FilesystemImpl.MountOptions.
  1728  func (fs *filesystem) MountOptions() string {
  1729  	optsKV := []mopt{
  1730  		{moptTransport, transportModeFD}, // Only valid value, currently.
  1731  		{moptReadFD, fs.opts.fd},         // Currently, read and write FD are the same.
  1732  		{moptWriteFD, fs.opts.fd},        // Currently, read and write FD are the same.
  1733  		{moptAname, fs.opts.aname},
  1734  		{moptDfltUID, fs.opts.dfltuid},
  1735  		{moptDfltGID, fs.opts.dfltgid},
  1736  	}
  1737  
  1738  	switch fs.opts.interop {
  1739  	case InteropModeExclusive:
  1740  		optsKV = append(optsKV, mopt{moptCache, cacheFSCache})
  1741  	case InteropModeWritethrough:
  1742  		optsKV = append(optsKV, mopt{moptCache, cacheFSCacheWritethrough})
  1743  	case InteropModeShared:
  1744  		optsKV = append(optsKV, mopt{moptCache, cacheRemoteRevalidating})
  1745  	}
  1746  	if fs.opts.regularFilesUseSpecialFileFD {
  1747  		optsKV = append(optsKV, mopt{moptDisableFileHandleSharing, nil})
  1748  	}
  1749  	if fs.opts.disableFifoOpen {
  1750  		optsKV = append(optsKV, mopt{moptDisableFifoOpen, nil})
  1751  	}
  1752  	if fs.opts.forcePageCache {
  1753  		optsKV = append(optsKV, mopt{moptForcePageCache, nil})
  1754  	}
  1755  	if fs.opts.limitHostFDTranslation {
  1756  		optsKV = append(optsKV, mopt{moptLimitHostFDTranslation, nil})
  1757  	}
  1758  	if fs.opts.overlayfsStaleRead {
  1759  		optsKV = append(optsKV, mopt{moptOverlayfsStaleRead, nil})
  1760  	}
  1761  	if fs.opts.directfs.enabled {
  1762  		optsKV = append(optsKV, mopt{moptDirectfs, nil})
  1763  	}
  1764  
  1765  	opts := make([]string, 0, len(optsKV))
  1766  	for _, opt := range optsKV {
  1767  		opts = append(opts, opt.String())
  1768  	}
  1769  	return strings.Join(opts, ",")
  1770  }