github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/gofer/filesystem.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gofer
    16  
    17  import (
    18  	"fmt"
    19  	"math"
    20  	"strings"
    21  	"sync"
    22  
    23  	"golang.org/x/sys/unix"
    24  	"github.com/metacubex/gvisor/pkg/abi/linux"
    25  	"github.com/metacubex/gvisor/pkg/atomicbitops"
    26  	"github.com/metacubex/gvisor/pkg/context"
    27  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    28  	"github.com/metacubex/gvisor/pkg/fspath"
    29  	"github.com/metacubex/gvisor/pkg/refs"
    30  	"github.com/metacubex/gvisor/pkg/sentry/fsimpl/host"
    31  	"github.com/metacubex/gvisor/pkg/sentry/fsmetric"
    32  	"github.com/metacubex/gvisor/pkg/sentry/kernel"
    33  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    34  	"github.com/metacubex/gvisor/pkg/sentry/kernel/pipe"
    35  	"github.com/metacubex/gvisor/pkg/sentry/socket/unix/transport"
    36  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    37  )
    38  
    39  // Sync implements vfs.FilesystemImpl.Sync.
    40  func (fs *filesystem) Sync(ctx context.Context) error {
    41  	// Snapshot current syncable dentries and special file FDs.
    42  	fs.syncMu.Lock()
    43  	ds := make([]*dentry, 0, fs.syncableDentries.Len())
    44  	for elem := fs.syncableDentries.Front(); elem != nil; elem = elem.Next() {
    45  		ds = append(ds, elem.d)
    46  	}
    47  	sffds := make([]*specialFileFD, 0, fs.specialFileFDs.Len())
    48  	for sffd := fs.specialFileFDs.Front(); sffd != nil; sffd = sffd.Next() {
    49  		sffds = append(sffds, sffd)
    50  	}
    51  	fs.syncMu.Unlock()
    52  
    53  	// Return the first error we encounter, but sync everything we can
    54  	// regardless.
    55  	var retErr error
    56  
    57  	// Note that lisafs is capable of batching FSync RPCs. However, we can not
    58  	// batch all the FDIDs to be synced from ds and sffds. Because the error
    59  	// handling varies based on file type. FSync errors are only considered for
    60  	// regular file FDIDs that were opened for writing. We could do individual
    61  	// RPCs for such FDIDs and batch the rest, but it increases code complexity
    62  	// substantially. We could implement it in the future if need be.
    63  
    64  	// Sync syncable dentries.
    65  	for _, d := range ds {
    66  		if err := d.syncCachedFile(ctx, true /* forFilesystemSync */); err != nil {
    67  			ctx.Infof("gofer.filesystem.Sync: dentry.syncCachedFile failed: %v", err)
    68  			if retErr == nil {
    69  				retErr = err
    70  			}
    71  		}
    72  	}
    73  
    74  	// Sync special files, which may be writable but do not use dentry shared
    75  	// handles (so they won't be synced by the above).
    76  	for _, sffd := range sffds {
    77  		if err := sffd.sync(ctx, true /* forFilesystemSync */); err != nil {
    78  			ctx.Infof("gofer.filesystem.Sync: specialFileFD.sync failed: %v", err)
    79  			if retErr == nil {
    80  				retErr = err
    81  			}
    82  		}
    83  	}
    84  
    85  	return retErr
    86  }
    87  
    88  // MaxFilenameLen is the maximum length of a filename. This is dictated by 9P's
    89  // encoding of strings, which uses 2 bytes for the length prefix.
    90  const MaxFilenameLen = (1 << 16) - 1
    91  
    92  // dentrySlicePool is a pool of *[]*dentry used to store dentries for which
    93  // dentry.checkCachingLocked() must be called. The pool holds pointers to
    94  // slices because Go lacks generics, so sync.Pool operates on any, so
    95  // every call to (what should be) sync.Pool<[]*dentry>.Put() allocates a copy
    96  // of the slice header on the heap.
    97  var dentrySlicePool = sync.Pool{
    98  	New: func() any {
    99  		ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity
   100  		return &ds
   101  	},
   102  }
   103  
   104  func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry {
   105  	if ds == nil {
   106  		ds = dentrySlicePool.Get().(*[]*dentry)
   107  	}
   108  	*ds = append(*ds, d)
   109  	return ds
   110  }
   111  
   112  // Precondition: !parent.isSynthetic() && !child.isSynthetic().
   113  func appendNewChildDentry(ds **[]*dentry, parent *dentry, child *dentry) {
   114  	// The new child was added to parent and took a ref on the parent (hence
   115  	// parent can be removed from cache). A new child has 0 refs for now. So
   116  	// checkCachingLocked() should be called on both. Call it first on the parent
   117  	// as it may create space in the cache for child to be inserted - hence
   118  	// avoiding a cache eviction.
   119  	*ds = appendDentry(*ds, parent)
   120  	*ds = appendDentry(*ds, child)
   121  }
   122  
   123  // Preconditions: ds != nil.
   124  func putDentrySlice(ds *[]*dentry) {
   125  	// Allow dentries to be GC'd.
   126  	for i := range *ds {
   127  		(*ds)[i] = nil
   128  	}
   129  	*ds = (*ds)[:0]
   130  	dentrySlicePool.Put(ds)
   131  }
   132  
   133  // renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls
   134  // dentry.checkCachingLocked on all dentries in *dsp with fs.renameMu locked
   135  // for writing.
   136  //
   137  // dsp is a pointer-to-pointer since defer evaluates its arguments immediately,
   138  // but dentry slices are allocated lazily, and it's much easier to say "defer
   139  // fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
   140  // fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
   141  // +checklocksreleaseread:fs.renameMu
   142  func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **[]*dentry) {
   143  	fs.renameMu.RUnlock()
   144  	if *dsp == nil {
   145  		return
   146  	}
   147  	ds := **dsp
   148  	for _, d := range ds {
   149  		d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
   150  	}
   151  	putDentrySlice(*dsp)
   152  }
   153  
   154  // +checklocksrelease:fs.renameMu
   155  func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
   156  	if *ds == nil {
   157  		fs.renameMu.Unlock()
   158  		return
   159  	}
   160  	for _, d := range **ds {
   161  		d.checkCachingLocked(ctx, true /* renameMuWriteLocked */)
   162  	}
   163  	fs.renameMu.Unlock()
   164  	putDentrySlice(*ds)
   165  }
   166  
   167  // stepLocked resolves rp.Component() to an existing file, starting from the
   168  // given directory.
   169  //
   170  // Dentries which may become cached as a result of the traversal are appended
   171  // to *ds.
   172  //
   173  // Preconditions:
   174  //   - fs.renameMu must be locked.
   175  //   - d.opMu must be locked for reading.
   176  //   - !rp.Done().
   177  //   - If !d.cachedMetadataAuthoritative(), then d and all children that are
   178  //     part of rp must have been revalidated.
   179  //
   180  // +checklocksread:d.opMu
   181  func (fs *filesystem) stepLocked(ctx context.Context, rp resolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, bool, error) {
   182  	if !d.isDir() {
   183  		return nil, false, linuxerr.ENOTDIR
   184  	}
   185  	if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
   186  		return nil, false, err
   187  	}
   188  	name := rp.Component()
   189  	if name == "." {
   190  		rp.Advance()
   191  		return d, false, nil
   192  	}
   193  	if name == ".." {
   194  		if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil {
   195  			return nil, false, err
   196  		} else if isRoot || d.parent.Load() == nil {
   197  			rp.Advance()
   198  			return d, false, nil
   199  		}
   200  		if err := rp.CheckMount(ctx, &d.parent.Load().vfsd); err != nil {
   201  			return nil, false, err
   202  		}
   203  		rp.Advance()
   204  		return d.parent.Load(), false, nil
   205  	}
   206  	child, err := fs.getChildAndWalkPathLocked(ctx, d, rp, ds)
   207  	if err != nil {
   208  		return nil, false, err
   209  	}
   210  	if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
   211  		return nil, false, err
   212  	}
   213  	if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() {
   214  		target, err := child.readlink(ctx, rp.Mount())
   215  		if err != nil {
   216  			return nil, false, err
   217  		}
   218  		followedSymlink, err := rp.HandleSymlink(target)
   219  		return d, followedSymlink, err
   220  	}
   221  	rp.Advance()
   222  	return child, false, nil
   223  }
   224  
   225  // getChildLocked returns a dentry representing the child of parent with the
   226  // given name. Returns ENOENT if the child doesn't exist.
   227  //
   228  // Preconditions:
   229  //   - fs.renameMu must be locked.
   230  //   - parent.opMu must be locked.
   231  //   - parent.isDir().
   232  //   - name is not "." or "..".
   233  //   - parent and the dentry at name have been revalidated.
   234  //
   235  // +checklocks:parent.opMu
   236  func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
   237  	if child, err := parent.getCachedChildLocked(name); child != nil || err != nil {
   238  		return child, err
   239  	}
   240  	// We don't need to check for race here because parent.opMu is held for
   241  	// writing.
   242  	return fs.getRemoteChildLocked(ctx, parent, name, false /* checkForRace */, ds)
   243  }
   244  
   245  // getRemoteChildLocked is similar to getChildLocked, with the additional
   246  // precondition that the child identified by name does not exist in cache.
   247  //
   248  // If checkForRace argument is true, then this method will check to see if the
   249  // call has raced with another getRemoteChild call, and will handle the race if
   250  // so.
   251  //
   252  // Preconditions:
   253  //   - If checkForRace is false, then parent.opMu must be held for writing.
   254  //   - Otherwise, parent.opMu must be held for reading.
   255  //
   256  // Postcondition: The returned dentry is already cached appropriately.
   257  //
   258  // +checklocksread:parent.opMu
   259  func (fs *filesystem) getRemoteChildLocked(ctx context.Context, parent *dentry, name string, checkForRace bool, ds **[]*dentry) (*dentry, error) {
   260  	child, err := parent.getRemoteChild(ctx, name)
   261  	// Cache the result appropriately in the dentry tree.
   262  	if err != nil {
   263  		if linuxerr.Equals(linuxerr.ENOENT, err) {
   264  			parent.childrenMu.Lock()
   265  			defer parent.childrenMu.Unlock()
   266  			parent.cacheNegativeLookupLocked(name)
   267  		}
   268  		return nil, err
   269  	}
   270  
   271  	parent.childrenMu.Lock()
   272  	defer parent.childrenMu.Unlock()
   273  
   274  	if checkForRace {
   275  		// See if we raced with another getRemoteChild call that added
   276  		// to the cache.
   277  		if cachedChild, ok := parent.children[name]; ok && cachedChild != nil {
   278  			// We raced. Destroy our child and return the cached
   279  			// one. This child has no handles, no data, and has not
   280  			// been cached, so destruction is quick and painless.
   281  			child.destroyDisconnected(ctx)
   282  
   283  			// All good. Return the cached child.
   284  			return cachedChild, nil
   285  		}
   286  		// No race, continue with the child we got.
   287  	}
   288  	parent.cacheNewChildLocked(child, name)
   289  	appendNewChildDentry(ds, parent, child)
   290  	return child, nil
   291  }
   292  
   293  // getChildAndWalkPathLocked is the same as getChildLocked, except that it
   294  // may prefetch the entire path represented by rp.
   295  //
   296  // +checklocksread:parent.opMu
   297  func (fs *filesystem) getChildAndWalkPathLocked(ctx context.Context, parent *dentry, rp resolvingPath, ds **[]*dentry) (*dentry, error) {
   298  	if child, err := parent.getCachedChildLocked(rp.Component()); child != nil || err != nil {
   299  		return child, err
   300  	}
   301  	// dentry.getRemoteChildAndWalkPathLocked already handles dentry caching.
   302  	return parent.getRemoteChildAndWalkPathLocked(ctx, rp, ds)
   303  }
   304  
   305  // getCachedChildLocked returns a child dentry if it was cached earlier. If no
   306  // cached child dentry exists, (nil, nil) is returned.
   307  //
   308  // Preconditions:
   309  //   - fs.renameMu must be locked.
   310  //   - d.opMu must be locked for reading.
   311  //   - d.isDir().
   312  //   - name is not "." or "..".
   313  //   - d and the dentry at name have been revalidated.
   314  //
   315  // +checklocksread:d.opMu
   316  func (d *dentry) getCachedChildLocked(name string) (*dentry, error) {
   317  	if len(name) > MaxFilenameLen {
   318  		return nil, linuxerr.ENAMETOOLONG
   319  	}
   320  	d.childrenMu.Lock()
   321  	defer d.childrenMu.Unlock()
   322  	if child, ok := d.children[name]; ok || d.isSynthetic() {
   323  		if child == nil {
   324  			return nil, linuxerr.ENOENT
   325  		}
   326  		return child, nil
   327  	}
   328  
   329  	if d.childrenSet != nil {
   330  		// Is the child even there? Don't make RPC if not.
   331  		if _, ok := d.childrenSet[name]; !ok {
   332  			return nil, linuxerr.ENOENT
   333  		}
   334  	}
   335  	return nil, nil
   336  }
   337  
   338  // walkParentDirLocked resolves all but the last path component of rp to an
   339  // existing directory, starting from the given directory (which is usually
   340  // rp.Start().Impl().(*dentry)). It does not check that the returned directory
   341  // is searchable by the provider of rp.
   342  //
   343  // Preconditions:
   344  //   - fs.renameMu must be locked.
   345  //   - !rp.Done().
   346  //   - If !d.cachedMetadataAuthoritative(), then d's cached metadata must be up
   347  //     to date.
   348  func (fs *filesystem) walkParentDirLocked(ctx context.Context, vfsRP *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
   349  	rp := resolvingPathParent(vfsRP)
   350  	if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
   351  		return nil, err
   352  	}
   353  	for !rp.done() {
   354  		d.opMu.RLock()
   355  		next, followedSymlink, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
   356  		d.opMu.RUnlock()
   357  		if err != nil {
   358  			return nil, err
   359  		}
   360  		d = next
   361  		if followedSymlink {
   362  			if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
   363  				return nil, err
   364  			}
   365  		}
   366  	}
   367  	if !d.isDir() {
   368  		return nil, linuxerr.ENOTDIR
   369  	}
   370  	return d, nil
   371  }
   372  
   373  // resolveLocked resolves rp to an existing file.
   374  //
   375  // Preconditions: fs.renameMu must be locked.
   376  func (fs *filesystem) resolveLocked(ctx context.Context, vfsRP *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
   377  	rp := resolvingPathFull(vfsRP)
   378  	d := rp.Start().Impl().(*dentry)
   379  	if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
   380  		return nil, err
   381  	}
   382  	for !rp.done() {
   383  		d.opMu.RLock()
   384  		next, followedSymlink, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
   385  		d.opMu.RUnlock()
   386  		if err != nil {
   387  			return nil, err
   388  		}
   389  		d = next
   390  		if followedSymlink {
   391  			if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
   392  				return nil, err
   393  			}
   394  		}
   395  	}
   396  	if rp.MustBeDir() && !d.isDir() {
   397  		return nil, linuxerr.ENOTDIR
   398  	}
   399  	return d, nil
   400  }
   401  
   402  // doCreateAt checks that creating a file at rp is permitted, then invokes
   403  // createInRemoteDir (if the parent directory is a real remote directory) or
   404  // createInSyntheticDir (if the parent directory is synthetic) to do so.
        //
        // dir is true when the file being created is a directory: it adds IN_ISDIR
        // to the inotify event, and when it is false ENOENT is returned if
        // rp.MustBeDir(). createInSyntheticDir may be nil, in which case creating
        // inside a synthetic parent fails with EPERM.
        //
        // Error precedence is: search (exec) permission on the parent, then EEXIST,
        // then mount writability and write permission on the parent.
        //
        // On success the new child is cached in the parent and an IN_CREATE event is
        // sent to the parent's watches.
   405  //
   406  // Preconditions:
   407  //   - !rp.Done().
   408  //   - For the final path component in rp, !rp.ShouldFollowSymlink().
   409  func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, createInRemoteDir func(parent *dentry, name string, ds **[]*dentry) (*dentry, error), createInSyntheticDir func(parent *dentry, name string) (*dentry, error)) error {
   410  	var ds *[]*dentry
   411  	fs.renameMu.RLock()
   412  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   413  	start := rp.Start().Impl().(*dentry)
   414  	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
   415  	if err != nil {
   416  		return err
   417  	}
   418  
   419  	// Order of checks is important. First check if parent directory can be
   420  	// executed, then check for existence, and lastly check if mount is writable.
   421  	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
   422  		return err
   423  	}
   424  	name := rp.Component()
   425  	if name == "." || name == ".." {
   426  		return linuxerr.EEXIST
   427  	}
   428  	if parent.isDeleted() {
   429  		return linuxerr.ENOENT
   430  	}
   431  	if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, name, &ds); err != nil {
   432  		return err
   433  	}
   434  
        	// parent.opMu stays locked for writing until doCreateAt returns.
   435  	parent.opMu.Lock()
   436  	defer parent.opMu.Unlock()
   437  
   438  	if len(name) > MaxFilenameLen {
   439  		return linuxerr.ENAMETOOLONG
   440  	}
   441  	// Check for existence only if caching information is available. Otherwise,
   442  	// don't check for existence just yet. We will check for existence if the
   443  	// checks for writability fail below. Existence check is done by the creation
   444  	// RPCs themselves.
   445  	parent.childrenMu.Lock()
   446  	if child, ok := parent.children[name]; ok && child != nil {
   447  		parent.childrenMu.Unlock()
   448  		return linuxerr.EEXIST
   449  	}
   450  	if parent.childrenSet != nil {
   451  		if _, ok := parent.childrenSet[name]; ok {
   452  			parent.childrenMu.Unlock()
   453  			return linuxerr.EEXIST
   454  		}
   455  	}
   456  	parent.childrenMu.Unlock()
        	// checkExistence looks name up through getChildLocked and reports EEXIST
        	// if a child is found. ENOENT from the lookup means "does not exist" and
        	// yields nil; any other lookup error is returned unchanged.
   457  	checkExistence := func() error {
   458  		if child, err := fs.getChildLocked(ctx, parent, name, &ds); err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
   459  			return err
   460  		} else if child != nil {
   461  			return linuxerr.EEXIST
   462  		}
   463  		return nil
   464  	}
   465  
   466  	mnt := rp.Mount()
   467  	if err := mnt.CheckBeginWrite(); err != nil {
   468  		// Existence check takes precedence.
   469  		if existenceErr := checkExistence(); existenceErr != nil {
   470  			return existenceErr
   471  		}
   472  		return err
   473  	}
   474  	defer mnt.EndWrite()
   475  
   476  	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
   477  		// Existence check takes precedence.
   478  		if existenceErr := checkExistence(); existenceErr != nil {
   479  			return existenceErr
   480  		}
   481  		return err
   482  	}
   483  	if !dir && rp.MustBeDir() {
   484  		return linuxerr.ENOENT
   485  	}
        	// Synthetic parent: the child is created locally via
        	// createInSyntheticDir, and this branch always returns.
   486  	if parent.isSynthetic() {
   487  		if createInSyntheticDir == nil {
   488  			return linuxerr.EPERM
   489  		}
   490  		child, err := createInSyntheticDir(parent, name)
   491  		if err != nil {
   492  			return err
   493  		}
   494  		parent.childrenMu.Lock()
   495  		parent.cacheNewChildLocked(child, name)
   496  		parent.syntheticChildren++
   497  		parent.clearDirentsLocked()
   498  		parent.childrenMu.Unlock()
   499  		parent.touchCMtime()
   500  		ev := linux.IN_CREATE
   501  		if dir {
   502  			ev |= linux.IN_ISDIR
   503  		}
   504  		parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
   505  		return nil
   506  	}
   507  	// No cached dentry exists; however, in InteropModeShared there might still be
   508  	// an existing file at name. Just attempt the file creation RPC anyways. If a
   509  	// file does exist, the RPC will fail with EEXIST like we would have.
   510  	child, err := createInRemoteDir(parent, name, &ds)
   511  	if err != nil {
   512  		return err
   513  	}
   514  	parent.childrenMu.Lock()
   515  	parent.cacheNewChildLocked(child, name)
        	// createInRemoteDir may hand back a synthetic child. In that case only the
        	// parent is queued for a caching check; otherwise both parent and child
        	// are queued.
   516  	if child.isSynthetic() {
   517  		parent.syntheticChildren++
   518  		ds = appendDentry(ds, parent)
   519  	} else {
   520  		appendNewChildDentry(&ds, parent, child)
   521  	}
        	// Outside InteropModeShared the parent's cached state is updated locally.
   522  	if fs.opts.interop != InteropModeShared {
   523  		if child, ok := parent.children[name]; ok && child == nil {
   524  			// Delete the now-stale negative dentry.
   525  			delete(parent.children, name)
   526  			parent.negativeChildren--
   527  		}
   528  		parent.clearDirentsLocked()
   529  		parent.touchCMtime()
   530  	}
   531  	parent.childrenMu.Unlock()
   532  	ev := linux.IN_CREATE
   533  	if dir {
   534  		ev |= linux.IN_ISDIR
   535  	}
   536  	parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
   537  	return nil
   538  }
   539  
   540  // unlinkAt removes the file named by the final component of rp from its
        // parent directory. If dir is true the target is removed as a directory
        // (AT_REMOVEDIR is passed to the remote unlink, and cached information is
        // used for best-effort checks that it is an empty directory); otherwise the
        // target must not be a directory.
        //
        // On success the name is recorded as a negative lookup in the parent, the
        // matching inotify events are generated, and any cached child dentry is
        // marked deleted.
        //
        // Preconditions: !rp.Done().
   541  func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool) error {
   542  	var ds *[]*dentry
   543  	fs.renameMu.RLock()
   544  	// We need to DecRef outside of fs.renameMu because forgetting a dead
   545  	// mountpoint could result in this filesystem being released which acquires
   546  	// fs.renameMu.
   547  	var toDecRef []refs.RefCounter
        	// This defer is registered before the renameMu unlock defer below, so it
        	// runs after fs.renameMu has been released.
   548  	defer func() {
   549  		for _, ref := range toDecRef {
   550  			ref.DecRef(ctx)
   551  		}
   552  	}()
   553  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   554  	start := rp.Start().Impl().(*dentry)
   555  	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
   556  	if err != nil {
   557  		return err
   558  	}
   559  	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
   560  		return err
   561  	}
   562  	if err := rp.Mount().CheckBeginWrite(); err != nil {
   563  		return err
   564  	}
   565  	defer rp.Mount().EndWrite()
   566  
   567  	name := rp.Component()
        	// "." and ".." are never removable; the errno depends on whether a
        	// directory removal was requested.
   568  	if dir {
   569  		if name == "." {
   570  			return linuxerr.EINVAL
   571  		}
   572  		if name == ".." {
   573  			return linuxerr.ENOTEMPTY
   574  		}
   575  	} else {
   576  		if name == "." || name == ".." {
   577  			return linuxerr.EISDIR
   578  		}
   579  	}
   580  
   581  	vfsObj := rp.VirtualFilesystem()
   582  	if err := fs.revalidateOne(ctx, vfsObj, parent, rp.Component(), &ds); err != nil {
   583  		return err
   584  	}
   585  
   586  	mntns := vfs.MountNamespaceFromContext(ctx)
   587  	defer mntns.DecRef(ctx)
   588  
        	// parent.opMu stays locked for writing until unlinkAt returns.
   589  	parent.opMu.Lock()
   590  	defer parent.opMu.Unlock()
   591  
        	// If the set of child names is known and name is not in it, the file
        	// does not exist.
   592  	parent.childrenMu.Lock()
   593  	if parent.childrenSet != nil {
   594  		if _, ok := parent.childrenSet[name]; !ok {
   595  			parent.childrenMu.Unlock()
   596  			return linuxerr.ENOENT
   597  		}
   598  	}
   599  	parent.childrenMu.Unlock()
   600  
   601  	// Without the sticky bit, only the dentry cache is consulted and child may
   602  	// stay nil. With the sticky bit set, the child must be loaded because it is
        	// needed to determine whether deletion is allowed (see mayDelete below).
   603  	var child *dentry
   604  	if parent.mode.Load()&linux.ModeSticky == 0 {
   605  		var ok bool
   606  		parent.childrenMu.Lock()
   607  		child, ok = parent.children[name]
   608  		parent.childrenMu.Unlock()
   609  		if ok && child == nil {
   610  			// Hit a negative cached entry, child doesn't exist.
   611  			return linuxerr.ENOENT
   612  		}
   613  	} else {
   614  		child, _, err = fs.stepLocked(ctx, resolvingPathFull(rp), parent, false /* mayFollowSymlinks */, &ds)
   615  		if err != nil {
   616  			return err
   617  		}
   618  		if err := parent.mayDelete(rp.Credentials(), child); err != nil {
   619  			return err
   620  		}
   621  	}
   622  
   623  	// If a child dentry exists, prepare to delete it. This should fail if it is
   624  	// a mount point. We detect mount points by speculatively calling
   625  	// PrepareDeleteDentry, which fails if child is a mount point.
   626  	//
   627  	// Also note that if child is nil, then it can't be a mount point.
   628  	if child != nil {
   629  		// Hold child.childrenMu so we can check child.children and
   630  		// child.syntheticChildren. We don't access these fields until a bit later,
   631  		// but locking child.childrenMu after calling vfs.PrepareDeleteDentry() would
   632  		// create an inconsistent lock ordering between dentry.childrenMu and
   633  		// vfs.Dentry.mu (in the VFS lock order, it would make dentry.childrenMu both "a
   634  		// FilesystemImpl lock" and "a lock acquired by a FilesystemImpl between
   635  		// PrepareDeleteDentry and CommitDeleteDentry). To avoid this, lock
   636  		// child.childrenMu before calling PrepareDeleteDentry.
   637  		child.childrenMu.Lock()
   638  		defer child.childrenMu.Unlock()
   639  		if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
   640  			return err
   641  		}
   642  	}
        	// From here on, every error return taken while child != nil calls
        	// AbortDeleteDentry first to undo the PrepareDeleteDentry above.
   643  	flags := uint32(0)
   644  	// If a dentry exists, use it for best-effort checks on its deletability.
   645  	if dir {
   646  		if child != nil {
   647  			// child must be an empty directory.
   648  			if child.syntheticChildren != 0 { // +checklocksforce: child.childrenMu is held if child != nil.
   649  				// This is definitely not an empty directory, irrespective of
   650  				// fs.opts.interop.
   651  				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: PrepareDeleteDentry called if child != nil.
   652  				return linuxerr.ENOTEMPTY
   653  			}
   654  			// If InteropModeShared is in effect and the first call to
   655  			// PrepareDeleteDentry above succeeded, then child wasn't
   656  			// revalidated (so we can't expect its file type to be correct) and
   657  			// individually revalidating its children (to confirm that they
   658  			// still exist) would be a waste of time.
   659  			if child.cachedMetadataAuthoritative() {
   660  				if !child.isDir() {
   661  					vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
   662  					return linuxerr.ENOTDIR
   663  				}
   664  				for _, grandchild := range child.children { // +checklocksforce: child.childrenMu is held if child != nil.
   665  					if grandchild != nil {
   666  						vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
   667  						return linuxerr.ENOTEMPTY
   668  					}
   669  				}
   670  			}
   671  		}
   672  		flags = linux.AT_REMOVEDIR
   673  	} else {
   674  		// child must be a non-directory file.
   675  		if child != nil && child.isDir() {
   676  			vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
   677  			return linuxerr.EISDIR
   678  		}
   679  		if rp.MustBeDir() {
   680  			if child != nil {
   681  				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
   682  			}
   683  			return linuxerr.ENOTDIR
   684  		}
   685  	}
        	// A synthetic parent can only contain children that are already cached,
        	// so a nil child means the file does not exist. The remote unlink is
        	// issued only when the parent is not synthetic and there is either no
        	// cached child or the cached child is not synthetic.
   686  	if parent.isSynthetic() {
   687  		if child == nil {
   688  			return linuxerr.ENOENT
   689  		}
   690  	} else if child == nil || !child.isSynthetic() {
   691  		if err := parent.unlink(ctx, name, flags); err != nil {
   692  			if child != nil {
   693  				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
   694  			}
   695  			return err
   696  		}
   697  	}
   698  
   699  	// Generate inotify events for rmdir or unlink.
   700  	if dir {
   701  		parent.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */)
   702  	} else {
   703  		var cw *vfs.Watches
   704  		if child != nil {
   705  			cw = &child.watches
   706  		}
   707  		vfs.InotifyRemoveChild(ctx, cw, &parent.watches, name)
   708  	}
   709  
   710  	parent.childrenMu.Lock()
   711  	defer parent.childrenMu.Unlock()
   712  
        	// Commit the deletion in VFS and update the cached child. The references
        	// returned by CommitDeleteDentry are dropped by the deferred function at
        	// the top of unlinkAt.
   713  	if child != nil {
   714  		toDecRef = vfsObj.CommitDeleteDentry(ctx, &child.vfsd) // +checklocksforce: see above.
   715  		child.setDeleted()
   716  		if child.isSynthetic() {
   717  			parent.syntheticChildren--
   718  			child.decRefNoCaching()
   719  		}
   720  		ds = appendDentry(ds, child)
   721  	}
   722  	parent.cacheNegativeLookupLocked(name)
        	// Update the parent's cached metadata only when the cache is
        	// authoritative for it.
   723  	if parent.cachedMetadataAuthoritative() {
   724  		parent.clearDirentsLocked()
   725  		parent.touchCMtime()
   726  		if dir {
   727  			parent.decLinks()
   728  		}
   729  	}
   730  	return nil
   731  }
   732  
   733  // AccessAt implements vfs.Filesystem.Impl.AccessAt.
   734  func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
   735  	var ds *[]*dentry
   736  	fs.renameMu.RLock()
   737  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   738  	d, err := fs.resolveLocked(ctx, rp, &ds)
   739  	if err != nil {
   740  		return err
   741  	}
   742  	if err := d.checkPermissions(creds, ats); err != nil {
   743  		return err
   744  	}
   745  	if ats.MayWrite() && rp.Mount().ReadOnly() {
   746  		return linuxerr.EROFS
   747  	}
   748  	return nil
   749  }
   750  
   751  // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
   752  func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
   753  	var ds *[]*dentry
   754  	fs.renameMu.RLock()
   755  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   756  	d, err := fs.resolveLocked(ctx, rp, &ds)
   757  	if err != nil {
   758  		return nil, err
   759  	}
   760  	if opts.CheckSearchable {
   761  		if !d.isDir() {
   762  			return nil, linuxerr.ENOTDIR
   763  		}
   764  		if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
   765  			return nil, err
   766  		}
   767  	}
   768  	d.IncRef()
   769  	// Call d.checkCachingLocked() so it can be removed from the cache if needed.
   770  	ds = appendDentry(ds, d)
   771  	return &d.vfsd, nil
   772  }
   773  
   774  // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
   775  func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
   776  	var ds *[]*dentry
   777  	fs.renameMu.RLock()
   778  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   779  	start := rp.Start().Impl().(*dentry)
   780  	d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
   781  	if err != nil {
   782  		return nil, err
   783  	}
   784  	d.IncRef()
   785  	// Call d.checkCachingLocked() so it can be removed from the cache if needed.
   786  	ds = appendDentry(ds, d)
   787  	return &d.vfsd, nil
   788  }
   789  
   790  // LinkAt implements vfs.FilesystemImpl.LinkAt.
   791  func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
   792  	err := fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
   793  		if rp.Mount() != vd.Mount() {
   794  			return nil, linuxerr.EXDEV
   795  		}
   796  		d := vd.Dentry().Impl().(*dentry)
   797  		if d.isDir() {
   798  			return nil, linuxerr.EPERM
   799  		}
   800  		gid := auth.KGID(d.gid.Load())
   801  		uid := auth.KUID(d.uid.Load())
   802  		mode := linux.FileMode(d.mode.Load())
   803  		if err := vfs.MayLink(rp.Credentials(), mode, uid, gid); err != nil {
   804  			return nil, err
   805  		}
   806  		if d.nlink.Load() == 0 {
   807  			return nil, linuxerr.ENOENT
   808  		}
   809  		if d.nlink.Load() == math.MaxUint32 {
   810  			return nil, linuxerr.EMLINK
   811  		}
   812  		if d.isSynthetic() {
   813  			// TODO(gvisor.dev/issue/6739): Add synthetic file hard link support.
   814  			return nil, linuxerr.EOPNOTSUPP
   815  		}
   816  		return parent.link(ctx, d, name)
   817  	}, nil)
   818  
   819  	if err == nil {
   820  		// Success!
   821  		vd.Dentry().Impl().(*dentry).incLinks()
   822  	}
   823  	return err
   824  }
   825  
   826  // MkdirAt implements vfs.FilesystemImpl.MkdirAt.
   827  func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
   828  	creds := rp.Credentials()
   829  	return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
   830  		// If the parent is a setgid directory, use the parent's GID
   831  		// rather than the caller's and enable setgid.
   832  		kgid := creds.EffectiveKGID
   833  		mode := opts.Mode
   834  		if parent.mode.Load()&linux.S_ISGID != 0 {
   835  			kgid = auth.KGID(parent.gid.Load())
   836  			mode |= linux.S_ISGID
   837  		}
   838  
   839  		child, err := parent.mkdir(ctx, name, mode, creds.EffectiveKUID, kgid)
   840  		if err == nil {
   841  			if fs.opts.interop != InteropModeShared {
   842  				parent.incLinks()
   843  			}
   844  			return child, nil
   845  		}
   846  
   847  		if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) {
   848  			return nil, err
   849  		}
   850  		ctx.Infof("Failed to create remote directory %q: %v; falling back to synthetic directory", name, err)
   851  		child = fs.newSyntheticDentry(&createSyntheticOpts{
   852  			name: name,
   853  			mode: linux.S_IFDIR | opts.Mode,
   854  			kuid: creds.EffectiveKUID,
   855  			kgid: creds.EffectiveKGID,
   856  		})
   857  		if fs.opts.interop != InteropModeShared {
   858  			parent.incLinks()
   859  		}
   860  		return child, nil
   861  	}, func(parent *dentry, name string) (*dentry, error) {
   862  		if !opts.ForSyntheticMountpoint {
   863  			// Can't create non-synthetic files in synthetic directories.
   864  			return nil, linuxerr.EPERM
   865  		}
   866  		child := fs.newSyntheticDentry(&createSyntheticOpts{
   867  			name: name,
   868  			mode: linux.S_IFDIR | opts.Mode,
   869  			kuid: creds.EffectiveKUID,
   870  			kgid: creds.EffectiveKGID,
   871  		})
   872  		parent.incLinks()
   873  		return child, nil
   874  	})
   875  }
   876  
   877  // MknodAt implements vfs.FilesystemImpl.MknodAt.
   878  func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
   879  	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
   880  		creds := rp.Credentials()
   881  		if child, err := parent.mknod(ctx, name, creds, &opts); err == nil {
   882  			return child, nil
   883  		} else if !linuxerr.Equals(linuxerr.EPERM, err) {
   884  			return nil, err
   885  		}
   886  
   887  		// EPERM means that gofer does not allow creating a socket or pipe. Fallback
   888  		// to creating a synthetic one, i.e. one that is kept entirely in memory.
   889  
   890  		// Check that we're not overriding an existing file with a synthetic one.
   891  		_, _, err := fs.stepLocked(ctx, resolvingPathFull(rp), parent, false /* mayFollowSymlinks */, ds) // +checklocksforce: parent.opMu taken by doCreateAt.
   892  		switch {
   893  		case err == nil:
   894  			// Step succeeded, another file exists.
   895  			return nil, linuxerr.EEXIST
   896  		case !linuxerr.Equals(linuxerr.ENOENT, err):
   897  			// Schrödinger. File/Cat may or may not exist.
   898  			return nil, err
   899  		}
   900  
   901  		switch opts.Mode.FileType() {
   902  		case linux.S_IFSOCK:
   903  			return fs.newSyntheticDentry(&createSyntheticOpts{
   904  				name:     name,
   905  				mode:     opts.Mode,
   906  				kuid:     creds.EffectiveKUID,
   907  				kgid:     creds.EffectiveKGID,
   908  				endpoint: opts.Endpoint,
   909  			}), nil
   910  		case linux.S_IFIFO:
   911  			return fs.newSyntheticDentry(&createSyntheticOpts{
   912  				name: name,
   913  				mode: opts.Mode,
   914  				kuid: creds.EffectiveKUID,
   915  				kgid: creds.EffectiveKGID,
   916  				pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize),
   917  			}), nil
   918  		}
   919  		// Retain error from gofer if synthetic file cannot be created internally.
   920  		return nil, linuxerr.EPERM
   921  	}, nil)
   922  }
   923  
// OpenAt implements vfs.FilesystemImpl.OpenAt.
//
// Overview of the flow:
//   - O_TMPFILE is rejected with EOPNOTSUPP.
//   - If rp is already fully resolved, the start dentry itself is opened
//     (after refreshing its metadata when the cache is not authoritative).
//   - Otherwise the parent directory is walked to, the last component is
//     looked up under parent.opMu held for reading, and trailing symlinks are
//     followed by jumping back to afterTrailingSymlink.
//   - If the last component does not exist and O_CREAT was given, the file is
//     created under parent.opMu held for writing; a lost creation race
//     (EEXIST) is handled by looking the file up again.
//
// fs.renameMu is held for reading during resolution and is released (via
// unlock) before calling dentry.open, whose preconditions require that no
// locks are held.
func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
	// Reject O_TMPFILE, which is not supported; supporting it correctly in the
	// presence of other remote filesystem users requires remote filesystem
	// support, and it isn't clear that there's any way to implement this in
	// 9P.
	if opts.Flags&linux.O_TMPFILE != 0 {
		return nil, linuxerr.EOPNOTSUPP
	}
	// mayCreate: O_CREAT is set. mustCreate: both O_CREAT and O_EXCL are set.
	mayCreate := opts.Flags&linux.O_CREAT != 0
	mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL)

	var ds *[]*dentry
	fs.renameMu.RLock()
	// unlock releases renameMu at most once, so it is safe to call it
	// explicitly before opening and again from the deferred call.
	unlocked := false
	unlock := func() {
		if !unlocked {
			fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
			unlocked = true
		}
	}
	defer unlock()

	start := rp.Start().Impl().(*dentry)
	if rp.Done() {
		// Reject attempts to open mount root directory with O_CREAT.
		if mayCreate && rp.MustBeDir() {
			return nil, linuxerr.EISDIR
		}
		if mustCreate {
			return nil, linuxerr.EEXIST
		}
		if !start.cachedMetadataAuthoritative() {
			// Refresh dentry's attributes before opening.
			if err := start.updateMetadata(ctx); err != nil {
				return nil, err
			}
		}
		// Hold a reference across the open, since renameMu is dropped first.
		start.IncRef()
		defer start.DecRef(ctx)
		unlock()
		// start is intentionally not added to ds (which would remove it from the
		// cache) because doing so regresses performance in practice.
		return start.open(ctx, rp, &opts)
	}

	// Re-entered (with start set to the previous parent) each time a trailing
	// symlink has been followed.
afterTrailingSymlink:
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return nil, err
	}
	// Check for search permission in the parent directory.
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return nil, err
	}
	// Reject attempts to open directories with O_CREAT.
	if mayCreate && rp.MustBeDir() {
		return nil, linuxerr.EISDIR
	}
	if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, rp.Component(), &ds); err != nil {
		return nil, err
	}
	// Determine whether or not we need to create a file.
	// NOTE(b/263297063): Don't hold opMu for writing here, to avoid
	// serializing OpenAt calls in the same directory in the common case
	// that the file exists.
	parent.opMu.RLock()
	child, followedSymlink, err := fs.stepLocked(ctx, resolvingPathFull(rp), parent, true /* mayFollowSymlinks */, &ds)
	parent.opMu.RUnlock()
	if followedSymlink {
		if mustCreate {
			// EEXIST must be returned if an existing symlink is opened with O_EXCL.
			return nil, linuxerr.EEXIST
		}
		if err != nil {
			// If followedSymlink && err != nil, then this symlink resolution error
			// must be handled by the VFS layer.
			return nil, err
		}
		start = parent
		goto afterTrailingSymlink
	}
	if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate {
		// Files cannot be created remotely inside a synthetic directory.
		if parent.isSynthetic() {
			return nil, linuxerr.EPERM
		}

		// Take opMu for writing, but note that the file may have been
		// created by another goroutine since we checked for existence
		// a few lines ago. We must handle that case.
		parent.opMu.Lock()
		fd, createErr := parent.createAndOpenChildLocked(ctx, rp, &opts, &ds)
		if !linuxerr.Equals(linuxerr.EEXIST, createErr) {
			// Either the creation was a success, or we got an
			// unexpected error. Either way we can return here.
			parent.opMu.Unlock()
			return fd, createErr
		}

		// We raced, and now the file exists.
		if mustCreate {
			parent.opMu.Unlock()
			return nil, linuxerr.EEXIST
		}

		// Step to the file again. Since we still hold opMu for
		// writing, there can't be a race here.
		child, _, err = fs.stepLocked(ctx, resolvingPathFull(rp), parent, false /* mayFollowSymlinks */, &ds)
		parent.opMu.Unlock()
	}
	if err != nil {
		return nil, err
	}
	// The file exists at this point, so an exclusive create must fail.
	if mustCreate {
		return nil, linuxerr.EEXIST
	}
	if rp.MustBeDir() && !child.isDir() {
		return nil, linuxerr.ENOTDIR
	}
	// Hold a reference across the open, since renameMu is dropped first.
	child.IncRef()
	defer child.DecRef(ctx)
	unlock()
	// child is intentionally not added to ds (which would remove it from the
	// cache) because doing so regresses performance in practice.
	return child.open(ctx, rp, &opts)
}
  1050  
  1051  // Preconditions: The caller must hold no locks (since opening pipes may block
  1052  // indefinitely).
  1053  func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
  1054  	ats := vfs.AccessTypesForOpenFlags(opts)
  1055  	if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
  1056  		return nil, err
  1057  	}
  1058  
  1059  	if !d.isSynthetic() {
  1060  		// renameMu is locked here because it is required by d.openHandle(), which
  1061  		// is called by d.ensureSharedHandle() and d.openSpecialFile() below. It is
  1062  		// also required by d.connect() which is called by
  1063  		// d.openSocketByConnecting(). Note that opening non-synthetic pipes may
  1064  		// block, renameMu is unlocked separately in d.openSpecialFile() for pipes.
  1065  		d.fs.renameMu.RLock()
  1066  		defer d.fs.renameMu.RUnlock()
  1067  	}
  1068  
  1069  	trunc := opts.Flags&linux.O_TRUNC != 0 && d.fileType() == linux.S_IFREG
  1070  	if trunc {
  1071  		// Lock metadataMu *while* we open a regular file with O_TRUNC because
  1072  		// open(2) will change the file size on server.
  1073  		d.metadataMu.Lock()
  1074  		defer d.metadataMu.Unlock()
  1075  	}
  1076  
  1077  	var vfd *vfs.FileDescription
  1078  	var err error
  1079  	mnt := rp.Mount()
  1080  	switch d.fileType() {
  1081  	case linux.S_IFREG:
  1082  		if !d.fs.opts.regularFilesUseSpecialFileFD {
  1083  			if err := d.ensureSharedHandle(ctx, ats.MayRead(), ats.MayWrite(), trunc); err != nil {
  1084  				return nil, err
  1085  			}
  1086  			fd, err := newRegularFileFD(mnt, d, opts.Flags)
  1087  			if err != nil {
  1088  				return nil, err
  1089  			}
  1090  			vfd = &fd.vfsfd
  1091  		}
  1092  	case linux.S_IFDIR:
  1093  		// Can't open directories with O_CREAT.
  1094  		if opts.Flags&linux.O_CREAT != 0 {
  1095  			return nil, linuxerr.EISDIR
  1096  		}
  1097  		// Can't open directories writably.
  1098  		if ats&vfs.MayWrite != 0 {
  1099  			return nil, linuxerr.EISDIR
  1100  		}
  1101  		if opts.Flags&linux.O_DIRECT != 0 {
  1102  			return nil, linuxerr.EINVAL
  1103  		}
  1104  		if !d.isSynthetic() {
  1105  			if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil {
  1106  				return nil, err
  1107  			}
  1108  		}
  1109  		fd := &directoryFD{}
  1110  		fd.LockFD.Init(&d.locks)
  1111  		if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
  1112  			return nil, err
  1113  		}
  1114  		if d.readFD.Load() >= 0 {
  1115  			fsmetric.GoferOpensHost.Increment()
  1116  		} else {
  1117  			fsmetric.GoferOpens9P.Increment()
  1118  		}
  1119  		return &fd.vfsfd, nil
  1120  	case linux.S_IFLNK:
  1121  		// Can't open symlinks without O_PATH, which is handled at the VFS layer.
  1122  		return nil, linuxerr.ELOOP
  1123  	case linux.S_IFSOCK:
  1124  		if d.isSynthetic() {
  1125  			return nil, linuxerr.ENXIO
  1126  		}
  1127  		if d.fs.iopts.OpenSocketsByConnecting {
  1128  			return d.openSocketByConnecting(ctx, opts)
  1129  		}
  1130  	case linux.S_IFIFO:
  1131  		if d.isSynthetic() {
  1132  			return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags, &d.locks)
  1133  		}
  1134  		if d.fs.opts.disableFifoOpen {
  1135  			return nil, linuxerr.EPERM
  1136  		}
  1137  	}
  1138  
  1139  	if vfd == nil {
  1140  		if vfd, err = d.openSpecialFile(ctx, mnt, opts); err != nil {
  1141  			return nil, err
  1142  		}
  1143  	}
  1144  
  1145  	if trunc {
  1146  		// If no errors occurred so far then update file size in memory. This
  1147  		// step is required even if !d.cachedMetadataAuthoritative() because
  1148  		// d.mappings has to be updated.
  1149  		// d.metadataMu has already been acquired if trunc == true.
  1150  		d.updateSizeLocked(0)
  1151  
  1152  		if d.cachedMetadataAuthoritative() {
  1153  			d.touchCMtimeLocked()
  1154  		}
  1155  	}
  1156  	return vfd, err
  1157  }
  1158  
  1159  // Precondition: fs.renameMu is locked.
  1160  func (d *dentry) openSocketByConnecting(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
  1161  	if opts.Flags&linux.O_DIRECT != 0 {
  1162  		return nil, linuxerr.EINVAL
  1163  	}
  1164  	// Note that special value of linux.SockType = 0 is interpreted by lisafs
  1165  	// as "do not care about the socket type". Analogous to p9.AnonymousSocket.
  1166  	sockFD, err := d.connect(ctx, 0 /* sockType */)
  1167  	if err != nil {
  1168  		return nil, err
  1169  	}
  1170  	fd, err := host.NewFD(ctx, kernel.KernelFromContext(ctx).HostMount(), sockFD, &host.NewFDOptions{
  1171  		HaveFlags: true,
  1172  		Flags:     opts.Flags,
  1173  	})
  1174  	if err != nil {
  1175  		unix.Close(sockFD)
  1176  		return nil, err
  1177  	}
  1178  	return fd, nil
  1179  }
  1180  
  1181  // Preconditions:
  1182  //   - !d.isSynthetic().
  1183  //   - fs.renameMu is locked. It may be released temporarily while pipe blocks.
  1184  //   - If d is a pipe, no other locks (other than fs.renameMu) should be held.
  1185  func (d *dentry) openSpecialFile(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
  1186  	ats := vfs.AccessTypesForOpenFlags(opts)
  1187  	if opts.Flags&linux.O_DIRECT != 0 && !d.isRegularFile() {
  1188  		return nil, linuxerr.EINVAL
  1189  	}
  1190  	// We assume that the server silently inserts O_NONBLOCK in the open flags
  1191  	// for all named pipes (because all existing gofers do this).
  1192  	//
  1193  	// NOTE(b/133875563): This makes named pipe opens racy, because the
  1194  	// mechanisms for translating nonblocking to blocking opens can only detect
  1195  	// the instantaneous presence of a peer holding the other end of the pipe
  1196  	// open, not whether the pipe was *previously* opened by a peer that has
  1197  	// since closed its end.
  1198  	isBlockingOpenOfNamedPipe := d.fileType() == linux.S_IFIFO && opts.Flags&linux.O_NONBLOCK == 0
  1199  retry:
  1200  	h, err := d.openHandle(ctx, ats.MayRead(), ats.MayWrite(), opts.Flags&linux.O_TRUNC != 0)
  1201  	if err != nil {
  1202  		if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && linuxerr.Equals(linuxerr.ENXIO, err) {
  1203  			// An attempt to open a named pipe with O_WRONLY|O_NONBLOCK fails
  1204  			// with ENXIO if opening the same named pipe with O_WRONLY would
  1205  			// block because there are no readers of the pipe. Release renameMu
  1206  			// while blocking.
  1207  			d.fs.renameMu.RUnlock()
  1208  			err := sleepBetweenNamedPipeOpenChecks(ctx)
  1209  			d.fs.renameMu.RLock()
  1210  			if err != nil {
  1211  				return nil, err
  1212  			}
  1213  			goto retry
  1214  		}
  1215  		return nil, err
  1216  	}
  1217  	if isBlockingOpenOfNamedPipe && ats == vfs.MayRead && h.fd >= 0 {
  1218  		// Release renameMu while blocking.
  1219  		d.fs.renameMu.RUnlock()
  1220  		err := blockUntilNonblockingPipeHasWriter(ctx, h.fd)
  1221  		d.fs.renameMu.RLock()
  1222  		if err != nil {
  1223  			h.close(ctx)
  1224  			return nil, err
  1225  		}
  1226  	}
  1227  	fd, err := newSpecialFileFD(h, mnt, d, opts.Flags)
  1228  	if err != nil {
  1229  		h.close(ctx)
  1230  		return nil, err
  1231  	}
  1232  	return &fd.vfsfd, nil
  1233  }
  1234  
  1235  // Preconditions:
  1236  //   - d.fs.renameMu must be locked.
  1237  //   - d.opMu must be locked for writing.
  1238  //   - !d.isSynthetic().
  1239  //
  1240  // +checklocks:d.opMu
  1241  func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) {
  1242  	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
  1243  		return nil, err
  1244  	}
  1245  	if d.isDeleted() {
  1246  		return nil, linuxerr.ENOENT
  1247  	}
  1248  	mnt := rp.Mount()
  1249  	if err := mnt.CheckBeginWrite(); err != nil {
  1250  		return nil, err
  1251  	}
  1252  	defer mnt.EndWrite()
  1253  
  1254  	creds := rp.Credentials()
  1255  	name := rp.Component()
  1256  	// If the parent is a setgid directory, use the parent's GID rather
  1257  	// than the caller's.
  1258  	kgid := creds.EffectiveKGID
  1259  	if d.mode.Load()&linux.S_ISGID != 0 {
  1260  		kgid = auth.KGID(d.gid.Load())
  1261  	}
  1262  
  1263  	child, h, err := d.openCreate(ctx, name, opts.Flags&linux.O_ACCMODE, opts.Mode, creds.EffectiveKUID, kgid)
  1264  	if err != nil {
  1265  		return nil, err
  1266  	}
  1267  
  1268  	// Incorporate the fid that was opened by lcreate.
  1269  	useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD
  1270  	if useRegularFileFD {
  1271  		var readable, writable bool
  1272  		child.handleMu.Lock()
  1273  		if vfs.MayReadFileWithOpenFlags(opts.Flags) {
  1274  			readable = true
  1275  			if h.fd != -1 {
  1276  				child.readFD = atomicbitops.FromInt32(h.fd)
  1277  				child.mmapFD = atomicbitops.FromInt32(h.fd)
  1278  			}
  1279  		}
  1280  		if vfs.MayWriteFileWithOpenFlags(opts.Flags) {
  1281  			writable = true
  1282  			child.writeFD = atomicbitops.FromInt32(h.fd)
  1283  		}
  1284  		child.updateHandles(ctx, h, readable, writable)
  1285  		child.handleMu.Unlock()
  1286  	}
  1287  	// Insert the dentry into the tree.
  1288  	d.childrenMu.Lock()
  1289  	// We have d.opMu for writing, so there can not be a cached child with
  1290  	// this name.  We could not have raced.
  1291  	d.cacheNewChildLocked(child, name)
  1292  	appendNewChildDentry(ds, d, child)
  1293  	if d.cachedMetadataAuthoritative() {
  1294  		d.touchCMtime()
  1295  		d.clearDirentsLocked()
  1296  	}
  1297  	d.childrenMu.Unlock()
  1298  
  1299  	// Finally, construct a file description representing the created file.
  1300  	var childVFSFD *vfs.FileDescription
  1301  	if useRegularFileFD {
  1302  		fd, err := newRegularFileFD(mnt, child, opts.Flags)
  1303  		if err != nil {
  1304  			return nil, err
  1305  		}
  1306  		childVFSFD = &fd.vfsfd
  1307  	} else {
  1308  		fd, err := newSpecialFileFD(h, mnt, child, opts.Flags)
  1309  		if err != nil {
  1310  			h.close(ctx)
  1311  			return nil, err
  1312  		}
  1313  		childVFSFD = &fd.vfsfd
  1314  	}
  1315  	d.watches.Notify(ctx, name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */)
  1316  	return childVFSFD, nil
  1317  }
  1318  
  1319  // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
  1320  func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
  1321  	var ds *[]*dentry
  1322  	fs.renameMu.RLock()
  1323  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1324  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1325  	if err != nil {
  1326  		return "", err
  1327  	}
  1328  	if !d.isSymlink() {
  1329  		return "", linuxerr.EINVAL
  1330  	}
  1331  	return d.readlink(ctx, rp.Mount())
  1332  }
  1333  
// RenameAt implements vfs.FilesystemImpl.RenameAt.
//
// rp names the destination (new parent and new name); oldParentVD and oldName
// name the source. The whole operation runs with fs.renameMu held for
// writing. Only the RENAME_NOREPLACE flag is accepted, and it is rejected
// under InteropModeShared.
//
// The steps are, in order:
//  1. Resolve the new parent and validate flags, names and the mount.
//  2. Check permissions on the old parent and revalidate both names.
//  3. Look up the renamed and (optionally) replaced dentries under the
//     parents' opMu and apply the type/ancestry checks.
//  4. Perform the remote rename (or, for a synthetic renamed file replacing
//     a non-synthetic one, unlink the replaced file remotely).
//  5. Update the in-memory dentry tree, cached metadata and inotify watches.
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
	// Resolve newParent first to verify that it's on this Mount.
	var ds *[]*dentry
	fs.renameMu.Lock()
	// We need to DecRef outside of fs.mu because forgetting a dead mountpoint
	// could result in this filesystem being released which acquires fs.mu.
	// This defer is registered before the unlock defer below, so it runs
	// after fs.renameMu has been released.
	// NOTE(review): "fs.mu" presumably refers to fs.renameMu — confirm.
	var toDecRef []refs.RefCounter
	defer func() {
		for _, ref := range toDecRef {
			ref.DecRef(ctx)
		}
	}()
	defer fs.renameMuUnlockAndCheckCaching(ctx, &ds)
	newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds)
	if err != nil {
		return err
	}

	// Any flag other than RENAME_NOREPLACE is unsupported.
	if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
		return linuxerr.EINVAL
	}
	if fs.opts.interop == InteropModeShared && opts.Flags&linux.RENAME_NOREPLACE != 0 {
		// Requires 9P support to synchronize with other remote filesystem
		// users.
		return linuxerr.EINVAL
	}

	newName := rp.Component()
	if newName == "." || newName == ".." {
		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
			return linuxerr.EEXIST
		}
		return linuxerr.EBUSY
	}
	if len(newName) > MaxFilenameLen {
		return linuxerr.ENAMETOOLONG
	}
	// Source and destination must be on the same mount.
	mnt := rp.Mount()
	if mnt != oldParentVD.Mount() {
		return linuxerr.EXDEV
	}
	if err := mnt.CheckBeginWrite(); err != nil {
		return err
	}
	defer mnt.EndWrite()

	oldParent := oldParentVD.Dentry().Impl().(*dentry)
	// Refresh the old parent's metadata before checking permissions if the
	// cached copy is not authoritative.
	if !oldParent.cachedMetadataAuthoritative() {
		if err := oldParent.updateMetadata(ctx); err != nil {
			return err
		}
	}
	creds := rp.Credentials()
	if err := oldParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}

	vfsObj := rp.VirtualFilesystem()
	if err := fs.revalidateOne(ctx, vfsObj, newParent, newName, &ds); err != nil {
		return err
	}
	if err := fs.revalidateOne(ctx, vfsObj, oldParent, oldName, &ds); err != nil {
		return err
	}

	// We need a dentry representing the renamed file since, if it's a
	// directory, we need to check for write permission on it.
	oldParent.opMu.Lock()
	defer oldParent.opMu.Unlock()
	renamed, err := fs.getChildLocked(ctx, oldParent, oldName, &ds)
	if err != nil {
		return err
	}
	if err := oldParent.mayDelete(creds, renamed); err != nil {
		return err
	}
	if renamed.isDir() {
		// A directory cannot be moved into itself or one of its descendants.
		if renamed == newParent || genericIsAncestorDentry(renamed, newParent) {
			return linuxerr.EINVAL
		}
		if oldParent != newParent {
			if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil {
				return err
			}
		}
	} else {
		if opts.MustBeDir || rp.MustBeDir() {
			return linuxerr.ENOTDIR
		}
	}

	if oldParent != newParent {
		if err := newParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
			return err
		}
		// When both parents are the same dentry, its opMu is already held
		// from above and must not be locked a second time.
		newParent.opMu.Lock()
		defer newParent.opMu.Unlock()
	}
	if newParent.isDeleted() {
		return linuxerr.ENOENT
	}
	// ENOENT here just means that nothing is being replaced.
	replaced, err := fs.getChildLocked(ctx, newParent, newName, &ds) // +checklocksforce: newParent.opMu taken if newParent != oldParent.
	if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
		return err
	}
	var replacedVFSD *vfs.Dentry
	if replaced != nil {
		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
			return linuxerr.EEXIST
		}
		replacedVFSD = &replaced.vfsd
		if replaced.isDir() {
			if !renamed.isDir() {
				return linuxerr.EISDIR
			}
			if genericIsAncestorDentry(replaced, renamed) {
				return linuxerr.ENOTEMPTY
			}
		} else {
			if rp.MustBeDir() || renamed.isDir() {
				return linuxerr.ENOTDIR
			}
		}
	}

	// Renaming a file onto itself is a no-op once all checks have passed.
	if oldParent == newParent && oldName == newName {
		return nil
	}
	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)
	if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil {
		return err
	}

	// Update the remote filesystem. Any failure past PrepareRenameDentry must
	// be paired with AbortRenameDentry.
	if !renamed.isSynthetic() {
		if err := oldParent.rename(ctx, oldName, newParent, newName); err != nil {
			vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
			return err
		}
	} else if replaced != nil && !replaced.isSynthetic() {
		// We are replacing an existing real file with a synthetic one, so we
		// need to unlink the former.
		flags := uint32(0)
		if replaced.isDir() {
			flags = linux.AT_REMOVEDIR
		}
		if err := newParent.unlink(ctx, newName, flags); err != nil {
			vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
			return err
		}
	}

	// Update the dentry tree.
	newParent.childrenMu.Lock()
	defer newParent.childrenMu.Unlock()
	if oldParent != newParent {
		oldParent.childrenMu.Lock()
		defer oldParent.childrenMu.Unlock()
	}

	// The returned references are dropped by the deferred function registered
	// at the top of this function.
	toDecRef = vfsObj.CommitRenameReplaceDentry(ctx, &renamed.vfsd, replacedVFSD)
	if replaced != nil {
		replaced.setDeleted()
		if replaced.isSynthetic() {
			newParent.syntheticChildren--
			replaced.decRefNoCaching()
		}
		ds = appendDentry(ds, replaced)
		// Remove the replaced entry from its parent's cache.
		delete(newParent.children, newName)
	}
	oldParent.cacheNegativeLookupLocked(oldName) // +checklocksforce: oldParent.childrenMu is held if oldParent != newParent.
	if renamed.isSynthetic() {
		oldParent.syntheticChildren--
		newParent.syntheticChildren++
	}
	// We have d.opMu for writing, so no need to check for existence of a
	// child with the given name. We could not have raced.
	newParent.cacheNewChildLocked(renamed, newName)
	oldParent.decRefNoCaching()
	if oldParent != newParent {
		ds = appendDentry(ds, newParent)
		ds = appendDentry(ds, oldParent)
	}

	// Update metadata.
	if renamed.cachedMetadataAuthoritative() {
		renamed.touchCtime()
	}
	if oldParent.cachedMetadataAuthoritative() {
		oldParent.clearDirentsLocked()
		oldParent.touchCMtime()
		if renamed.isDir() {
			oldParent.decLinks()
		}
	}
	if newParent.cachedMetadataAuthoritative() {
		newParent.clearDirentsLocked()
		newParent.touchCMtime()
		if renamed.isDir() && (replaced == nil || !replaced.isDir()) {
			// Increase the link count if we did not replace another directory.
			newParent.incLinks()
		}
	}
	vfs.InotifyRename(ctx, &renamed.watches, &oldParent.watches, &newParent.watches, oldName, newName, renamed.isDir())
	return nil
}
  1543  
// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
//
// It is a thin wrapper that forwards to fs.unlinkAt with dir set to true.
func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
	return fs.unlinkAt(ctx, rp, true /* dir */)
}
  1548  
  1549  // SetStatAt implements vfs.FilesystemImpl.SetStatAt.
  1550  func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
  1551  	var ds *[]*dentry
  1552  	fs.renameMu.RLock()
  1553  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1554  	if err != nil {
  1555  		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1556  		return err
  1557  	}
  1558  	err = d.setStat(ctx, rp.Credentials(), &opts, rp.Mount())
  1559  	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1560  	if err != nil {
  1561  		return err
  1562  	}
  1563  
  1564  	if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
  1565  		d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
  1566  	}
  1567  	return nil
  1568  }
  1569  
  1570  // StatAt implements vfs.FilesystemImpl.StatAt.
  1571  func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
  1572  	var ds *[]*dentry
  1573  	fs.renameMu.RLock()
  1574  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1575  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1576  	if err != nil {
  1577  		return linux.Statx{}, err
  1578  	}
  1579  	// Since walking updates metadata for all traversed dentries under
  1580  	// InteropModeShared, including the returned one, we can return cached
  1581  	// metadata here regardless of fs.opts.interop.
  1582  	var stat linux.Statx
  1583  	d.statTo(&stat)
  1584  	return stat, nil
  1585  }
  1586  
  1587  // StatFSAt implements vfs.FilesystemImpl.StatFSAt.
  1588  func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
  1589  	var ds *[]*dentry
  1590  	fs.renameMu.RLock()
  1591  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1592  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1593  	if err != nil {
  1594  		return linux.Statfs{}, err
  1595  	}
  1596  	// If d is synthetic, invoke statfs on the first ancestor of d that isn't.
  1597  	for d.isSynthetic() {
  1598  		d = d.parent.Load()
  1599  	}
  1600  	statfs, err := d.statfs(ctx)
  1601  	if err != nil {
  1602  		return linux.Statfs{}, err
  1603  	}
  1604  	if statfs.NameLength == 0 || statfs.NameLength > MaxFilenameLen {
  1605  		statfs.NameLength = MaxFilenameLen
  1606  	}
  1607  	// This is primarily for distinguishing a gofer file system in
  1608  	// tests. Testing is important, so instead of defining
  1609  	// something completely random, use a standard value.
  1610  	statfs.Type = linux.V9FS_MAGIC
  1611  	return statfs, nil
  1612  }
  1613  
  1614  // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
  1615  func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
  1616  	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
  1617  		child, err := parent.symlink(ctx, name, target, rp.Credentials())
  1618  		if err != nil {
  1619  			return nil, err
  1620  		}
  1621  		if parent.fs.opts.interop != InteropModeShared {
  1622  			// Cache the symlink target on creation. In practice, this helps avoid a
  1623  			// lot of ReadLink RPCs. Note that when InteropModeShared is in effect,
  1624  			// we are forced to make Readlink RPCs. Because in this mode, we use host
  1625  			// timestamps, not timestamps based on our internal clock. And readlink
  1626  			// updates the atime on the host.
  1627  			child.haveTarget = true
  1628  			child.target = target
  1629  		}
  1630  		return child, nil
  1631  	}, nil)
  1632  }
  1633  
  1634  // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
  1635  func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
  1636  	return fs.unlinkAt(ctx, rp, false /* dir */)
  1637  }
  1638  
  1639  // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
  1640  func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
  1641  	var ds *[]*dentry
  1642  	fs.renameMu.RLock()
  1643  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1644  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1645  	if err != nil {
  1646  		return nil, err
  1647  	}
  1648  	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
  1649  		return nil, err
  1650  	}
  1651  	if !d.isSocket() {
  1652  		return nil, linuxerr.ECONNREFUSED
  1653  	}
  1654  	if d.endpoint != nil {
  1655  		return d.endpoint, nil
  1656  	}
  1657  	if !d.isSynthetic() {
  1658  		d.IncRef()
  1659  		ds = appendDentry(ds, d)
  1660  		return &endpoint{
  1661  			dentry: d,
  1662  			path:   opts.Addr,
  1663  		}, nil
  1664  	}
  1665  	return nil, linuxerr.ECONNREFUSED
  1666  }
  1667  
  1668  // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
  1669  func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
  1670  	var ds *[]*dentry
  1671  	fs.renameMu.RLock()
  1672  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1673  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1674  	if err != nil {
  1675  		return nil, err
  1676  	}
  1677  	return d.listXattr(ctx, size)
  1678  }
  1679  
  1680  // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
  1681  func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
  1682  	var ds *[]*dentry
  1683  	fs.renameMu.RLock()
  1684  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1685  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1686  	if err != nil {
  1687  		return "", err
  1688  	}
  1689  	return d.getXattr(ctx, rp.Credentials(), &opts)
  1690  }
  1691  
  1692  // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
  1693  func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
  1694  	var ds *[]*dentry
  1695  	fs.renameMu.RLock()
  1696  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1697  	if err != nil {
  1698  		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1699  		return err
  1700  	}
  1701  	err = d.setXattr(ctx, rp.Credentials(), &opts)
  1702  	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1703  	if err != nil {
  1704  		return err
  1705  	}
  1706  
  1707  	d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
  1708  	return nil
  1709  }
  1710  
  1711  // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
  1712  func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
  1713  	var ds *[]*dentry
  1714  	fs.renameMu.RLock()
  1715  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1716  	if err != nil {
  1717  		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1718  		return err
  1719  	}
  1720  	err = d.removeXattr(ctx, rp.Credentials(), name)
  1721  	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1722  	if err != nil {
  1723  		return err
  1724  	}
  1725  
  1726  	d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
  1727  	return nil
  1728  }
  1729  
  1730  // PrependPath implements vfs.FilesystemImpl.PrependPath.
  1731  func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
  1732  	fs.renameMu.RLock()
  1733  	defer fs.renameMu.RUnlock()
  1734  	return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
  1735  }
  1736  
  1737  type mopt struct {
  1738  	key   string
  1739  	value any
  1740  }
  1741  
  1742  func (m mopt) String() string {
  1743  	if m.value == nil {
  1744  		return fmt.Sprintf("%s", m.key)
  1745  	}
  1746  	return fmt.Sprintf("%s=%v", m.key, m.value)
  1747  }
  1748  
  1749  // MountOptions implements vfs.FilesystemImpl.MountOptions.
  1750  func (fs *filesystem) MountOptions() string {
  1751  	optsKV := []mopt{
  1752  		{moptTransport, transportModeFD}, // Only valid value, currently.
  1753  		{moptReadFD, fs.opts.fd},         // Currently, read and write FD are the same.
  1754  		{moptWriteFD, fs.opts.fd},        // Currently, read and write FD are the same.
  1755  		{moptAname, fs.opts.aname},
  1756  		{moptDfltUID, fs.opts.dfltuid},
  1757  		{moptDfltGID, fs.opts.dfltgid},
  1758  	}
  1759  
  1760  	switch fs.opts.interop {
  1761  	case InteropModeExclusive:
  1762  		optsKV = append(optsKV, mopt{moptCache, cacheFSCache})
  1763  	case InteropModeWritethrough:
  1764  		optsKV = append(optsKV, mopt{moptCache, cacheFSCacheWritethrough})
  1765  	case InteropModeShared:
  1766  		optsKV = append(optsKV, mopt{moptCache, cacheRemoteRevalidating})
  1767  	}
  1768  	if fs.opts.regularFilesUseSpecialFileFD {
  1769  		optsKV = append(optsKV, mopt{moptDisableFileHandleSharing, nil})
  1770  	}
  1771  	if fs.opts.disableFifoOpen {
  1772  		optsKV = append(optsKV, mopt{moptDisableFifoOpen, nil})
  1773  	}
  1774  	if fs.opts.forcePageCache {
  1775  		optsKV = append(optsKV, mopt{moptForcePageCache, nil})
  1776  	}
  1777  	if fs.opts.limitHostFDTranslation {
  1778  		optsKV = append(optsKV, mopt{moptLimitHostFDTranslation, nil})
  1779  	}
  1780  	if fs.opts.overlayfsStaleRead {
  1781  		optsKV = append(optsKV, mopt{moptOverlayfsStaleRead, nil})
  1782  	}
  1783  	if fs.opts.directfs.enabled {
  1784  		optsKV = append(optsKV, mopt{moptDirectfs, nil})
  1785  	}
  1786  
  1787  	opts := make([]string, 0, len(optsKV))
  1788  	for _, opt := range optsKV {
  1789  		opts = append(opts, opt.String())
  1790  	}
  1791  	return strings.Join(opts, ",")
  1792  }
  1793  
  1794  // IsDescendant implements vfs.FilesystemImpl.IsDescendant.
  1795  func (fs *filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool {
  1796  	return genericIsDescendant(vfsroot.Dentry(), vd.Dentry().Impl().(*dentry))
  1797  }