github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/gofer/filesystem.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gofer
    16  
    17  import (
    18  	"fmt"
    19  	"math"
    20  	"strings"
    21  	"sync"
    22  	"sync/atomic"
    23  
    24  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    25  	"github.com/SagerNet/gvisor/pkg/context"
    26  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    27  	"github.com/SagerNet/gvisor/pkg/fspath"
    28  	"github.com/SagerNet/gvisor/pkg/p9"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/host"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
    31  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    32  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    33  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/pipe"
    34  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    35  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    36  	"github.com/SagerNet/gvisor/pkg/syserror"
    37  )
    38  
    39  // Sync implements vfs.FilesystemImpl.Sync.
    40  func (fs *filesystem) Sync(ctx context.Context) error {
    41  	// Snapshot current syncable dentries and special file FDs.
    42  	fs.renameMu.RLock()
    43  	fs.syncMu.Lock()
    44  	ds := make([]*dentry, 0, len(fs.syncableDentries))
    45  	for d := range fs.syncableDentries {
    46  		// It's safe to use IncRef here even though fs.syncableDentries doesn't
    47  		// hold references since we hold fs.renameMu. Note that we can't use
    48  		// TryIncRef since cached dentries at zero references should still be
    49  		// synced.
    50  		d.IncRef()
    51  		ds = append(ds, d)
    52  	}
    53  	fs.renameMu.RUnlock()
    54  	sffds := make([]*specialFileFD, 0, len(fs.specialFileFDs))
    55  	for sffd := range fs.specialFileFDs {
    56  		// As above, fs.specialFileFDs doesn't hold references. However, unlike
    57  		// dentries, an FD that has reached zero references can't be
    58  		// resurrected, so we can use TryIncRef.
    59  		if sffd.vfsfd.TryIncRef() {
    60  			sffds = append(sffds, sffd)
    61  		}
    62  	}
    63  	fs.syncMu.Unlock()
    64  
    65  	// Return the first error we encounter, but sync everything we can
    66  	// regardless.
    67  	var retErr error
    68  
    69  	// Sync syncable dentries.
    70  	for _, d := range ds {
    71  		err := d.syncCachedFile(ctx, true /* forFilesystemSync */)
    72  		d.DecRef(ctx)
    73  		if err != nil {
    74  			ctx.Infof("gofer.filesystem.Sync: dentry.syncCachedFile failed: %v", err)
    75  			if retErr == nil {
    76  				retErr = err
    77  			}
    78  		}
    79  	}
    80  
    81  	// Sync special files, which may be writable but do not use dentry shared
    82  	// handles (so they won't be synced by the above).
    83  	for _, sffd := range sffds {
    84  		err := sffd.sync(ctx, true /* forFilesystemSync */)
    85  		sffd.vfsfd.DecRef(ctx)
    86  		if err != nil {
    87  			ctx.Infof("gofer.filesystem.Sync: specialFileFD.sync failed: %v", err)
    88  			if retErr == nil {
    89  				retErr = err
    90  			}
    91  		}
    92  	}
    93  
    94  	return retErr
    95  }
    96  
    97  // maxFilenameLen is the maximum length of a filename. This is dictated by 9P's
    98  // encoding of strings, which uses 2 bytes for the length prefix.
    99  const maxFilenameLen = (1 << 16) - 1
   100  
   101  // dentrySlicePool is a pool of *[]*dentry used to store dentries for which
   102  // dentry.checkCachingLocked() must be called. The pool holds pointers to
   103  // slices because Go lacks generics, so sync.Pool operates on interface{}, so
   104  // every call to (what should be) sync.Pool<[]*dentry>.Put() allocates a copy
   105  // of the slice header on the heap.
   106  var dentrySlicePool = sync.Pool{
   107  	New: func() interface{} {
   108  		ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity
   109  		return &ds
   110  	},
   111  }
   112  
   113  func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry {
   114  	if ds == nil {
   115  		ds = dentrySlicePool.Get().(*[]*dentry)
   116  	}
   117  	*ds = append(*ds, d)
   118  	return ds
   119  }
   120  
   121  // Precondition: !parent.isSynthetic() && !child.isSynthetic().
   122  func appendNewChildDentry(ds **[]*dentry, parent *dentry, child *dentry) {
   123  	// The new child was added to parent and took a ref on the parent (hence
   124  	// parent can be removed from cache). A new child has 0 refs for now. So
   125  	// checkCachingLocked() should be called on both. Call it first on the parent
   126  	// as it may create space in the cache for child to be inserted - hence
   127  	// avoiding a cache eviction.
   128  	*ds = appendDentry(*ds, parent)
   129  	*ds = appendDentry(*ds, child)
   130  }
   131  
   132  // Preconditions: ds != nil.
   133  func putDentrySlice(ds *[]*dentry) {
   134  	// Allow dentries to be GC'd.
   135  	for i := range *ds {
   136  		(*ds)[i] = nil
   137  	}
   138  	*ds = (*ds)[:0]
   139  	dentrySlicePool.Put(ds)
   140  }
   141  
   142  // renameMuRUnlockAndCheckCaching calls fs.renameMu.RUnlock(), then calls
   143  // dentry.checkCachingLocked on all dentries in *dsp with fs.renameMu locked
   144  // for writing.
   145  //
   146  // dsp is a pointer-to-pointer since defer evaluates its arguments immediately,
   147  // but dentry slices are allocated lazily, and it's much easier to say "defer
   148  // fs.renameMuRUnlockAndCheckCaching(&ds)" than "defer func() {
   149  // fs.renameMuRUnlockAndCheckCaching(ds) }()" to work around this.
   150  // +checklocksrelease:fs.renameMu
   151  func (fs *filesystem) renameMuRUnlockAndCheckCaching(ctx context.Context, dsp **[]*dentry) {
   152  	fs.renameMu.RUnlock()
   153  	if *dsp == nil {
   154  		return
   155  	}
   156  	ds := **dsp
   157  	for _, d := range ds {
   158  		d.checkCachingLocked(ctx, false /* renameMuWriteLocked */)
   159  	}
   160  	putDentrySlice(*dsp)
   161  }
   162  
   163  // +checklocksrelease:fs.renameMu
   164  func (fs *filesystem) renameMuUnlockAndCheckCaching(ctx context.Context, ds **[]*dentry) {
   165  	if *ds == nil {
   166  		fs.renameMu.Unlock()
   167  		return
   168  	}
   169  	for _, d := range **ds {
   170  		d.checkCachingLocked(ctx, true /* renameMuWriteLocked */)
   171  	}
   172  	fs.renameMu.Unlock()
   173  	putDentrySlice(*ds)
   174  }
   175  
   176  // stepLocked resolves rp.Component() to an existing file, starting from the
   177  // given directory.
   178  //
   179  // Dentries which may become cached as a result of the traversal are appended
   180  // to *ds.
   181  //
   182  // Preconditions:
   183  // * fs.renameMu must be locked.
   184  // * d.dirMu must be locked.
   185  // * !rp.Done().
   186  // * If !d.cachedMetadataAuthoritative(), then d and all children that are
   187  //   part of rp must have been revalidated.
   188  //
   189  // Postconditions: The returned dentry's cached metadata is up to date.
   190  func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, bool, error) {
   191  	if !d.isDir() {
   192  		return nil, false, syserror.ENOTDIR
   193  	}
   194  	if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
   195  		return nil, false, err
   196  	}
   197  	followedSymlink := false
   198  afterSymlink:
   199  	name := rp.Component()
   200  	if name == "." {
   201  		rp.Advance()
   202  		return d, followedSymlink, nil
   203  	}
   204  	if name == ".." {
   205  		if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil {
   206  			return nil, false, err
   207  		} else if isRoot || d.parent == nil {
   208  			rp.Advance()
   209  			return d, followedSymlink, nil
   210  		}
   211  		if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
   212  			return nil, false, err
   213  		}
   214  		rp.Advance()
   215  		return d.parent, followedSymlink, nil
   216  	}
   217  	child, err := fs.getChildLocked(ctx, d, name, ds)
   218  	if err != nil {
   219  		return nil, false, err
   220  	}
   221  	if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
   222  		return nil, false, err
   223  	}
   224  	if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() {
   225  		target, err := child.readlink(ctx, rp.Mount())
   226  		if err != nil {
   227  			return nil, false, err
   228  		}
   229  		if err := rp.HandleSymlink(target); err != nil {
   230  			return nil, false, err
   231  		}
   232  		followedSymlink = true
   233  		goto afterSymlink // don't check the current directory again
   234  	}
   235  	rp.Advance()
   236  	return child, followedSymlink, nil
   237  }
   238  
   239  // getChildLocked returns a dentry representing the child of parent with the
   240  // given name. Returns ENOENT if the child doesn't exist.
   241  //
   242  // Preconditions:
   243  // * fs.renameMu must be locked.
   244  // * parent.dirMu must be locked.
   245  // * parent.isDir().
   246  // * name is not "." or "..".
   247  // * dentry at name has been revalidated
   248  func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, error) {
   249  	if len(name) > maxFilenameLen {
   250  		return nil, linuxerr.ENAMETOOLONG
   251  	}
   252  	if child, ok := parent.children[name]; ok || parent.isSynthetic() {
   253  		if child == nil {
   254  			return nil, syserror.ENOENT
   255  		}
   256  		return child, nil
   257  	}
   258  
   259  	qid, file, attrMask, attr, err := parent.file.walkGetAttrOne(ctx, name)
   260  	if err != nil {
   261  		if linuxerr.Equals(linuxerr.ENOENT, err) {
   262  			parent.cacheNegativeLookupLocked(name)
   263  		}
   264  		return nil, err
   265  	}
   266  
   267  	// Create a new dentry representing the file.
   268  	child, err := fs.newDentry(ctx, file, qid, attrMask, &attr)
   269  	if err != nil {
   270  		file.close(ctx)
   271  		delete(parent.children, name)
   272  		return nil, err
   273  	}
   274  	parent.cacheNewChildLocked(child, name)
   275  	appendNewChildDentry(ds, parent, child)
   276  	return child, nil
   277  }
   278  
   279  // walkParentDirLocked resolves all but the last path component of rp to an
   280  // existing directory, starting from the given directory (which is usually
   281  // rp.Start().Impl().(*dentry)). It does not check that the returned directory
   282  // is searchable by the provider of rp.
   283  //
   284  // Preconditions:
   285  // * fs.renameMu must be locked.
   286  // * !rp.Done().
   287  // * If !d.cachedMetadataAuthoritative(), then d's cached metadata must be up
   288  //   to date.
   289  func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
   290  	if err := fs.revalidateParentDir(ctx, rp, d, ds); err != nil {
   291  		return nil, err
   292  	}
   293  	for !rp.Final() {
   294  		d.dirMu.Lock()
   295  		next, followedSymlink, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
   296  		d.dirMu.Unlock()
   297  		if err != nil {
   298  			return nil, err
   299  		}
   300  		d = next
   301  		if followedSymlink {
   302  			if err := fs.revalidateParentDir(ctx, rp, d, ds); err != nil {
   303  				return nil, err
   304  			}
   305  		}
   306  	}
   307  	if !d.isDir() {
   308  		return nil, syserror.ENOTDIR
   309  	}
   310  	return d, nil
   311  }
   312  
   313  // resolveLocked resolves rp to an existing file.
   314  //
   315  // Preconditions: fs.renameMu must be locked.
   316  func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
   317  	d := rp.Start().Impl().(*dentry)
   318  	if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
   319  		return nil, err
   320  	}
   321  	for !rp.Done() {
   322  		d.dirMu.Lock()
   323  		next, followedSymlink, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
   324  		d.dirMu.Unlock()
   325  		if err != nil {
   326  			return nil, err
   327  		}
   328  		d = next
   329  		if followedSymlink {
   330  			if err := fs.revalidatePath(ctx, rp, d, ds); err != nil {
   331  				return nil, err
   332  			}
   333  		}
   334  	}
   335  	if rp.MustBeDir() && !d.isDir() {
   336  		return nil, syserror.ENOTDIR
   337  	}
   338  	return d, nil
   339  }
   340  
// doCreateAt checks that creating a file at rp is permitted, then invokes
// createInRemoteDir (if the parent directory is a real remote directory) or
// createInSyntheticDir (if the parent directory is synthetic) to do so.
//
// dir reports whether the file being created is a directory: it permits a
// path that must resolve to a directory and adds IN_ISDIR to the inotify event
// generated on success. createInSyntheticDir may be nil, in which case
// creation inside a synthetic parent fails with EPERM.
//
// Both callbacks run with fs.renameMu read-locked and parent.dirMu locked.
// createInRemoteDir additionally receives the dentry slice so that it can
// record dentries needing a checkCachingLocked() call.
//
// Preconditions:
// * !rp.Done().
// * For the final path component in rp, !rp.ShouldFollowSymlink().
func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, createInRemoteDir func(parent *dentry, name string, ds **[]*dentry) error, createInSyntheticDir func(parent *dentry, name string) error) error {
	var ds *[]*dentry
	fs.renameMu.RLock()
	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	start := rp.Start().Impl().(*dentry)
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return err
	}

	// Order of checks is important. First check if parent directory can be
	// executed, then check for existence, and lastly check if mount is writable.
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return err
	}
	name := rp.Component()
	// "." and ".." always name an existing directory.
	if name == "." || name == ".." {
		return syserror.EEXIST
	}
	// Nothing can be created inside a directory that has been deleted.
	if parent.isDeleted() {
		return syserror.ENOENT
	}
	if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, name, &ds); err != nil {
		return err
	}

	parent.dirMu.Lock()
	defer parent.dirMu.Unlock()

	if len(name) > maxFilenameLen {
		return linuxerr.ENAMETOOLONG
	}
	// Check for existence only if caching information is available. Otherwise,
	// don't check for existence just yet. We will check for existence if the
	// checks for writability fail below. Existence check is done by the creation
	// RPCs themselves.
	if child, ok := parent.children[name]; ok && child != nil {
		return syserror.EEXIST
	}
	// checkExistence performs a full lookup of name (via getChildLocked) and
	// returns EEXIST if a file is found, nil if the lookup reports ENOENT, and
	// any other lookup error unchanged.
	checkExistence := func() error {
		if child, err := fs.getChildLocked(ctx, parent, name, &ds); err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
			return err
		} else if child != nil {
			return syserror.EEXIST
		}
		return nil
	}

	mnt := rp.Mount()
	if err := mnt.CheckBeginWrite(); err != nil {
		// Existence check takes precedence.
		if existenceErr := checkExistence(); existenceErr != nil {
			return existenceErr
		}
		return err
	}
	defer mnt.EndWrite()

	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
		// Existence check takes precedence.
		if existenceErr := checkExistence(); existenceErr != nil {
			return existenceErr
		}
		return err
	}
	// The path must resolve to a directory, but the file being created is not
	// one.
	if !dir && rp.MustBeDir() {
		return syserror.ENOENT
	}
	if parent.isSynthetic() {
		if createInSyntheticDir == nil {
			return linuxerr.EPERM
		}
		if err := createInSyntheticDir(parent, name); err != nil {
			return err
		}
		// Update the parent's timestamps, drop its cached directory entries
		// and report the creation to inotify watchers.
		parent.touchCMtime()
		parent.dirents = nil
		ev := linux.IN_CREATE
		if dir {
			ev |= linux.IN_ISDIR
		}
		parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
		return nil
	}
	// No cached dentry exists; however, in InteropModeShared there might still be
	// an existing file at name. Just attempt the file creation RPC anyways. If a
	// file does exist, the RPC will fail with EEXIST like we would have.
	if err := createInRemoteDir(parent, name, &ds); err != nil {
		return err
	}
	// Outside InteropModeShared the locally cached state of parent is kept in
	// step with the creation that just succeeded.
	if fs.opts.interop != InteropModeShared {
		if child, ok := parent.children[name]; ok && child == nil {
			// Delete the now-stale negative dentry.
			delete(parent.children, name)
		}
		parent.touchCMtime()
		parent.dirents = nil
	}
	ev := linux.IN_CREATE
	if dir {
		ev |= linux.IN_ISDIR
	}
	parent.watches.Notify(ctx, name, uint32(ev), 0, vfs.InodeEvent, false /* unlinked */)
	return nil
}
   453  
// unlinkAt removes the file named by the final component of rp from its
// parent directory. If dir is true the target must be an empty directory and
// the remote removal is issued with AT_REMOVEDIR; otherwise the target must
// not be a directory.
//
// When a dentry for the target exists, its deletion is bracketed by
// vfs.PrepareDeleteDentry and either AbortDeleteDentry (on every failure
// path) or CommitDeleteDentry (on success). On success a negative lookup for
// the name is cached in the parent and inotify events are generated.
//
// Preconditions: !rp.Done().
func (fs *filesystem) unlinkAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool) error {
	var ds *[]*dentry
	fs.renameMu.RLock()
	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
	start := rp.Start().Impl().(*dentry)
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return err
	}
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}
	if err := rp.Mount().CheckBeginWrite(); err != nil {
		return err
	}
	defer rp.Mount().EndWrite()

	// "." and ".." can never be removed; only the error differs between
	// directory removal and plain unlink.
	name := rp.Component()
	if dir {
		if name == "." {
			return linuxerr.EINVAL
		}
		if name == ".." {
			return linuxerr.ENOTEMPTY
		}
	} else {
		if name == "." || name == ".." {
			return syserror.EISDIR
		}
	}

	vfsObj := rp.VirtualFilesystem()
	if err := fs.revalidateOne(ctx, vfsObj, parent, rp.Component(), &ds); err != nil {
		return err
	}

	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)

	parent.dirMu.Lock()
	defer parent.dirMu.Unlock()

	// Load child if sticky bit is set because we need to determine whether
	// deletion is allowed. Without the sticky bit only the cached entry (if
	// any) is consulted, so child may legitimately remain nil below.
	var child *dentry
	if atomic.LoadUint32(&parent.mode)&linux.ModeSticky == 0 {
		var ok bool
		child, ok = parent.children[name]
		if ok && child == nil {
			// Hit a negative cached entry, child doesn't exist.
			return syserror.ENOENT
		}
	} else {
		child, _, err = fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
		if err != nil {
			return err
		}
		if err := parent.mayDelete(rp.Credentials(), child); err != nil {
			return err
		}
	}

	// If a child dentry exists, prepare to delete it. This should fail if it is
	// a mount point. We detect mount points by speculatively calling
	// PrepareDeleteDentry, which fails if child is a mount point.
	//
	// Also note that if child is nil, then it can't be a mount point.
	if child != nil {
		// Hold child.dirMu so we can check child.children and
		// child.syntheticChildren. We don't access these fields until a bit later,
		// but locking child.dirMu after calling vfs.PrepareDeleteDentry() would
		// create an inconsistent lock ordering between dentry.dirMu and
		// vfs.Dentry.mu (in the VFS lock order, it would make dentry.dirMu both "a
		// FilesystemImpl lock" and "a lock acquired by a FilesystemImpl between
		// PrepareDeleteDentry and CommitDeleteDentry"). To avoid this, lock
		// child.dirMu before calling PrepareDeleteDentry.
		child.dirMu.Lock()
		defer child.dirMu.Unlock()
		if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
			return err
		}
	}
	// From here on, every error return must call AbortDeleteDentry when
	// child != nil to undo the PrepareDeleteDentry above.
	flags := uint32(0)
	// If a dentry exists, use it for best-effort checks on its deletability.
	if dir {
		if child != nil {
			// child must be an empty directory.
			if child.syntheticChildren != 0 {
				// This is definitely not an empty directory, irrespective of
				// fs.opts.interop.
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: PrepareDeleteDentry called if child != nil.
				return linuxerr.ENOTEMPTY
			}
			// If InteropModeShared is in effect and the first call to
			// PrepareDeleteDentry above succeeded, then child wasn't
			// revalidated (so we can't expect its file type to be correct) and
			// individually revalidating its children (to confirm that they
			// still exist) would be a waste of time.
			if child.cachedMetadataAuthoritative() {
				if !child.isDir() {
					vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
					return syserror.ENOTDIR
				}
				// Any non-nil (i.e. non-negative) cached grandchild means the
				// directory is not empty.
				for _, grandchild := range child.children {
					if grandchild != nil {
						vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
						return linuxerr.ENOTEMPTY
					}
				}
			}
		}
		flags = linux.AT_REMOVEDIR
	} else {
		// child must be a non-directory file.
		if child != nil && child.isDir() {
			vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			return syserror.EISDIR
		}
		if rp.MustBeDir() {
			if child != nil {
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			}
			return syserror.ENOTDIR
		}
	}
	// Issue the remote unlink unless the target is synthetic: a synthetic
	// parent without a child dentry means the file does not exist, and a
	// synthetic child is removed purely from local state further down.
	if parent.isSynthetic() {
		if child == nil {
			return syserror.ENOENT
		}
	} else if child == nil || !child.isSynthetic() {
		err = parent.file.unlinkAt(ctx, name, flags)
		if err != nil {
			if child != nil {
				vfsObj.AbortDeleteDentry(&child.vfsd) // +checklocksforce: see above.
			}
			return err
		}
	}

	// Generate inotify events for rmdir or unlink.
	if dir {
		parent.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */)
	} else {
		var cw *vfs.Watches
		if child != nil {
			cw = &child.watches
		}
		vfs.InotifyRemoveChild(ctx, cw, &parent.watches, name)
	}

	if child != nil {
		vfsObj.CommitDeleteDentry(ctx, &child.vfsd) // +checklocksforce: see above.
		child.setDeleted()
		if child.isSynthetic() {
			parent.syntheticChildren--
			child.decRefNoCaching()
		}
		// Have checkCachingLocked() run on the deleted child once
		// fs.renameMu is released.
		ds = appendDentry(ds, child)
	}
	parent.cacheNegativeLookupLocked(name)
	// Only adjust the parent's cached metadata when that cache is
	// authoritative.
	if parent.cachedMetadataAuthoritative() {
		parent.dirents = nil
		parent.touchCMtime()
		if dir {
			parent.decLinks()
		}
	}
	return nil
}
   624  
   625  // AccessAt implements vfs.Filesystem.Impl.AccessAt.
   626  func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
   627  	var ds *[]*dentry
   628  	fs.renameMu.RLock()
   629  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   630  	d, err := fs.resolveLocked(ctx, rp, &ds)
   631  	if err != nil {
   632  		return err
   633  	}
   634  	return d.checkPermissions(creds, ats)
   635  }
   636  
   637  // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
   638  func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
   639  	var ds *[]*dentry
   640  	fs.renameMu.RLock()
   641  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   642  	d, err := fs.resolveLocked(ctx, rp, &ds)
   643  	if err != nil {
   644  		return nil, err
   645  	}
   646  	if opts.CheckSearchable {
   647  		if !d.isDir() {
   648  			return nil, syserror.ENOTDIR
   649  		}
   650  		if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
   651  			return nil, err
   652  		}
   653  	}
   654  	d.IncRef()
   655  	// Call d.checkCachingLocked() so it can be removed from the cache if needed.
   656  	ds = appendDentry(ds, d)
   657  	return &d.vfsd, nil
   658  }
   659  
   660  // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
   661  func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
   662  	var ds *[]*dentry
   663  	fs.renameMu.RLock()
   664  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
   665  	start := rp.Start().Impl().(*dentry)
   666  	d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
   667  	if err != nil {
   668  		return nil, err
   669  	}
   670  	d.IncRef()
   671  	// Call d.checkCachingLocked() so it can be removed from the cache if needed.
   672  	ds = appendDentry(ds, d)
   673  	return &d.vfsd, nil
   674  }
   675  
   676  // LinkAt implements vfs.FilesystemImpl.LinkAt.
   677  func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
   678  	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, _ **[]*dentry) error {
   679  		if rp.Mount() != vd.Mount() {
   680  			return linuxerr.EXDEV
   681  		}
   682  		d := vd.Dentry().Impl().(*dentry)
   683  		if d.isDir() {
   684  			return linuxerr.EPERM
   685  		}
   686  		gid := auth.KGID(atomic.LoadUint32(&d.gid))
   687  		uid := auth.KUID(atomic.LoadUint32(&d.uid))
   688  		mode := linux.FileMode(atomic.LoadUint32(&d.mode))
   689  		if err := vfs.MayLink(rp.Credentials(), mode, uid, gid); err != nil {
   690  			return err
   691  		}
   692  		if d.nlink == 0 {
   693  			return syserror.ENOENT
   694  		}
   695  		if d.nlink == math.MaxUint32 {
   696  			return linuxerr.EMLINK
   697  		}
   698  		if err := parent.file.link(ctx, d.file, childName); err != nil {
   699  			return err
   700  		}
   701  
   702  		// Success!
   703  		atomic.AddUint32(&d.nlink, 1)
   704  		return nil
   705  	}, nil)
   706  }
   707  
   708  // MkdirAt implements vfs.FilesystemImpl.MkdirAt.
   709  func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
   710  	creds := rp.Credentials()
   711  	return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, name string, ds **[]*dentry) error {
   712  		// If the parent is a setgid directory, use the parent's GID
   713  		// rather than the caller's and enable setgid.
   714  		kgid := creds.EffectiveKGID
   715  		mode := opts.Mode
   716  		if atomic.LoadUint32(&parent.mode)&linux.S_ISGID != 0 {
   717  			kgid = auth.KGID(atomic.LoadUint32(&parent.gid))
   718  			mode |= linux.S_ISGID
   719  		}
   720  		if _, err := parent.file.mkdir(ctx, name, p9.FileMode(mode), (p9.UID)(creds.EffectiveKUID), p9.GID(kgid)); err != nil {
   721  			if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) {
   722  				return err
   723  			}
   724  			ctx.Infof("Failed to create remote directory %q: %v; falling back to synthetic directory", name, err)
   725  			parent.createSyntheticChildLocked(&createSyntheticOpts{
   726  				name: name,
   727  				mode: linux.S_IFDIR | opts.Mode,
   728  				kuid: creds.EffectiveKUID,
   729  				kgid: creds.EffectiveKGID,
   730  			})
   731  			*ds = appendDentry(*ds, parent)
   732  		}
   733  		if fs.opts.interop != InteropModeShared {
   734  			parent.incLinks()
   735  		}
   736  		return nil
   737  	}, func(parent *dentry, name string) error {
   738  		if !opts.ForSyntheticMountpoint {
   739  			// Can't create non-synthetic files in synthetic directories.
   740  			return linuxerr.EPERM
   741  		}
   742  		parent.createSyntheticChildLocked(&createSyntheticOpts{
   743  			name: name,
   744  			mode: linux.S_IFDIR | opts.Mode,
   745  			kuid: creds.EffectiveKUID,
   746  			kgid: creds.EffectiveKGID,
   747  		})
   748  		parent.incLinks()
   749  		return nil
   750  	})
   751  }
   752  
   753  // MknodAt implements vfs.FilesystemImpl.MknodAt.
   754  func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
   755  	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, ds **[]*dentry) error {
   756  		creds := rp.Credentials()
   757  		_, err := parent.file.mknod(ctx, name, (p9.FileMode)(opts.Mode), opts.DevMajor, opts.DevMinor, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
   758  		if !linuxerr.Equals(linuxerr.EPERM, err) {
   759  			return err
   760  		}
   761  
   762  		// EPERM means that gofer does not allow creating a socket or pipe. Fallback
   763  		// to creating a synthetic one, i.e. one that is kept entirely in memory.
   764  
   765  		// Check that we're not overriding an existing file with a synthetic one.
   766  		_, _, err = fs.stepLocked(ctx, rp, parent, true, ds)
   767  		switch {
   768  		case err == nil:
   769  			// Step succeeded, another file exists.
   770  			return syserror.EEXIST
   771  		case !linuxerr.Equals(linuxerr.ENOENT, err):
   772  			// Unexpected error.
   773  			return err
   774  		}
   775  
   776  		switch opts.Mode.FileType() {
   777  		case linux.S_IFSOCK:
   778  			parent.createSyntheticChildLocked(&createSyntheticOpts{
   779  				name:     name,
   780  				mode:     opts.Mode,
   781  				kuid:     creds.EffectiveKUID,
   782  				kgid:     creds.EffectiveKGID,
   783  				endpoint: opts.Endpoint,
   784  			})
   785  			*ds = appendDentry(*ds, parent)
   786  			return nil
   787  		case linux.S_IFIFO:
   788  			parent.createSyntheticChildLocked(&createSyntheticOpts{
   789  				name: name,
   790  				mode: opts.Mode,
   791  				kuid: creds.EffectiveKUID,
   792  				kgid: creds.EffectiveKGID,
   793  				pipe: pipe.NewVFSPipe(true /* isNamed */, pipe.DefaultPipeSize),
   794  			})
   795  			*ds = appendDentry(*ds, parent)
   796  			return nil
   797  		}
   798  		// Retain error from gofer if synthetic file cannot be created internally.
   799  		return linuxerr.EPERM
   800  	}, nil)
   801  }
   802  
// OpenAt implements vfs.FilesystemImpl.OpenAt.
//
// It resolves rp with fs.renameMu read-locked. If the final path component
// does not exist and O_CREAT was requested, the file is created and opened via
// parent.createAndOpenChildLocked; otherwise the existing dentry is opened via
// dentry.open after renameMu has been released. A trailing symlink is
// followed when rp.ShouldFollowSymlink() reports true.
//
// Errors returned directly by this function include EOPNOTSUPP (O_TMPFILE),
// EISDIR (O_CREAT on a path that must be a directory), EEXIST (O_CREAT|O_EXCL
// on an existing file), EPERM (creation inside a synthetic directory) and
// ENOTDIR (path must be a directory but the child is not one).
func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
	// Reject O_TMPFILE, which is not supported; supporting it correctly in the
	// presence of other remote filesystem users requires remote filesystem
	// support, and it isn't clear that there's any way to implement this in
	// 9P.
	if opts.Flags&linux.O_TMPFILE != 0 {
		return nil, syserror.EOPNOTSUPP
	}
	// mayCreate: O_CREAT is set. mustCreate: both O_CREAT and O_EXCL are set.
	mayCreate := opts.Flags&linux.O_CREAT != 0
	mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL)

	var ds *[]*dentry
	fs.renameMu.RLock()
	// unlock releases renameMu at most once. It is deferred to cover every
	// error return, and is also called explicitly before dentry.open, whose
	// preconditions require that no locks are held.
	unlocked := false
	unlock := func() {
		if !unlocked {
			fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
			unlocked = true
		}
	}
	defer unlock()

	start := rp.Start().Impl().(*dentry)
	if rp.Done() {
		// The path has no remaining components: the target is start itself.

		// Reject attempts to open mount root directory with O_CREAT.
		if mayCreate && rp.MustBeDir() {
			return nil, syserror.EISDIR
		}
		if mustCreate {
			return nil, syserror.EEXIST
		}
		if !start.cachedMetadataAuthoritative() {
			// Refresh dentry's attributes before opening.
			if err := start.updateFromGetattr(ctx); err != nil {
				return nil, err
			}
		}
		// Take a reference on start for the duration of the open, which runs
		// after renameMu has been dropped.
		start.IncRef()
		defer start.DecRef(ctx)
		unlock()
		// start is intentionally not added to ds (which would remove it from the
		// cache) because doing so regresses performance in practice.
		return start.open(ctx, rp, &opts)
	}

	// Resolution restarts here (with start set to the symlink's parent) after
	// a trailing symlink has been handed to rp.HandleSymlink.
afterTrailingSymlink:
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return nil, err
	}
	// Check for search permission in the parent directory.
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return nil, err
	}
	// Reject attempts to open directories with O_CREAT.
	if mayCreate && rp.MustBeDir() {
		return nil, syserror.EISDIR
	}
	if err := fs.revalidateOne(ctx, rp.VirtualFilesystem(), parent, rp.Component(), &ds); err != nil {
		return nil, err
	}
	// Determine whether or not we need to create a file.
	parent.dirMu.Lock()
	child, _, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
	if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate {
		// The child does not exist and O_CREAT was given. Creation is refused
		// when the parent is synthetic.
		if parent.isSynthetic() {
			parent.dirMu.Unlock()
			return nil, linuxerr.EPERM
		}
		// parent.dirMu is held across creation, as required by
		// createAndOpenChildLocked's preconditions.
		fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts, &ds)
		parent.dirMu.Unlock()
		return fd, err
	}
	parent.dirMu.Unlock()
	if err != nil {
		return nil, err
	}
	// The child exists, which violates O_CREAT|O_EXCL.
	if mustCreate {
		return nil, syserror.EEXIST
	}
	// Open existing child or follow symlink.
	if child.isSymlink() && rp.ShouldFollowSymlink() {
		target, err := child.readlink(ctx, rp.Mount())
		if err != nil {
			return nil, err
		}
		if err := rp.HandleSymlink(target); err != nil {
			return nil, err
		}
		// Continue resolving from the directory that contains the symlink.
		start = parent
		goto afterTrailingSymlink
	}
	if rp.MustBeDir() && !child.isDir() {
		return nil, syserror.ENOTDIR
	}
	// Take a reference on child for the duration of the open, which runs
	// after renameMu has been dropped.
	child.IncRef()
	defer child.DecRef(ctx)
	unlock()
	// child is intentionally not added to ds (which would remove it from the
	// cache) because doing so regresses performance in practice.
	return child.open(ctx, rp, &opts)
}
   906  
   907  // Preconditions: The caller must hold no locks (since opening pipes may block
   908  // indefinitely).
   909  func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
   910  	ats := vfs.AccessTypesForOpenFlags(opts)
   911  	if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
   912  		return nil, err
   913  	}
   914  
   915  	trunc := opts.Flags&linux.O_TRUNC != 0 && d.fileType() == linux.S_IFREG
   916  	if trunc {
   917  		// Lock metadataMu *while* we open a regular file with O_TRUNC because
   918  		// open(2) will change the file size on server.
   919  		d.metadataMu.Lock()
   920  		defer d.metadataMu.Unlock()
   921  	}
   922  
   923  	var vfd *vfs.FileDescription
   924  	var err error
   925  	mnt := rp.Mount()
   926  	switch d.fileType() {
   927  	case linux.S_IFREG:
   928  		if !d.fs.opts.regularFilesUseSpecialFileFD {
   929  			if err := d.ensureSharedHandle(ctx, ats.MayRead(), ats.MayWrite(), trunc); err != nil {
   930  				return nil, err
   931  			}
   932  			fd, err := newRegularFileFD(mnt, d, opts.Flags)
   933  			if err != nil {
   934  				return nil, err
   935  			}
   936  			vfd = &fd.vfsfd
   937  		}
   938  	case linux.S_IFDIR:
   939  		// Can't open directories with O_CREAT.
   940  		if opts.Flags&linux.O_CREAT != 0 {
   941  			return nil, syserror.EISDIR
   942  		}
   943  		// Can't open directories writably.
   944  		if ats&vfs.MayWrite != 0 {
   945  			return nil, syserror.EISDIR
   946  		}
   947  		if opts.Flags&linux.O_DIRECT != 0 {
   948  			return nil, linuxerr.EINVAL
   949  		}
   950  		if !d.isSynthetic() {
   951  			if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, false /* write */, false /* trunc */); err != nil {
   952  				return nil, err
   953  			}
   954  		}
   955  		fd := &directoryFD{}
   956  		fd.LockFD.Init(&d.locks)
   957  		if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
   958  			return nil, err
   959  		}
   960  		if atomic.LoadInt32(&d.readFD) >= 0 {
   961  			fsmetric.GoferOpensHost.Increment()
   962  		} else {
   963  			fsmetric.GoferOpens9P.Increment()
   964  		}
   965  		return &fd.vfsfd, nil
   966  	case linux.S_IFLNK:
   967  		// Can't open symlinks without O_PATH, which is handled at the VFS layer.
   968  		return nil, linuxerr.ELOOP
   969  	case linux.S_IFSOCK:
   970  		if d.isSynthetic() {
   971  			return nil, linuxerr.ENXIO
   972  		}
   973  		if d.fs.iopts.OpenSocketsByConnecting {
   974  			return d.openSocketByConnecting(ctx, opts)
   975  		}
   976  	case linux.S_IFIFO:
   977  		if d.isSynthetic() {
   978  			return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags, &d.locks)
   979  		}
   980  	}
   981  
   982  	if vfd == nil {
   983  		if vfd, err = d.openSpecialFile(ctx, mnt, opts); err != nil {
   984  			return nil, err
   985  		}
   986  	}
   987  
   988  	if trunc {
   989  		// If no errors occured so far then update file size in memory. This
   990  		// step is required even if !d.cachedMetadataAuthoritative() because
   991  		// d.mappings has to be updated.
   992  		// d.metadataMu has already been acquired if trunc == true.
   993  		d.updateSizeLocked(0)
   994  
   995  		if d.cachedMetadataAuthoritative() {
   996  			d.touchCMtimeLocked()
   997  		}
   998  	}
   999  	return vfd, err
  1000  }
  1001  
  1002  func (d *dentry) openSocketByConnecting(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
  1003  	if opts.Flags&linux.O_DIRECT != 0 {
  1004  		return nil, linuxerr.EINVAL
  1005  	}
  1006  	fdObj, err := d.file.connect(ctx, p9.AnonymousSocket)
  1007  	if err != nil {
  1008  		return nil, err
  1009  	}
  1010  	fd, err := host.NewFD(ctx, kernel.KernelFromContext(ctx).HostMount(), fdObj.FD(), &host.NewFDOptions{
  1011  		HaveFlags: true,
  1012  		Flags:     opts.Flags,
  1013  	})
  1014  	if err != nil {
  1015  		fdObj.Close()
  1016  		return nil, err
  1017  	}
  1018  	fdObj.Release()
  1019  	return fd, nil
  1020  }
  1021  
  1022  func (d *dentry) openSpecialFile(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
  1023  	ats := vfs.AccessTypesForOpenFlags(opts)
  1024  	if opts.Flags&linux.O_DIRECT != 0 {
  1025  		return nil, linuxerr.EINVAL
  1026  	}
  1027  	// We assume that the server silently inserts O_NONBLOCK in the open flags
  1028  	// for all named pipes (because all existing gofers do this).
  1029  	//
  1030  	// NOTE(b/133875563): This makes named pipe opens racy, because the
  1031  	// mechanisms for translating nonblocking to blocking opens can only detect
  1032  	// the instantaneous presence of a peer holding the other end of the pipe
  1033  	// open, not whether the pipe was *previously* opened by a peer that has
  1034  	// since closed its end.
  1035  	isBlockingOpenOfNamedPipe := d.fileType() == linux.S_IFIFO && opts.Flags&linux.O_NONBLOCK == 0
  1036  retry:
  1037  	h, err := openHandle(ctx, d.file, ats.MayRead(), ats.MayWrite(), opts.Flags&linux.O_TRUNC != 0)
  1038  	if err != nil {
  1039  		if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && linuxerr.Equals(linuxerr.ENXIO, err) {
  1040  			// An attempt to open a named pipe with O_WRONLY|O_NONBLOCK fails
  1041  			// with ENXIO if opening the same named pipe with O_WRONLY would
  1042  			// block because there are no readers of the pipe.
  1043  			if err := sleepBetweenNamedPipeOpenChecks(ctx); err != nil {
  1044  				return nil, err
  1045  			}
  1046  			goto retry
  1047  		}
  1048  		return nil, err
  1049  	}
  1050  	if isBlockingOpenOfNamedPipe && ats == vfs.MayRead && h.fd >= 0 {
  1051  		if err := blockUntilNonblockingPipeHasWriter(ctx, h.fd); err != nil {
  1052  			h.close(ctx)
  1053  			return nil, err
  1054  		}
  1055  	}
  1056  	fd, err := newSpecialFileFD(h, mnt, d, opts.Flags)
  1057  	if err != nil {
  1058  		h.close(ctx)
  1059  		return nil, err
  1060  	}
  1061  	return &fd.vfsfd, nil
  1062  }
  1063  
// createAndOpenChildLocked creates a new file named rp.Component() in the
// directory d on the remote filesystem, builds a dentry for it, inserts that
// dentry into the tree and returns a file description for the newly created
// file. The fid (and host FD, if any) opened by the create request is reused
// for the returned file description instead of being reopened.
//
// The caller must have write permission on d, and d must not have been
// deleted; otherwise the permission error or ENOENT is returned.
//
// Preconditions:
// * d.fs.renameMu must be locked.
// * d.dirMu must be locked.
// * !d.isSynthetic().
func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) {
	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
		return nil, err
	}
	if d.isDeleted() {
		return nil, syserror.ENOENT
	}
	mnt := rp.Mount()
	if err := mnt.CheckBeginWrite(); err != nil {
		return nil, err
	}
	defer mnt.EndWrite()

	// 9P2000.L's lcreate takes a fid representing the parent directory, and
	// converts it into an open fid representing the created file, so we need
	// to duplicate the directory fid first.
	_, dirfile, err := d.file.walk(ctx, nil)
	if err != nil {
		return nil, err
	}
	creds := rp.Credentials()
	name := rp.Component()
	// We only want the access mode for creating the file.
	createFlags := p9.OpenFlags(opts.Flags) & p9.OpenFlagsModeMask

	// If the parent is a setgid directory, use the parent's GID rather
	// than the caller's.
	kgid := creds.EffectiveKGID
	if atomic.LoadUint32(&d.mode)&linux.S_ISGID != 0 {
		kgid = auth.KGID(atomic.LoadUint32(&d.gid))
	}

	fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, createFlags, p9.FileMode(opts.Mode), (p9.UID)(creds.EffectiveKUID), p9.GID(kgid))
	if err != nil {
		// The create failed, so the duplicated directory fid is closed here.
		dirfile.close(ctx)
		return nil, err
	}
	// Then we need to walk to the file we just created to get a non-open fid
	// representing it, and to get its metadata. This must use d.file since, as
	// explained above, dirfile was invalidated by dirfile.create().
	_, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name)
	if err != nil {
		// Release what the create handed back: the open fid and, if one was
		// returned, the host FD.
		openFile.close(ctx)
		if fdobj != nil {
			fdobj.Close()
		}
		return nil, err
	}

	// Construct the new dentry.
	child, err := d.fs.newDentry(ctx, nonOpenFile, createQID, attrMask, &attr)
	if err != nil {
		// No dentry took ownership of the fids, so close all of them here.
		nonOpenFile.close(ctx)
		openFile.close(ctx)
		if fdobj != nil {
			fdobj.Close()
		}
		return nil, err
	}
	// Incorporate the fid that was opened by lcreate.
	useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD
	if useRegularFileFD {
		// Store the open fid (and host FD, or -1 if there is none) in the
		// child's read and/or write handle fields, depending on the access
		// mode requested by opts.Flags.
		openFD := int32(-1)
		if fdobj != nil {
			openFD = int32(fdobj.Release())
		}
		child.handleMu.Lock()
		if vfs.MayReadFileWithOpenFlags(opts.Flags) {
			child.readFile = openFile
			if fdobj != nil {
				child.readFD = openFD
				child.mmapFD = openFD
			}
		}
		if vfs.MayWriteFileWithOpenFlags(opts.Flags) {
			child.writeFile = openFile
			child.writeFD = openFD
		}
		child.handleMu.Unlock()
	}
	// Insert the dentry into the tree.
	d.cacheNewChildLocked(child, name)
	appendNewChildDentry(ds, d, child)
	if d.cachedMetadataAuthoritative() {
		// The directory's contents changed: update its timestamps and drop
		// the cached directory entries.
		d.touchCMtime()
		d.dirents = nil
	}

	// Finally, construct a file description representing the created file.
	var childVFSFD *vfs.FileDescription
	if useRegularFileFD {
		fd, err := newRegularFileFD(mnt, child, opts.Flags)
		if err != nil {
			return nil, err
		}
		childVFSFD = &fd.vfsfd
	} else {
		// The open fid was not stored in the child dentry above, so wrap it
		// (and the host FD, if any) in a handle owned by a special file FD.
		h := handle{
			file: openFile,
			fd:   -1,
		}
		if fdobj != nil {
			h.fd = int32(fdobj.Release())
		}
		fd, err := newSpecialFileFD(h, mnt, child, opts.Flags)
		if err != nil {
			h.close(ctx)
			return nil, err
		}
		childVFSFD = &fd.vfsfd
	}
	// Notify watches on the parent directory that a child was created.
	d.watches.Notify(ctx, name, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */)
	return childVFSFD, nil
}
  1182  
  1183  // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
  1184  func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
  1185  	var ds *[]*dentry
  1186  	fs.renameMu.RLock()
  1187  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1188  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1189  	if err != nil {
  1190  		return "", err
  1191  	}
  1192  	if !d.isSymlink() {
  1193  		return "", linuxerr.EINVAL
  1194  	}
  1195  	return d.readlink(ctx, rp.Mount())
  1196  }
  1197  
// RenameAt implements vfs.FilesystemImpl.RenameAt.
//
// It renames the child oldName of oldParentVD to the final component of rp,
// replacing an existing file at the destination unless RENAME_NOREPLACE is
// set. fs.renameMu is held exclusively for the whole operation. The only
// rename flag accepted is RENAME_NOREPLACE, and it is rejected with EINVAL
// under InteropModeShared.
//
// The remote filesystem is updated first; the dentry tree, cached metadata
// and inotify watches are updated only after the remote operation succeeded.
func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
	// Resolve newParent first to verify that it's on this Mount.
	var ds *[]*dentry
	fs.renameMu.Lock()
	defer fs.renameMuUnlockAndCheckCaching(ctx, &ds)
	newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds)
	if err != nil {
		return err
	}

	// Any flag other than RENAME_NOREPLACE is unsupported.
	if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
		return linuxerr.EINVAL
	}
	if fs.opts.interop == InteropModeShared && opts.Flags&linux.RENAME_NOREPLACE != 0 {
		// Requires 9P support to synchronize with other remote filesystem
		// users.
		return linuxerr.EINVAL
	}

	newName := rp.Component()
	// "." and ".." are never valid rename destinations.
	if newName == "." || newName == ".." {
		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
			return syserror.EEXIST
		}
		return linuxerr.EBUSY
	}
	// Source and destination must be on the same mount.
	mnt := rp.Mount()
	if mnt != oldParentVD.Mount() {
		return linuxerr.EXDEV
	}
	if err := mnt.CheckBeginWrite(); err != nil {
		return err
	}
	defer mnt.EndWrite()

	oldParent := oldParentVD.Dentry().Impl().(*dentry)
	if !oldParent.cachedMetadataAuthoritative() {
		// Refresh oldParent's attributes before the permission check below.
		if err := oldParent.updateFromGetattr(ctx); err != nil {
			return err
		}
	}
	creds := rp.Credentials()
	if err := oldParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}

	vfsObj := rp.VirtualFilesystem()
	if err := fs.revalidateOne(ctx, vfsObj, newParent, newName, &ds); err != nil {
		return err
	}
	if err := fs.revalidateOne(ctx, vfsObj, oldParent, oldName, &ds); err != nil {
		return err
	}

	// We need a dentry representing the renamed file since, if it's a
	// directory, we need to check for write permission on it.
	oldParent.dirMu.Lock()
	defer oldParent.dirMu.Unlock()
	renamed, err := fs.getChildLocked(ctx, oldParent, oldName, &ds)
	if err != nil {
		return err
	}
	if err := oldParent.mayDelete(creds, renamed); err != nil {
		return err
	}
	if renamed.isDir() {
		// A directory cannot be moved into itself or into one of its own
		// descendants.
		if renamed == newParent || genericIsAncestorDentry(renamed, newParent) {
			return linuxerr.EINVAL
		}
		if oldParent != newParent {
			// Moving a directory to a different parent requires write
			// permission on the directory itself.
			if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil {
				return err
			}
		}
	} else {
		if opts.MustBeDir || rp.MustBeDir() {
			return syserror.ENOTDIR
		}
	}

	if oldParent != newParent {
		if err := newParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
			return err
		}
		// newParent.dirMu is only taken when it is a different dentry from
		// oldParent, whose dirMu is already held.
		newParent.dirMu.Lock()
		defer newParent.dirMu.Unlock()
	}
	if newParent.isDeleted() {
		return syserror.ENOENT
	}
	// Look up the file currently at the destination, if any. ENOENT simply
	// means there is nothing to replace.
	replaced, err := fs.getChildLocked(ctx, newParent, newName, &ds)
	if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
		return err
	}
	var replacedVFSD *vfs.Dentry
	if replaced != nil {
		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
			return syserror.EEXIST
		}
		replacedVFSD = &replaced.vfsd
		// A directory may only be replaced by a directory, and a
		// non-directory only by a non-directory.
		if replaced.isDir() {
			if !renamed.isDir() {
				return syserror.EISDIR
			}
			if genericIsAncestorDentry(replaced, renamed) {
				return linuxerr.ENOTEMPTY
			}
		} else {
			if rp.MustBeDir() || renamed.isDir() {
				return syserror.ENOTDIR
			}
		}
	}

	// Renaming a file onto itself is a no-op.
	if oldParent == newParent && oldName == newName {
		return nil
	}
	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)
	if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil {
		return err
	}

	// Update the remote filesystem.
	if !renamed.isSynthetic() {
		if err := renamed.file.rename(ctx, newParent.file, newName); err != nil {
			vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
			return err
		}
	} else if replaced != nil && !replaced.isSynthetic() {
		// We are replacing an existing real file with a synthetic one, so we
		// need to unlink the former.
		flags := uint32(0)
		if replaced.isDir() {
			flags = linux.AT_REMOVEDIR
		}
		if err := newParent.file.unlinkAt(ctx, newName, flags); err != nil {
			vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
			return err
		}
	}

	// Update the dentry tree.
	vfsObj.CommitRenameReplaceDentry(ctx, &renamed.vfsd, replacedVFSD)
	if replaced != nil {
		replaced.setDeleted()
		if replaced.isSynthetic() {
			// The replaced synthetic child no longer counts against
			// newParent, and the reference on it is dropped.
			newParent.syntheticChildren--
			replaced.decRefNoCaching()
		}
		ds = appendDentry(ds, replaced)
	}
	oldParent.cacheNegativeLookupLocked(oldName)
	// We don't use newParent.cacheNewChildLocked() since we don't want to mess
	// with reference counts and queue oldParent for checkCachingLocked if the
	// parent isn't actually changing.
	if oldParent != newParent {
		// Move the reference held on behalf of renamed from oldParent to
		// newParent, and queue both parents in ds.
		oldParent.decRefNoCaching()
		newParent.IncRef()
		ds = appendDentry(ds, newParent)
		ds = appendDentry(ds, oldParent)
		if renamed.isSynthetic() {
			oldParent.syntheticChildren--
			newParent.syntheticChildren++
		}
		renamed.parent = newParent
	}
	renamed.name = newName
	if newParent.children == nil {
		newParent.children = make(map[string]*dentry)
	}
	newParent.children[newName] = renamed

	// Update metadata.
	if renamed.cachedMetadataAuthoritative() {
		renamed.touchCtime()
	}
	if oldParent.cachedMetadataAuthoritative() {
		oldParent.dirents = nil
		oldParent.touchCMtime()
		if renamed.isDir() {
			// A directory child left oldParent, so its link count drops.
			oldParent.decLinks()
		}
	}
	if newParent.cachedMetadataAuthoritative() {
		newParent.dirents = nil
		newParent.touchCMtime()
		if renamed.isDir() && (replaced == nil || !replaced.isDir()) {
			// Increase the link count if we did not replace another directory.
			newParent.incLinks()
		}
	}
	vfs.InotifyRename(ctx, &renamed.watches, &oldParent.watches, &newParent.watches, oldName, newName, renamed.isDir())
	return nil
}
  1394  
  1395  // RmdirAt implements vfs.FilesystemImpl.RmdirAt.
  1396  func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
  1397  	return fs.unlinkAt(ctx, rp, true /* dir */)
  1398  }
  1399  
  1400  // SetStatAt implements vfs.FilesystemImpl.SetStatAt.
  1401  func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
  1402  	var ds *[]*dentry
  1403  	fs.renameMu.RLock()
  1404  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1405  	if err != nil {
  1406  		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1407  		return err
  1408  	}
  1409  	err = d.setStat(ctx, rp.Credentials(), &opts, rp.Mount())
  1410  	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1411  	if err != nil {
  1412  		return err
  1413  	}
  1414  
  1415  	if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
  1416  		d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
  1417  	}
  1418  	return nil
  1419  }
  1420  
  1421  // StatAt implements vfs.FilesystemImpl.StatAt.
  1422  func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
  1423  	var ds *[]*dentry
  1424  	fs.renameMu.RLock()
  1425  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1426  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1427  	if err != nil {
  1428  		return linux.Statx{}, err
  1429  	}
  1430  	// Since walking updates metadata for all traversed dentries under
  1431  	// InteropModeShared, including the returned one, we can return cached
  1432  	// metadata here regardless of fs.opts.interop.
  1433  	var stat linux.Statx
  1434  	d.statTo(&stat)
  1435  	return stat, nil
  1436  }
  1437  
  1438  // StatFSAt implements vfs.FilesystemImpl.StatFSAt.
  1439  func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
  1440  	var ds *[]*dentry
  1441  	fs.renameMu.RLock()
  1442  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1443  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1444  	if err != nil {
  1445  		return linux.Statfs{}, err
  1446  	}
  1447  	// If d is synthetic, invoke statfs on the first ancestor of d that isn't.
  1448  	for d.isSynthetic() {
  1449  		d = d.parent
  1450  	}
  1451  	fsstat, err := d.file.statFS(ctx)
  1452  	if err != nil {
  1453  		return linux.Statfs{}, err
  1454  	}
  1455  	nameLen := uint64(fsstat.NameLength)
  1456  	if nameLen > maxFilenameLen {
  1457  		nameLen = maxFilenameLen
  1458  	}
  1459  	return linux.Statfs{
  1460  		// This is primarily for distinguishing a gofer file system in
  1461  		// tests. Testing is important, so instead of defining
  1462  		// something completely random, use a standard value.
  1463  		Type:            linux.V9FS_MAGIC,
  1464  		BlockSize:       int64(fsstat.BlockSize),
  1465  		Blocks:          fsstat.Blocks,
  1466  		BlocksFree:      fsstat.BlocksFree,
  1467  		BlocksAvailable: fsstat.BlocksAvailable,
  1468  		Files:           fsstat.Files,
  1469  		FilesFree:       fsstat.FilesFree,
  1470  		NameLength:      nameLen,
  1471  	}, nil
  1472  }
  1473  
  1474  // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
  1475  func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
  1476  	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, name string, _ **[]*dentry) error {
  1477  		creds := rp.Credentials()
  1478  		_, err := parent.file.symlink(ctx, target, name, (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
  1479  		return err
  1480  	}, nil)
  1481  }
  1482  
  1483  // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
  1484  func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
  1485  	return fs.unlinkAt(ctx, rp, false /* dir */)
  1486  }
  1487  
  1488  // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
  1489  func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
  1490  	var ds *[]*dentry
  1491  	fs.renameMu.RLock()
  1492  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1493  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1494  	if err != nil {
  1495  		return nil, err
  1496  	}
  1497  	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
  1498  		return nil, err
  1499  	}
  1500  	if d.isSocket() {
  1501  		if !d.isSynthetic() {
  1502  			d.IncRef()
  1503  			ds = appendDentry(ds, d)
  1504  			return &endpoint{
  1505  				dentry: d,
  1506  				path:   opts.Addr,
  1507  			}, nil
  1508  		}
  1509  		if d.endpoint != nil {
  1510  			return d.endpoint, nil
  1511  		}
  1512  	}
  1513  	return nil, linuxerr.ECONNREFUSED
  1514  }
  1515  
  1516  // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
  1517  func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
  1518  	var ds *[]*dentry
  1519  	fs.renameMu.RLock()
  1520  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1521  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1522  	if err != nil {
  1523  		return nil, err
  1524  	}
  1525  	return d.listXattr(ctx, rp.Credentials(), size)
  1526  }
  1527  
  1528  // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
  1529  func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
  1530  	var ds *[]*dentry
  1531  	fs.renameMu.RLock()
  1532  	defer fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1533  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1534  	if err != nil {
  1535  		return "", err
  1536  	}
  1537  	return d.getXattr(ctx, rp.Credentials(), &opts)
  1538  }
  1539  
  1540  // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
  1541  func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
  1542  	var ds *[]*dentry
  1543  	fs.renameMu.RLock()
  1544  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1545  	if err != nil {
  1546  		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1547  		return err
  1548  	}
  1549  	err = d.setXattr(ctx, rp.Credentials(), &opts)
  1550  	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1551  	if err != nil {
  1552  		return err
  1553  	}
  1554  
  1555  	d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
  1556  	return nil
  1557  }
  1558  
  1559  // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
  1560  func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
  1561  	var ds *[]*dentry
  1562  	fs.renameMu.RLock()
  1563  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1564  	if err != nil {
  1565  		fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1566  		return err
  1567  	}
  1568  	err = d.removeXattr(ctx, rp.Credentials(), name)
  1569  	fs.renameMuRUnlockAndCheckCaching(ctx, &ds)
  1570  	if err != nil {
  1571  		return err
  1572  	}
  1573  
  1574  	d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0, vfs.InodeEvent)
  1575  	return nil
  1576  }
  1577  
  1578  // PrependPath implements vfs.FilesystemImpl.PrependPath.
  1579  func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
  1580  	fs.renameMu.RLock()
  1581  	defer fs.renameMu.RUnlock()
  1582  	return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
  1583  }
  1584  
  1585  type mopt struct {
  1586  	key   string
  1587  	value interface{}
  1588  }
  1589  
  1590  func (m mopt) String() string {
  1591  	if m.value == nil {
  1592  		return fmt.Sprintf("%s", m.key)
  1593  	}
  1594  	return fmt.Sprintf("%s=%v", m.key, m.value)
  1595  }
  1596  
  1597  // MountOptions implements vfs.FilesystemImpl.MountOptions.
  1598  func (fs *filesystem) MountOptions() string {
  1599  	optsKV := []mopt{
  1600  		{moptTransport, transportModeFD}, // Only valid value, currently.
  1601  		{moptReadFD, fs.opts.fd},         // Currently, read and write FD are the same.
  1602  		{moptWriteFD, fs.opts.fd},        // Currently, read and write FD are the same.
  1603  		{moptAname, fs.opts.aname},
  1604  		{moptDfltUID, fs.opts.dfltuid},
  1605  		{moptDfltGID, fs.opts.dfltgid},
  1606  		{moptMsize, fs.opts.msize},
  1607  		{moptVersion, fs.opts.version},
  1608  		{moptDentryCacheLimit, fs.opts.maxCachedDentries},
  1609  	}
  1610  
  1611  	switch fs.opts.interop {
  1612  	case InteropModeExclusive:
  1613  		optsKV = append(optsKV, mopt{moptCache, cacheFSCache})
  1614  	case InteropModeWritethrough:
  1615  		optsKV = append(optsKV, mopt{moptCache, cacheFSCacheWritethrough})
  1616  	case InteropModeShared:
  1617  		if fs.opts.regularFilesUseSpecialFileFD {
  1618  			optsKV = append(optsKV, mopt{moptCache, cacheNone})
  1619  		} else {
  1620  			optsKV = append(optsKV, mopt{moptCache, cacheRemoteRevalidating})
  1621  		}
  1622  	}
  1623  	if fs.opts.forcePageCache {
  1624  		optsKV = append(optsKV, mopt{moptForcePageCache, nil})
  1625  	}
  1626  	if fs.opts.limitHostFDTranslation {
  1627  		optsKV = append(optsKV, mopt{moptLimitHostFDTranslation, nil})
  1628  	}
  1629  	if fs.opts.overlayfsStaleRead {
  1630  		optsKV = append(optsKV, mopt{moptOverlayfsStaleRead, nil})
  1631  	}
  1632  
  1633  	opts := make([]string, 0, len(optsKV))
  1634  	for _, opt := range optsKV {
  1635  		opts = append(opts, opt.String())
  1636  	}
  1637  	return strings.Join(opts, ",")
  1638  }