github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/overlay/filesystem.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package overlay
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  	"sync/atomic"
    21  
    22  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    23  	"github.com/SagerNet/gvisor/pkg/context"
    24  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    25  	"github.com/SagerNet/gvisor/pkg/fspath"
    26  	"github.com/SagerNet/gvisor/pkg/log"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    30  	"github.com/SagerNet/gvisor/pkg/sync"
    31  	"github.com/SagerNet/gvisor/pkg/syserror"
    32  )
    33  
    34  // _OVL_XATTR_PREFIX is an extended attribute key prefix to identify overlayfs
    35  // attributes.
    36  // Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_PREFIX
    37  const _OVL_XATTR_PREFIX = linux.XATTR_TRUSTED_PREFIX + "overlay."
    38  
    39  // _OVL_XATTR_OPAQUE is an extended attribute key whose value is set to "y" for
    40  // opaque directories.
    41  // Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_OPAQUE
    42  const _OVL_XATTR_OPAQUE = _OVL_XATTR_PREFIX + "opaque"
    43  
    44  func isWhiteout(stat *linux.Statx) bool {
    45  	return stat.Mode&linux.S_IFMT == linux.S_IFCHR && stat.RdevMajor == 0 && stat.RdevMinor == 0
    46  }
    47  
    48  // Sync implements vfs.FilesystemImpl.Sync.
    49  func (fs *filesystem) Sync(ctx context.Context) error {
    50  	if fs.opts.UpperRoot.Ok() {
    51  		return fs.opts.UpperRoot.Mount().Filesystem().Impl().Sync(ctx)
    52  	}
    53  	return nil
    54  }
    55  
    56  var dentrySlicePool = sync.Pool{
    57  	New: func() interface{} {
    58  		ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity
    59  		return &ds
    60  	},
    61  }
    62  
    63  func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry {
    64  	if ds == nil {
    65  		ds = dentrySlicePool.Get().(*[]*dentry)
    66  	}
    67  	*ds = append(*ds, d)
    68  	return ds
    69  }
    70  
    71  // Preconditions: ds != nil.
    72  func putDentrySlice(ds *[]*dentry) {
    73  	// Allow dentries to be GC'd.
    74  	for i := range *ds {
    75  		(*ds)[i] = nil
    76  	}
    77  	*ds = (*ds)[:0]
    78  	dentrySlicePool.Put(ds)
    79  }
    80  
    81  // renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls
    82  // dentry.checkDropLocked on all dentries in *dsp with fs.renameMu locked for
    83  // writing.
    84  //
    85  // dsp is a pointer-to-pointer since defer evaluates its arguments immediately,
    86  // but dentry slices are allocated lazily, and it's much easier to say "defer
    87  // fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() {
    88  // fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this.
    89  //
    90  // +checklocksrelease:fs.renameMu
    91  func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]*dentry) {
    92  	fs.renameMu.RUnlock()
    93  	if *dsp == nil {
    94  		return
    95  	}
    96  	ds := **dsp
    97  	// Only go through calling dentry.checkDropLocked() (which requires
    98  	// re-locking renameMu) if we actually have any dentries with zero refs.
    99  	checkAny := false
   100  	for i := range ds {
   101  		if atomic.LoadInt64(&ds[i].refs) == 0 {
   102  			checkAny = true
   103  			break
   104  		}
   105  	}
   106  	if checkAny {
   107  		fs.renameMu.Lock()
   108  		for _, d := range ds {
   109  			d.checkDropLocked(ctx)
   110  		}
   111  		fs.renameMu.Unlock()
   112  	}
   113  	putDentrySlice(*dsp)
   114  }
   115  
   116  // +checklocksrelease:fs.renameMu
   117  func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) {
   118  	if *ds == nil {
   119  		fs.renameMu.Unlock()
   120  		return
   121  	}
   122  	for _, d := range **ds {
   123  		d.checkDropLocked(ctx)
   124  	}
   125  	fs.renameMu.Unlock()
   126  	putDentrySlice(*ds)
   127  }
   128  
   129  // stepLocked resolves rp.Component() to an existing file, starting from the
   130  // given directory.
   131  //
   132  // Dentries which may have a reference count of zero, and which therefore
   133  // should be dropped once traversal is complete, are appended to ds.
   134  //
   135  // Preconditions:
   136  // * fs.renameMu must be locked.
   137  // * d.dirMu must be locked.
   138  // * !rp.Done().
   139  func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, mayFollowSymlinks bool, ds **[]*dentry) (*dentry, lookupLayer, error) {
   140  	if !d.isDir() {
   141  		return nil, lookupLayerNone, syserror.ENOTDIR
   142  	}
   143  	if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
   144  		return nil, lookupLayerNone, err
   145  	}
   146  afterSymlink:
   147  	name := rp.Component()
   148  	if name == "." {
   149  		rp.Advance()
   150  		return d, d.topLookupLayer(), nil
   151  	}
   152  	if name == ".." {
   153  		if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil {
   154  			return nil, lookupLayerNone, err
   155  		} else if isRoot || d.parent == nil {
   156  			rp.Advance()
   157  			return d, d.topLookupLayer(), nil
   158  		}
   159  		if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil {
   160  			return nil, lookupLayerNone, err
   161  		}
   162  		rp.Advance()
   163  		return d.parent, d.parent.topLookupLayer(), nil
   164  	}
   165  	child, topLookupLayer, err := fs.getChildLocked(ctx, d, name, ds)
   166  	if err != nil {
   167  		return nil, topLookupLayer, err
   168  	}
   169  	if err := rp.CheckMount(ctx, &child.vfsd); err != nil {
   170  		return nil, lookupLayerNone, err
   171  	}
   172  	if child.isSymlink() && mayFollowSymlinks && rp.ShouldFollowSymlink() {
   173  		target, err := child.readlink(ctx)
   174  		if err != nil {
   175  			return nil, lookupLayerNone, err
   176  		}
   177  		if err := rp.HandleSymlink(target); err != nil {
   178  			return nil, topLookupLayer, err
   179  		}
   180  		goto afterSymlink // don't check the current directory again
   181  	}
   182  	rp.Advance()
   183  	return child, topLookupLayer, nil
   184  }
   185  
   186  // Preconditions:
   187  // * fs.renameMu must be locked.
   188  // * d.dirMu must be locked.
   189  func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, lookupLayer, error) {
   190  	if child, ok := parent.children[name]; ok {
   191  		return child, child.topLookupLayer(), nil
   192  	}
   193  	child, topLookupLayer, err := fs.lookupLocked(ctx, parent, name)
   194  	if err != nil {
   195  		return nil, topLookupLayer, err
   196  	}
   197  	if parent.children == nil {
   198  		parent.children = make(map[string]*dentry)
   199  	}
   200  	parent.children[name] = child
   201  	// child's refcount is initially 0, so it may be dropped after traversal.
   202  	*ds = appendDentry(*ds, child)
   203  	return child, topLookupLayer, nil
   204  }
   205  
// lookupLocked builds a new dentry for the child of parent called name by
// looking the name up on each of parent's layers via parent.iterLayers.
//
// On success it returns the new dentry (with child.parent set to parent, on
// which a reference is taken) and the topmost layer on which the file was
// found. If the file does not exist in the overlay, the partially built
// dentry is destroyed and syserror.ENOENT is returned; the returned
// lookupLayer then still distinguishes lookupLayerNone from
// lookupLayerUpperWhiteout. Any other lookup error also destroys the dentry.
//
// Preconditions:
// * fs.renameMu must be locked.
// * parent.dirMu must be locked.
func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name string) (*dentry, lookupLayer, error) {
	childPath := fspath.Parse(name)
	child := fs.newDentry()
	topLookupLayer := lookupLayerNone
	var lookupErr error

	vfsObj := fs.vfsfs.VirtualFilesystem()
	// The callback returns true to continue with the next layer and false to
	// stop iterating. Errors are reported through lookupErr.
	parent.iterLayers(func(parentVD vfs.VirtualDentry, isUpper bool) bool {
		childVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
			Root:  parentVD,
			Start: parentVD,
			Path:  childPath,
		}, &vfs.GetDentryOptions{})
		if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENAMETOOLONG, err) {
			// The file doesn't exist on this layer. Proceed to the next one.
			return true
		}
		if err != nil {
			lookupErr = err
			return false
		}
		// Drop the reference returned by GetDentryAt when this callback
		// returns; layers that are kept take their own reference below.
		defer childVD.DecRef(ctx)

		mask := uint32(linux.STATX_TYPE)
		if topLookupLayer == lookupLayerNone {
			// Mode, UID, GID, and (for non-directories) inode number come from
			// the topmost layer on which the file exists.
			mask |= linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO
		}
		stat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{
			Root:  childVD,
			Start: childVD,
		}, &vfs.StatOptions{
			Mask: mask,
		})
		if err != nil {
			lookupErr = err
			return false
		}
		if stat.Mask&mask != mask {
			// The layer did not provide every requested field.
			lookupErr = linuxerr.EREMOTE
			return false
		}

		if isWhiteout(&stat) {
			// This is a whiteout, so it "doesn't exist" on this layer, and
			// layers below this one are ignored.
			if isUpper {
				topLookupLayer = lookupLayerUpperWhiteout
			}
			return false
		}
		isDir := stat.Mode&linux.S_IFMT == linux.S_IFDIR
		if topLookupLayer != lookupLayerNone && !isDir {
			// Directories are not merged with non-directory files from lower
			// layers; instead, layers including and below the first
			// non-directory file are ignored. (This file must be a directory
			// on previous layers, since lower layers aren't searched for
			// non-directory files.)
			return false
		}

		// Update child to include this layer.
		childVD.IncRef()
		if isUpper {
			child.upperVD = childVD
			child.copiedUp = 1
		} else {
			child.lowerVDs = append(child.lowerVDs, childVD)
		}
		if topLookupLayer == lookupLayerNone {
			// This is the topmost layer containing the file; record where it
			// was found and take the file's metadata from this layer.
			if isUpper {
				topLookupLayer = lookupLayerUpper
			} else {
				topLookupLayer = lookupLayerLower
			}
			child.mode = uint32(stat.Mode)
			child.uid = stat.UID
			child.gid = stat.GID
			child.devMajor = stat.DevMajor
			child.devMinor = stat.DevMinor
			child.ino = stat.Ino
		}

		// For non-directory files, only the topmost layer that contains a file
		// matters.
		if !isDir {
			return false
		}

		// Directories are merged with directories from lower layers if they
		// are not explicitly opaque.
		opaqueVal, err := vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{
			Root:  childVD,
			Start: childVD,
		}, &vfs.GetXattrOptions{
			Name: _OVL_XATTR_OPAQUE,
			Size: 1,
		})
		// Any failure to read the xattr is treated the same as the directory
		// not being opaque, so iteration continues to lower layers.
		return !(err == nil && opaqueVal == "y")
	})

	if lookupErr != nil {
		child.destroyLocked(ctx)
		return nil, topLookupLayer, lookupErr
	}
	if !topLookupLayer.existsInOverlay() {
		child.destroyLocked(ctx)
		return nil, topLookupLayer, syserror.ENOENT
	}

	// Device and inode numbers were copied from the topmost layer above. Remap
	// the device number to an appropriate overlay-private one.
	childDevMinor, err := fs.getPrivateDevMinor(child.devMajor, child.devMinor)
	if err != nil {
		ctx.Infof("overlay.filesystem.lookupLocked: failed to map layer device number (%d, %d) to an overlay-specific device number: %v", child.devMajor, child.devMinor, err)
		child.destroyLocked(ctx)
		return nil, topLookupLayer, err
	}
	child.devMajor = linux.UNNAMED_MAJOR
	child.devMinor = childDevMinor

	// The child holds a reference on its parent.
	parent.IncRef()
	child.parent = parent
	child.name = name
	return child, topLookupLayer, nil
}
   336  
   337  // lookupLayerLocked is similar to lookupLocked, but only returns information
   338  // about the file rather than a dentry.
   339  //
   340  // Preconditions:
   341  // * fs.renameMu must be locked.
   342  // * parent.dirMu must be locked.
   343  func (fs *filesystem) lookupLayerLocked(ctx context.Context, parent *dentry, name string) (lookupLayer, error) {
   344  	childPath := fspath.Parse(name)
   345  	lookupLayer := lookupLayerNone
   346  	var lookupErr error
   347  
   348  	parent.iterLayers(func(parentVD vfs.VirtualDentry, isUpper bool) bool {
   349  		stat, err := fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{
   350  			Root:  parentVD,
   351  			Start: parentVD,
   352  			Path:  childPath,
   353  		}, &vfs.StatOptions{
   354  			Mask: linux.STATX_TYPE,
   355  		})
   356  		if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENAMETOOLONG, err) {
   357  			// The file doesn't exist on this layer. Proceed to the next
   358  			// one.
   359  			return true
   360  		}
   361  		if err != nil {
   362  			lookupErr = err
   363  			return false
   364  		}
   365  		if stat.Mask&linux.STATX_TYPE == 0 {
   366  			// Linux's overlayfs tends to return EREMOTE in cases where a file
   367  			// is unusable for reasons that are not better captured by another
   368  			// errno.
   369  			lookupErr = linuxerr.EREMOTE
   370  			return false
   371  		}
   372  		if isWhiteout(&stat) {
   373  			// This is a whiteout, so it "doesn't exist" on this layer, and
   374  			// layers below this one are ignored.
   375  			if isUpper {
   376  				lookupLayer = lookupLayerUpperWhiteout
   377  			}
   378  			return false
   379  		}
   380  		// The file exists; we can stop searching.
   381  		if isUpper {
   382  			lookupLayer = lookupLayerUpper
   383  		} else {
   384  			lookupLayer = lookupLayerLower
   385  		}
   386  		return false
   387  	})
   388  
   389  	return lookupLayer, lookupErr
   390  }
   391  
   392  type lookupLayer int
   393  
   394  const (
   395  	// lookupLayerNone indicates that no file exists at the given path on the
   396  	// upper layer, and is either whited out or does not exist on lower layers.
   397  	// Therefore, the file does not exist in the overlay filesystem, and file
   398  	// creation may proceed normally (if an upper layer exists).
   399  	lookupLayerNone lookupLayer = iota
   400  
   401  	// lookupLayerLower indicates that no file exists at the given path on the
   402  	// upper layer, but exists on a lower layer. Therefore, the file exists in
   403  	// the overlay filesystem, but must be copied-up before mutation.
   404  	lookupLayerLower
   405  
   406  	// lookupLayerUpper indicates that a non-whiteout file exists at the given
   407  	// path on the upper layer. Therefore, the file exists in the overlay
   408  	// filesystem, and is already copied-up.
   409  	lookupLayerUpper
   410  
   411  	// lookupLayerUpperWhiteout indicates that a whiteout exists at the given
   412  	// path on the upper layer. Therefore, the file does not exist in the
   413  	// overlay filesystem, and file creation must remove the whiteout before
   414  	// proceeding.
   415  	lookupLayerUpperWhiteout
   416  )
   417  
   418  func (ll lookupLayer) existsInOverlay() bool {
   419  	return ll == lookupLayerLower || ll == lookupLayerUpper
   420  }
   421  
   422  // walkParentDirLocked resolves all but the last path component of rp to an
   423  // existing directory, starting from the given directory (which is usually
   424  // rp.Start().Impl().(*dentry)). It does not check that the returned directory
   425  // is searchable by the provider of rp.
   426  //
   427  // Preconditions:
   428  // * fs.renameMu must be locked.
   429  // * !rp.Done().
   430  func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) {
   431  	for !rp.Final() {
   432  		d.dirMu.Lock()
   433  		next, _, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
   434  		d.dirMu.Unlock()
   435  		if err != nil {
   436  			return nil, err
   437  		}
   438  		d = next
   439  	}
   440  	if !d.isDir() {
   441  		return nil, syserror.ENOTDIR
   442  	}
   443  	return d, nil
   444  }
   445  
   446  // resolveLocked resolves rp to an existing file.
   447  //
   448  // Preconditions: fs.renameMu must be locked.
   449  func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) {
   450  	d := rp.Start().Impl().(*dentry)
   451  	for !rp.Done() {
   452  		d.dirMu.Lock()
   453  		next, _, err := fs.stepLocked(ctx, rp, d, true /* mayFollowSymlinks */, ds)
   454  		d.dirMu.Unlock()
   455  		if err != nil {
   456  			return nil, err
   457  		}
   458  		d = next
   459  	}
   460  	if rp.MustBeDir() && !d.isDir() {
   461  		return nil, syserror.ENOTDIR
   462  	}
   463  	return d, nil
   464  }
   465  
   466  // doCreateAt checks that creating a file at rp is permitted, then invokes
   467  // create to do so.
   468  //
   469  // Preconditions:
   470  // * !rp.Done().
   471  // * For the final path component in rp, !rp.ShouldFollowSymlink().
   472  func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string, haveUpperWhiteout bool) error) error {
   473  	var ds *[]*dentry
   474  	fs.renameMu.RLock()
   475  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
   476  	start := rp.Start().Impl().(*dentry)
   477  	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
   478  	if err != nil {
   479  		return err
   480  	}
   481  	name := rp.Component()
   482  	if name == "." || name == ".." {
   483  		return syserror.EEXIST
   484  	}
   485  	if parent.vfsd.IsDead() {
   486  		return syserror.ENOENT
   487  	}
   488  
   489  	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
   490  		return err
   491  	}
   492  
   493  	parent.dirMu.Lock()
   494  	defer parent.dirMu.Unlock()
   495  
   496  	// Determine if a file already exists at name.
   497  	if _, ok := parent.children[name]; ok {
   498  		return syserror.EEXIST
   499  	}
   500  	childLayer, err := fs.lookupLayerLocked(ctx, parent, name)
   501  	if err != nil {
   502  		return err
   503  	}
   504  	if childLayer.existsInOverlay() {
   505  		return syserror.EEXIST
   506  	}
   507  
   508  	if !dir && rp.MustBeDir() {
   509  		return syserror.ENOENT
   510  	}
   511  
   512  	mnt := rp.Mount()
   513  	if err := mnt.CheckBeginWrite(); err != nil {
   514  		return err
   515  	}
   516  	defer mnt.EndWrite()
   517  	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
   518  		return err
   519  	}
   520  	// Ensure that the parent directory is copied-up so that we can create the
   521  	// new file in the upper layer.
   522  	if err := parent.copyUpLocked(ctx); err != nil {
   523  		return err
   524  	}
   525  
   526  	// Finally create the new file.
   527  	if err := create(parent, name, childLayer == lookupLayerUpperWhiteout); err != nil {
   528  		return err
   529  	}
   530  
   531  	parent.dirents = nil
   532  	ev := linux.IN_CREATE
   533  	if dir {
   534  		ev |= linux.IN_ISDIR
   535  	}
   536  	parent.watches.Notify(ctx, name, uint32(ev), 0 /* cookie */, vfs.InodeEvent, false /* unlinked */)
   537  	return nil
   538  }
   539  
   540  // Preconditions: pop's parent directory has been copied up.
   541  func (fs *filesystem) createWhiteout(ctx context.Context, vfsObj *vfs.VirtualFilesystem, pop *vfs.PathOperation) error {
   542  	return vfsObj.MknodAt(ctx, fs.creds, pop, &vfs.MknodOptions{
   543  		Mode: linux.S_IFCHR, // permissions == include/linux/fs.h:WHITEOUT_MODE == 0
   544  		// DevMajor == DevMinor == 0, from include/linux/fs.h:WHITEOUT_DEV
   545  	})
   546  }
   547  
   548  func (fs *filesystem) cleanupRecreateWhiteout(ctx context.Context, vfsObj *vfs.VirtualFilesystem, pop *vfs.PathOperation) {
   549  	if err := fs.createWhiteout(ctx, vfsObj, pop); err != nil {
   550  		panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate whiteout after failed file creation: %v", err))
   551  	}
   552  }
   553  
   554  // AccessAt implements vfs.Filesystem.Impl.AccessAt.
   555  func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error {
   556  	var ds *[]*dentry
   557  	fs.renameMu.RLock()
   558  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
   559  	d, err := fs.resolveLocked(ctx, rp, &ds)
   560  	if err != nil {
   561  		return err
   562  	}
   563  	return d.checkPermissions(creds, ats)
   564  }
   565  
   566  // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
   567  func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
   568  	var ds *[]*dentry
   569  	fs.renameMu.RLock()
   570  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
   571  	d, err := fs.resolveLocked(ctx, rp, &ds)
   572  	if err != nil {
   573  		return nil, err
   574  	}
   575  	if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil {
   576  		return nil, err
   577  	}
   578  	layerVD := d.topLayer()
   579  	return fs.vfsfs.VirtualFilesystem().BoundEndpointAt(ctx, fs.creds, &vfs.PathOperation{
   580  		Root:  layerVD,
   581  		Start: layerVD,
   582  	}, &opts)
   583  }
   584  
   585  // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt.
   586  func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) {
   587  	var ds *[]*dentry
   588  	fs.renameMu.RLock()
   589  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
   590  	d, err := fs.resolveLocked(ctx, rp, &ds)
   591  	if err != nil {
   592  		return nil, err
   593  	}
   594  	if opts.CheckSearchable {
   595  		if !d.isDir() {
   596  			return nil, syserror.ENOTDIR
   597  		}
   598  		if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
   599  			return nil, err
   600  		}
   601  	}
   602  	d.IncRef()
   603  	return &d.vfsd, nil
   604  }
   605  
   606  // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt.
   607  func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) {
   608  	var ds *[]*dentry
   609  	fs.renameMu.RLock()
   610  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
   611  	start := rp.Start().Impl().(*dentry)
   612  	d, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
   613  	if err != nil {
   614  		return nil, err
   615  	}
   616  	d.IncRef()
   617  	return &d.vfsd, nil
   618  }
   619  
   620  // LinkAt implements vfs.FilesystemImpl.LinkAt.
   621  func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error {
   622  	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
   623  		if rp.Mount() != vd.Mount() {
   624  			return linuxerr.EXDEV
   625  		}
   626  		old := vd.Dentry().Impl().(*dentry)
   627  		if old.isDir() {
   628  			return linuxerr.EPERM
   629  		}
   630  		if err := old.copyUpLocked(ctx); err != nil {
   631  			return err
   632  		}
   633  		vfsObj := fs.vfsfs.VirtualFilesystem()
   634  		newpop := vfs.PathOperation{
   635  			Root:  parent.upperVD,
   636  			Start: parent.upperVD,
   637  			Path:  fspath.Parse(childName),
   638  		}
   639  		if haveUpperWhiteout {
   640  			if err := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); err != nil {
   641  				return err
   642  			}
   643  		}
   644  		if err := vfsObj.LinkAt(ctx, fs.creds, &vfs.PathOperation{
   645  			Root:  old.upperVD,
   646  			Start: old.upperVD,
   647  		}, &newpop); err != nil {
   648  			if haveUpperWhiteout {
   649  				fs.cleanupRecreateWhiteout(ctx, vfsObj, &newpop)
   650  			}
   651  			return err
   652  		}
   653  		creds := rp.Credentials()
   654  		if err := vfsObj.SetStatAt(ctx, fs.creds, &newpop, &vfs.SetStatOptions{
   655  			Stat: linux.Statx{
   656  				Mask: linux.STATX_UID | linux.STATX_GID,
   657  				UID:  uint32(creds.EffectiveKUID),
   658  				GID:  uint32(creds.EffectiveKGID),
   659  			},
   660  		}); err != nil {
   661  			if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); cleanupErr != nil {
   662  				panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after LinkAt metadata update failure: %v", cleanupErr))
   663  			} else if haveUpperWhiteout {
   664  				fs.cleanupRecreateWhiteout(ctx, vfsObj, &newpop)
   665  			}
   666  			return err
   667  		}
   668  		old.watches.Notify(ctx, "", linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent, false /* unlinked */)
   669  		return nil
   670  	})
   671  }
   672  
   673  // MkdirAt implements vfs.FilesystemImpl.MkdirAt.
   674  func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error {
   675  	return fs.doCreateAt(ctx, rp, true /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
   676  		vfsObj := fs.vfsfs.VirtualFilesystem()
   677  		pop := vfs.PathOperation{
   678  			Root:  parent.upperVD,
   679  			Start: parent.upperVD,
   680  			Path:  fspath.Parse(childName),
   681  		}
   682  		if haveUpperWhiteout {
   683  			if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil {
   684  				return err
   685  			}
   686  		}
   687  		if err := vfsObj.MkdirAt(ctx, fs.creds, &pop, &opts); err != nil {
   688  			if haveUpperWhiteout {
   689  				fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
   690  			}
   691  			return err
   692  		}
   693  
   694  		if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
   695  			Stat: parent.newChildOwnerStat(opts.Mode, rp.Credentials()),
   696  		}); err != nil {
   697  			if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil {
   698  				panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt metadata update failure: %v", cleanupErr))
   699  			} else if haveUpperWhiteout {
   700  				fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
   701  			}
   702  			return err
   703  		}
   704  		if haveUpperWhiteout {
   705  			// There may be directories on lower layers (previously hidden by
   706  			// the whiteout) that the new directory should not be merged with.
   707  			// Mark it opaque to prevent merging.
   708  			if err := vfsObj.SetXattrAt(ctx, fs.creds, &pop, &vfs.SetXattrOptions{
   709  				Name:  _OVL_XATTR_OPAQUE,
   710  				Value: "y",
   711  			}); err != nil {
   712  				if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil {
   713  					panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt set-opaque failure: %v", cleanupErr))
   714  				} else {
   715  					fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
   716  				}
   717  				return err
   718  			}
   719  		}
   720  		return nil
   721  	})
   722  }
   723  
   724  // MknodAt implements vfs.FilesystemImpl.MknodAt.
   725  func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
   726  	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
   727  		// Disallow attempts to create whiteouts.
   728  		if opts.Mode&linux.S_IFMT == linux.S_IFCHR && opts.DevMajor == 0 && opts.DevMinor == 0 {
   729  			return linuxerr.EPERM
   730  		}
   731  		vfsObj := fs.vfsfs.VirtualFilesystem()
   732  		pop := vfs.PathOperation{
   733  			Root:  parent.upperVD,
   734  			Start: parent.upperVD,
   735  			Path:  fspath.Parse(childName),
   736  		}
   737  		if haveUpperWhiteout {
   738  			if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil {
   739  				return err
   740  			}
   741  		}
   742  		if err := vfsObj.MknodAt(ctx, fs.creds, &pop, &opts); err != nil {
   743  			if haveUpperWhiteout {
   744  				fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
   745  			}
   746  			return err
   747  		}
   748  		creds := rp.Credentials()
   749  		if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
   750  			Stat: parent.newChildOwnerStat(opts.Mode, creds),
   751  		}); err != nil {
   752  			if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
   753  				panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after MknodAt metadata update failure: %v", cleanupErr))
   754  			} else if haveUpperWhiteout {
   755  				fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
   756  			}
   757  			return err
   758  		}
   759  		return nil
   760  	})
   761  }
   762  
// OpenAt implements vfs.FilesystemImpl.OpenAt.
//
// It resolves rp under fs.renameMu (read-locked) and either opens the
// existing file via dentry.openCopiedUp or, when the final component does not
// exist and O_CREAT is set, creates and opens it via createAndOpenLocked.
// Regular files that are opened with write access are copied up first.
// fs.renameMu is released before openCopiedUp is called.
func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
	mayCreate := opts.Flags&linux.O_CREAT != 0
	mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL)
	mayWrite := vfs.AccessTypesForOpenFlags(&opts).MayWrite()

	var ds *[]*dentry
	fs.renameMu.RLock()
	// unlock releases renameMu at most once: it is invoked explicitly before
	// openCopiedUp on the success paths, and the deferred call below then
	// becomes a no-op; on every other return path the deferred call performs
	// the release.
	unlocked := false
	unlock := func() {
		if !unlocked {
			fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
			unlocked = true
		}
	}
	defer unlock()

	start := rp.Start().Impl().(*dentry)
	if rp.Done() {
		// The path is empty, so the file to open is start itself.
		if mayCreate && rp.MustBeDir() {
			return nil, syserror.EISDIR
		}
		if mustCreate {
			return nil, syserror.EEXIST
		}
		if start.isRegularFile() && mayWrite {
			if err := start.copyUpLocked(ctx); err != nil {
				return nil, err
			}
		}
		// Hold a reference across the open, since renameMu is released
		// before openCopiedUp runs.
		start.IncRef()
		defer start.DecRef(ctx)
		unlock()
		return start.openCopiedUp(ctx, rp, &opts)
	}

afterTrailingSymlink:
	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
	if err != nil {
		return nil, err
	}
	// Check for search permission in the parent directory.
	if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil {
		return nil, err
	}
	// Reject attempts to open directories with O_CREAT.
	if mayCreate && rp.MustBeDir() {
		return nil, syserror.EISDIR
	}
	// Determine whether or not we need to create a file.
	parent.dirMu.Lock()
	child, topLookupLayer, err := fs.stepLocked(ctx, rp, parent, false /* mayFollowSymlinks */, &ds)
	if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate {
		// The file does not exist: create it while parent.dirMu is still
		// held, telling createAndOpenLocked whether an upper-layer whiteout
		// is present at the name.
		fd, err := fs.createAndOpenLocked(ctx, rp, parent, &opts, &ds, topLookupLayer == lookupLayerUpperWhiteout)
		parent.dirMu.Unlock()
		return fd, err
	}
	parent.dirMu.Unlock()
	if err != nil {
		return nil, err
	}
	// Open existing child or follow symlink.
	if mustCreate {
		return nil, syserror.EEXIST
	}
	if child.isSymlink() && rp.ShouldFollowSymlink() {
		target, err := child.readlink(ctx)
		if err != nil {
			return nil, err
		}
		if err := rp.HandleSymlink(target); err != nil {
			return nil, err
		}
		// Restart the walk from the symlink's parent directory.
		start = parent
		goto afterTrailingSymlink
	}
	if rp.MustBeDir() && !child.isDir() {
		return nil, syserror.ENOTDIR
	}
	if child.isRegularFile() && mayWrite {
		if err := child.copyUpLocked(ctx); err != nil {
			return nil, err
		}
	}
	// Hold a reference across the open, since renameMu is released before
	// openCopiedUp runs.
	child.IncRef()
	defer child.DecRef(ctx)
	unlock()
	return child.openCopiedUp(ctx, rp, &opts)
}
   852  
   853  // Preconditions: If vfs.AccessTypesForOpenFlags(opts).MayWrite(), then d has
   854  // been copied up.
   855  func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
   856  	ats := vfs.AccessTypesForOpenFlags(opts)
   857  	if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
   858  		return nil, err
   859  	}
   860  	mnt := rp.Mount()
   861  
   862  	// Directory FDs open FDs from each layer when directory entries are read,
   863  	// so they don't require opening an FD from d.topLayer() up front.
   864  	ftype := atomic.LoadUint32(&d.mode) & linux.S_IFMT
   865  	if ftype == linux.S_IFDIR {
   866  		// Can't open directories with O_CREAT.
   867  		if opts.Flags&linux.O_CREAT != 0 {
   868  			return nil, syserror.EISDIR
   869  		}
   870  		// Can't open directories writably.
   871  		if ats.MayWrite() {
   872  			return nil, syserror.EISDIR
   873  		}
   874  		if opts.Flags&linux.O_DIRECT != 0 {
   875  			return nil, linuxerr.EINVAL
   876  		}
   877  		fd := &directoryFD{}
   878  		fd.LockFD.Init(&d.locks)
   879  		if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
   880  			UseDentryMetadata: true,
   881  		}); err != nil {
   882  			return nil, err
   883  		}
   884  		return &fd.vfsfd, nil
   885  	}
   886  
   887  	layerVD, isUpper := d.topLayerInfo()
   888  	layerFD, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
   889  		Root:  layerVD,
   890  		Start: layerVD,
   891  	}, opts)
   892  	if err != nil {
   893  		return nil, err
   894  	}
   895  	if ftype != linux.S_IFREG {
   896  		return layerFD, nil
   897  	}
   898  	layerFlags := layerFD.StatusFlags()
   899  	fd := &regularFileFD{
   900  		copiedUp:    isUpper,
   901  		cachedFD:    layerFD,
   902  		cachedFlags: layerFlags,
   903  	}
   904  	fd.LockFD.Init(&d.locks)
   905  	layerFDOpts := layerFD.Options()
   906  	if err := fd.vfsfd.Init(fd, layerFlags, mnt, &d.vfsd, &layerFDOpts); err != nil {
   907  		layerFD.DecRef(ctx)
   908  		return nil, err
   909  	}
   910  	return &fd.vfsfd, nil
   911  }
   912  
   913  // Preconditions:
   914  // * parent.dirMu must be locked.
   915  // * parent does not already contain a child named rp.Component().
   916  func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.ResolvingPath, parent *dentry, opts *vfs.OpenOptions, ds **[]*dentry, haveUpperWhiteout bool) (*vfs.FileDescription, error) {
   917  	creds := rp.Credentials()
   918  	if err := parent.checkPermissions(creds, vfs.MayWrite); err != nil {
   919  		return nil, err
   920  	}
   921  	if parent.vfsd.IsDead() {
   922  		return nil, syserror.ENOENT
   923  	}
   924  	mnt := rp.Mount()
   925  	if err := mnt.CheckBeginWrite(); err != nil {
   926  		return nil, err
   927  	}
   928  	defer mnt.EndWrite()
   929  
   930  	if err := parent.copyUpLocked(ctx); err != nil {
   931  		return nil, err
   932  	}
   933  
   934  	vfsObj := fs.vfsfs.VirtualFilesystem()
   935  	childName := rp.Component()
   936  	pop := vfs.PathOperation{
   937  		Root:  parent.upperVD,
   938  		Start: parent.upperVD,
   939  		Path:  fspath.Parse(childName),
   940  	}
   941  	// Unlink the whiteout if it exists.
   942  	if haveUpperWhiteout {
   943  		if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil {
   944  			log.Warningf("overlay.filesystem.createAndOpenLocked: failed to unlink whiteout: %v", err)
   945  			return nil, err
   946  		}
   947  	}
   948  	// Create the file on the upper layer, and get an FD representing it.
   949  	upperFD, err := vfsObj.OpenAt(ctx, fs.creds, &pop, &vfs.OpenOptions{
   950  		Flags: opts.Flags&^vfs.FileCreationFlags | linux.O_CREAT | linux.O_EXCL,
   951  		Mode:  opts.Mode,
   952  	})
   953  	if err != nil {
   954  		if haveUpperWhiteout {
   955  			fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
   956  		}
   957  		return nil, err
   958  	}
   959  
   960  	// Change the file's owner to the caller. We can't use upperFD.SetStat()
   961  	// because it will pick up creds from ctx.
   962  	if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
   963  		Stat: parent.newChildOwnerStat(opts.Mode, creds),
   964  	}); err != nil {
   965  		if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
   966  			panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) metadata update failure: %v", cleanupErr))
   967  		} else if haveUpperWhiteout {
   968  			fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
   969  		}
   970  		return nil, err
   971  	}
   972  	// Re-lookup to get a dentry representing the new file, which is needed for
   973  	// the returned FD.
   974  	child, _, err := fs.getChildLocked(ctx, parent, childName, ds)
   975  	if err != nil {
   976  		if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
   977  			panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) dentry lookup failure: %v", cleanupErr))
   978  		} else if haveUpperWhiteout {
   979  			fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
   980  		}
   981  		return nil, err
   982  	}
   983  	// Finally construct the overlay FD. Below this point, we don't perform
   984  	// cleanup (the file was created successfully even if we can no longer open
   985  	// it for some reason).
   986  	parent.dirents = nil
   987  	upperFlags := upperFD.StatusFlags()
   988  	fd := &regularFileFD{
   989  		copiedUp:    true,
   990  		cachedFD:    upperFD,
   991  		cachedFlags: upperFlags,
   992  	}
   993  	fd.LockFD.Init(&child.locks)
   994  	upperFDOpts := upperFD.Options()
   995  	if err := fd.vfsfd.Init(fd, upperFlags, mnt, &child.vfsd, &upperFDOpts); err != nil {
   996  		upperFD.DecRef(ctx)
   997  		return nil, err
   998  	}
   999  	parent.watches.Notify(ctx, childName, linux.IN_CREATE, 0 /* cookie */, vfs.PathEvent, false /* unlinked */)
  1000  	return &fd.vfsfd, nil
  1001  }
  1002  
  1003  // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
  1004  func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
  1005  	var ds *[]*dentry
  1006  	fs.renameMu.RLock()
  1007  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1008  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1009  	if err != nil {
  1010  		return "", err
  1011  	}
  1012  	layerVD := d.topLayer()
  1013  	return fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
  1014  		Root:  layerVD,
  1015  		Start: layerVD,
  1016  	})
  1017  }
  1018  
  1019  // RenameAt implements vfs.FilesystemImpl.RenameAt.
  1020  func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
  1021  	// Resolve newParent first to verify that it's on this Mount.
  1022  	var ds *[]*dentry
  1023  	fs.renameMu.Lock()
  1024  	defer fs.renameMuUnlockAndCheckDrop(ctx, &ds)
  1025  	newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds)
  1026  	if err != nil {
  1027  		return err
  1028  	}
  1029  
  1030  	if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
  1031  		return linuxerr.EINVAL
  1032  	}
  1033  
  1034  	newName := rp.Component()
  1035  	if newName == "." || newName == ".." {
  1036  		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
  1037  			return syserror.EEXIST
  1038  		}
  1039  		return linuxerr.EBUSY
  1040  	}
  1041  	mnt := rp.Mount()
  1042  	if mnt != oldParentVD.Mount() {
  1043  		return linuxerr.EXDEV
  1044  	}
  1045  	if err := mnt.CheckBeginWrite(); err != nil {
  1046  		return err
  1047  	}
  1048  	defer mnt.EndWrite()
  1049  
  1050  	oldParent := oldParentVD.Dentry().Impl().(*dentry)
  1051  	creds := rp.Credentials()
  1052  	if err := oldParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
  1053  		return err
  1054  	}
  1055  	// We need a dentry representing the renamed file since, if it's a
  1056  	// directory, we need to check for write permission on it.
  1057  	oldParent.dirMu.Lock()
  1058  	defer oldParent.dirMu.Unlock()
  1059  	renamed, _, err := fs.getChildLocked(ctx, oldParent, oldName, &ds)
  1060  	if err != nil {
  1061  		return err
  1062  	}
  1063  	if err := oldParent.mayDelete(creds, renamed); err != nil {
  1064  		return err
  1065  	}
  1066  	if renamed.isDir() {
  1067  		if renamed == newParent || genericIsAncestorDentry(renamed, newParent) {
  1068  			return linuxerr.EINVAL
  1069  		}
  1070  		if oldParent != newParent {
  1071  			if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil {
  1072  				return err
  1073  			}
  1074  		}
  1075  	} else {
  1076  		if opts.MustBeDir || rp.MustBeDir() {
  1077  			return syserror.ENOTDIR
  1078  		}
  1079  	}
  1080  
  1081  	if oldParent != newParent {
  1082  		if err := newParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil {
  1083  			return err
  1084  		}
  1085  		newParent.dirMu.Lock()
  1086  		defer newParent.dirMu.Unlock()
  1087  	}
  1088  	if newParent.vfsd.IsDead() {
  1089  		return syserror.ENOENT
  1090  	}
  1091  	var (
  1092  		replaced      *dentry
  1093  		replacedVFSD  *vfs.Dentry
  1094  		replacedLayer lookupLayer
  1095  		whiteouts     map[string]bool
  1096  	)
  1097  	replaced, replacedLayer, err = fs.getChildLocked(ctx, newParent, newName, &ds)
  1098  	if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) {
  1099  		return err
  1100  	}
  1101  	if replaced != nil {
  1102  		if opts.Flags&linux.RENAME_NOREPLACE != 0 {
  1103  			return syserror.EEXIST
  1104  		}
  1105  		replacedVFSD = &replaced.vfsd
  1106  		if replaced.isDir() {
  1107  			if !renamed.isDir() {
  1108  				return syserror.EISDIR
  1109  			}
  1110  			if genericIsAncestorDentry(replaced, renamed) {
  1111  				return linuxerr.ENOTEMPTY
  1112  			}
  1113  			replaced.dirMu.Lock()
  1114  			defer replaced.dirMu.Unlock()
  1115  			whiteouts, err = replaced.collectWhiteoutsForRmdirLocked(ctx)
  1116  			if err != nil {
  1117  				return err
  1118  			}
  1119  		} else {
  1120  			if rp.MustBeDir() || renamed.isDir() {
  1121  				return syserror.ENOTDIR
  1122  			}
  1123  		}
  1124  	}
  1125  
  1126  	if oldParent == newParent && oldName == newName {
  1127  		return nil
  1128  	}
  1129  
  1130  	// renamed and oldParent need to be copied-up before they're renamed on the
  1131  	// upper layer.
  1132  	if err := renamed.copyUpLocked(ctx); err != nil {
  1133  		return err
  1134  	}
  1135  	// If renamed is a directory, all of its descendants need to be copied-up
  1136  	// before they're renamed on the upper layer.
  1137  	if renamed.isDir() {
  1138  		if err := renamed.copyUpDescendantsLocked(ctx, &ds); err != nil {
  1139  			return err
  1140  		}
  1141  	}
  1142  	// newParent must be copied-up before it can contain renamed on the upper
  1143  	// layer.
  1144  	if err := newParent.copyUpLocked(ctx); err != nil {
  1145  		return err
  1146  	}
  1147  	// If replaced exists, it doesn't need to be copied-up, but we do need to
  1148  	// serialize with copy-up. Holding renameMu for writing should be
  1149  	// sufficient, but out of an abundance of caution...
  1150  	if replaced != nil {
  1151  		replaced.copyMu.RLock()
  1152  		defer replaced.copyMu.RUnlock()
  1153  	}
  1154  
  1155  	vfsObj := rp.VirtualFilesystem()
  1156  	mntns := vfs.MountNamespaceFromContext(ctx)
  1157  	defer mntns.DecRef(ctx)
  1158  	if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil {
  1159  		return err
  1160  	}
  1161  
  1162  	newpop := vfs.PathOperation{
  1163  		Root:  newParent.upperVD,
  1164  		Start: newParent.upperVD,
  1165  		Path:  fspath.Parse(newName),
  1166  	}
  1167  
  1168  	needRecreateWhiteouts := false
  1169  	cleanupRecreateWhiteouts := func() {
  1170  		if !needRecreateWhiteouts {
  1171  			return
  1172  		}
  1173  		for whiteoutName, whiteoutUpper := range whiteouts {
  1174  			if !whiteoutUpper {
  1175  				continue
  1176  			}
  1177  			if err := fs.createWhiteout(ctx, vfsObj, &vfs.PathOperation{
  1178  				Root:  replaced.upperVD,
  1179  				Start: replaced.upperVD,
  1180  				Path:  fspath.Parse(whiteoutName),
  1181  			}); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) {
  1182  				panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RenameAt failure: %v", err))
  1183  			}
  1184  		}
  1185  	}
  1186  	if renamed.isDir() {
  1187  		if replacedLayer == lookupLayerUpper {
  1188  			// Remove whiteouts from the directory being replaced.
  1189  			needRecreateWhiteouts = true
  1190  			for whiteoutName, whiteoutUpper := range whiteouts {
  1191  				if !whiteoutUpper {
  1192  					continue
  1193  				}
  1194  				if err := vfsObj.UnlinkAt(ctx, fs.creds, &vfs.PathOperation{
  1195  					Root:  replaced.upperVD,
  1196  					Start: replaced.upperVD,
  1197  					Path:  fspath.Parse(whiteoutName),
  1198  				}); err != nil {
  1199  					cleanupRecreateWhiteouts()
  1200  					vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
  1201  					return err
  1202  				}
  1203  			}
  1204  		} else if replacedLayer == lookupLayerUpperWhiteout {
  1205  			// We need to explicitly remove the whiteout since otherwise rename
  1206  			// on the upper layer will fail with ENOTDIR.
  1207  			if err := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); err != nil {
  1208  				vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
  1209  				return err
  1210  			}
  1211  		}
  1212  	}
  1213  
  1214  	// Essentially no gVisor filesystem supports RENAME_WHITEOUT, so just do a
  1215  	// regular rename and create the whiteout at the origin manually. Unlike
  1216  	// RENAME_WHITEOUT, this isn't atomic with respect to other users of the
  1217  	// upper filesystem, but this is already the case for virtually all other
  1218  	// overlay filesystem operations too.
  1219  	oldpop := vfs.PathOperation{
  1220  		Root:  oldParent.upperVD,
  1221  		Start: oldParent.upperVD,
  1222  		Path:  fspath.Parse(oldName),
  1223  	}
  1224  	if err := vfsObj.RenameAt(ctx, creds, &oldpop, &newpop, &opts); err != nil {
  1225  		cleanupRecreateWhiteouts()
  1226  		vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD)
  1227  		return err
  1228  	}
  1229  
  1230  	// Below this point, the renamed dentry is now at newpop, and anything we
  1231  	// replaced is gone forever. Commit the rename, update the overlay
  1232  	// filesystem tree, and abandon attempts to recover from errors.
  1233  	vfsObj.CommitRenameReplaceDentry(ctx, &renamed.vfsd, replacedVFSD)
  1234  	delete(oldParent.children, oldName)
  1235  	if replaced != nil {
  1236  		ds = appendDentry(ds, replaced)
  1237  	}
  1238  	if oldParent != newParent {
  1239  		newParent.dirents = nil
  1240  		// This can't drop the last reference on oldParent because one is held
  1241  		// by oldParentVD, so lock recursion is impossible.
  1242  		oldParent.DecRef(ctx)
  1243  		ds = appendDentry(ds, oldParent)
  1244  		newParent.IncRef()
  1245  		renamed.parent = newParent
  1246  	}
  1247  	renamed.name = newName
  1248  	if newParent.children == nil {
  1249  		newParent.children = make(map[string]*dentry)
  1250  	}
  1251  	newParent.children[newName] = renamed
  1252  	oldParent.dirents = nil
  1253  
  1254  	if err := fs.createWhiteout(ctx, vfsObj, &oldpop); err != nil {
  1255  		panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to create whiteout at origin after RenameAt: %v", err))
  1256  	}
  1257  	if renamed.isDir() {
  1258  		if err := vfsObj.SetXattrAt(ctx, fs.creds, &newpop, &vfs.SetXattrOptions{
  1259  			Name:  _OVL_XATTR_OPAQUE,
  1260  			Value: "y",
  1261  		}); err != nil {
  1262  			panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to make renamed directory opaque: %v", err))
  1263  		}
  1264  	}
  1265  
  1266  	vfs.InotifyRename(ctx, &renamed.watches, &oldParent.watches, &newParent.watches, oldName, newName, renamed.isDir())
  1267  	return nil
  1268  }
  1269  
  1270  // RmdirAt implements vfs.FilesystemImpl.RmdirAt.
  1271  func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
  1272  	var ds *[]*dentry
  1273  	fs.renameMu.RLock()
  1274  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1275  	start := rp.Start().Impl().(*dentry)
  1276  	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
  1277  	if err != nil {
  1278  		return err
  1279  	}
  1280  	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
  1281  		return err
  1282  	}
  1283  	if err := rp.Mount().CheckBeginWrite(); err != nil {
  1284  		return err
  1285  	}
  1286  	defer rp.Mount().EndWrite()
  1287  	name := rp.Component()
  1288  	if name == "." {
  1289  		return linuxerr.EINVAL
  1290  	}
  1291  	if name == ".." {
  1292  		return linuxerr.ENOTEMPTY
  1293  	}
  1294  	vfsObj := rp.VirtualFilesystem()
  1295  	mntns := vfs.MountNamespaceFromContext(ctx)
  1296  	defer mntns.DecRef(ctx)
  1297  	parent.dirMu.Lock()
  1298  	defer parent.dirMu.Unlock()
  1299  
  1300  	// Ensure that parent is copied-up before potentially holding child.copyMu
  1301  	// below.
  1302  	if err := parent.copyUpLocked(ctx); err != nil {
  1303  		return err
  1304  	}
  1305  
  1306  	// We need a dentry representing the child directory being removed in order
  1307  	// to verify that it's empty.
  1308  	child, _, err := fs.getChildLocked(ctx, parent, name, &ds)
  1309  	if err != nil {
  1310  		return err
  1311  	}
  1312  	if !child.isDir() {
  1313  		return syserror.ENOTDIR
  1314  	}
  1315  	if err := parent.mayDelete(rp.Credentials(), child); err != nil {
  1316  		return err
  1317  	}
  1318  	child.dirMu.Lock()
  1319  	defer child.dirMu.Unlock()
  1320  	whiteouts, err := child.collectWhiteoutsForRmdirLocked(ctx)
  1321  	if err != nil {
  1322  		return err
  1323  	}
  1324  	child.copyMu.RLock()
  1325  	defer child.copyMu.RUnlock()
  1326  	if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
  1327  		return err
  1328  	}
  1329  
  1330  	pop := vfs.PathOperation{
  1331  		Root:  parent.upperVD,
  1332  		Start: parent.upperVD,
  1333  		Path:  fspath.Parse(name),
  1334  	}
  1335  	if child.upperVD.Ok() {
  1336  		cleanupRecreateWhiteouts := func() {
  1337  			if !child.upperVD.Ok() {
  1338  				return
  1339  			}
  1340  			for whiteoutName, whiteoutUpper := range whiteouts {
  1341  				if !whiteoutUpper {
  1342  					continue
  1343  				}
  1344  				if err := fs.createWhiteout(ctx, vfsObj, &vfs.PathOperation{
  1345  					Root:  child.upperVD,
  1346  					Start: child.upperVD,
  1347  					Path:  fspath.Parse(whiteoutName),
  1348  				}); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) {
  1349  					panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RmdirAt failure: %v", err))
  1350  				}
  1351  			}
  1352  		}
  1353  		// Remove existing whiteouts on the upper layer.
  1354  		for whiteoutName, whiteoutUpper := range whiteouts {
  1355  			if !whiteoutUpper {
  1356  				continue
  1357  			}
  1358  			if err := vfsObj.UnlinkAt(ctx, fs.creds, &vfs.PathOperation{
  1359  				Root:  child.upperVD,
  1360  				Start: child.upperVD,
  1361  				Path:  fspath.Parse(whiteoutName),
  1362  			}); err != nil {
  1363  				cleanupRecreateWhiteouts()
  1364  				vfsObj.AbortDeleteDentry(&child.vfsd)
  1365  				return err
  1366  			}
  1367  		}
  1368  		// Remove the existing directory on the upper layer.
  1369  		if err := vfsObj.RmdirAt(ctx, fs.creds, &pop); err != nil {
  1370  			cleanupRecreateWhiteouts()
  1371  			vfsObj.AbortDeleteDentry(&child.vfsd)
  1372  			return err
  1373  		}
  1374  	}
  1375  	if err := fs.createWhiteout(ctx, vfsObj, &pop); err != nil {
  1376  		// Don't attempt to recover from this: the original directory is
  1377  		// already gone, so any dentries representing it are invalid, and
  1378  		// creating a new directory won't undo that.
  1379  		panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to create whiteout during RmdirAt: %v", err))
  1380  	}
  1381  
  1382  	vfsObj.CommitDeleteDentry(ctx, &child.vfsd)
  1383  	delete(parent.children, name)
  1384  	ds = appendDentry(ds, child)
  1385  	parent.dirents = nil
  1386  	parent.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0 /* cookie */, vfs.InodeEvent, true /* unlinked */)
  1387  	return nil
  1388  }
  1389  
  1390  // SetStatAt implements vfs.FilesystemImpl.SetStatAt.
  1391  func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
  1392  	var ds *[]*dentry
  1393  	fs.renameMu.RLock()
  1394  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1395  	if err != nil {
  1396  		fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1397  		return err
  1398  	}
  1399  	err = d.setStatLocked(ctx, rp, opts)
  1400  	fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1401  	if err != nil {
  1402  		return err
  1403  	}
  1404  
  1405  	if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
  1406  		d.InotifyWithParent(ctx, ev, 0 /* cookie */, vfs.InodeEvent)
  1407  	}
  1408  	return nil
  1409  }
  1410  
  1411  // Precondition: d.fs.renameMu must be held for reading.
  1412  func (d *dentry) setStatLocked(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
  1413  	mode := linux.FileMode(atomic.LoadUint32(&d.mode))
  1414  	if err := vfs.CheckSetStat(ctx, rp.Credentials(), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
  1415  		return err
  1416  	}
  1417  	mnt := rp.Mount()
  1418  	if err := mnt.CheckBeginWrite(); err != nil {
  1419  		return err
  1420  	}
  1421  	defer mnt.EndWrite()
  1422  	if err := d.copyUpLocked(ctx); err != nil {
  1423  		return err
  1424  	}
  1425  	// Changes to d's attributes are serialized by d.copyMu.
  1426  	d.copyMu.Lock()
  1427  	defer d.copyMu.Unlock()
  1428  	if err := d.fs.vfsfs.VirtualFilesystem().SetStatAt(ctx, d.fs.creds, &vfs.PathOperation{
  1429  		Root:  d.upperVD,
  1430  		Start: d.upperVD,
  1431  	}, &opts); err != nil {
  1432  		return err
  1433  	}
  1434  	d.updateAfterSetStatLocked(&opts)
  1435  	return nil
  1436  }
  1437  
  1438  // StatAt implements vfs.FilesystemImpl.StatAt.
  1439  func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
  1440  	var ds *[]*dentry
  1441  	fs.renameMu.RLock()
  1442  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1443  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1444  	if err != nil {
  1445  		return linux.Statx{}, err
  1446  	}
  1447  
  1448  	var stat linux.Statx
  1449  	if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 {
  1450  		layerVD := d.topLayer()
  1451  		stat, err = fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{
  1452  			Root:  layerVD,
  1453  			Start: layerVD,
  1454  		}, &vfs.StatOptions{
  1455  			Mask: layerMask,
  1456  			Sync: opts.Sync,
  1457  		})
  1458  		if err != nil {
  1459  			return linux.Statx{}, err
  1460  		}
  1461  	}
  1462  	d.statInternalTo(ctx, &opts, &stat)
  1463  	return stat, nil
  1464  }
  1465  
  1466  // StatFSAt implements vfs.FilesystemImpl.StatFSAt.
  1467  func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
  1468  	var ds *[]*dentry
  1469  	fs.renameMu.RLock()
  1470  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1471  	_, err := fs.resolveLocked(ctx, rp, &ds)
  1472  	if err != nil {
  1473  		return linux.Statfs{}, err
  1474  	}
  1475  	return fs.statFS(ctx)
  1476  }
  1477  
  1478  // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
  1479  func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error {
  1480  	return fs.doCreateAt(ctx, rp, false /* dir */, func(parent *dentry, childName string, haveUpperWhiteout bool) error {
  1481  		vfsObj := fs.vfsfs.VirtualFilesystem()
  1482  		pop := vfs.PathOperation{
  1483  			Root:  parent.upperVD,
  1484  			Start: parent.upperVD,
  1485  			Path:  fspath.Parse(childName),
  1486  		}
  1487  		if haveUpperWhiteout {
  1488  			if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil {
  1489  				return err
  1490  			}
  1491  		}
  1492  		if err := vfsObj.SymlinkAt(ctx, fs.creds, &pop, target); err != nil {
  1493  			if haveUpperWhiteout {
  1494  				fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
  1495  			}
  1496  			return err
  1497  		}
  1498  		creds := rp.Credentials()
  1499  		if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
  1500  			Stat: linux.Statx{
  1501  				Mask: linux.STATX_UID | linux.STATX_GID,
  1502  				UID:  uint32(creds.EffectiveKUID),
  1503  				GID:  uint32(creds.EffectiveKGID),
  1504  			},
  1505  		}); err != nil {
  1506  			if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
  1507  				panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after SymlinkAt metadata update failure: %v", cleanupErr))
  1508  			} else if haveUpperWhiteout {
  1509  				fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
  1510  			}
  1511  			return err
  1512  		}
  1513  		return nil
  1514  	})
  1515  }
  1516  
  1517  // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
  1518  func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
  1519  	var ds *[]*dentry
  1520  	fs.renameMu.RLock()
  1521  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1522  	start := rp.Start().Impl().(*dentry)
  1523  	parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds)
  1524  	if err != nil {
  1525  		return err
  1526  	}
  1527  	if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
  1528  		return err
  1529  	}
  1530  	if err := rp.Mount().CheckBeginWrite(); err != nil {
  1531  		return err
  1532  	}
  1533  	defer rp.Mount().EndWrite()
  1534  	name := rp.Component()
  1535  	if name == "." || name == ".." {
  1536  		return syserror.EISDIR
  1537  	}
  1538  	if rp.MustBeDir() {
  1539  		return syserror.ENOTDIR
  1540  	}
  1541  	vfsObj := rp.VirtualFilesystem()
  1542  	mntns := vfs.MountNamespaceFromContext(ctx)
  1543  	defer mntns.DecRef(ctx)
  1544  	parent.dirMu.Lock()
  1545  	defer parent.dirMu.Unlock()
  1546  
  1547  	// Ensure that parent is copied-up before potentially holding child.copyMu
  1548  	// below.
  1549  	if err := parent.copyUpLocked(ctx); err != nil {
  1550  		return err
  1551  	}
  1552  
  1553  	// We need a dentry representing the child being removed in order to verify
  1554  	// that it's not a directory.
  1555  	child, childLayer, err := fs.getChildLocked(ctx, parent, name, &ds)
  1556  	if err != nil {
  1557  		return err
  1558  	}
  1559  	if child.isDir() {
  1560  		return syserror.EISDIR
  1561  	}
  1562  	if err := parent.mayDelete(rp.Credentials(), child); err != nil {
  1563  		return err
  1564  	}
  1565  	// Hold child.copyMu to prevent it from being copied-up during
  1566  	// deletion.
  1567  	child.copyMu.RLock()
  1568  	defer child.copyMu.RUnlock()
  1569  	if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
  1570  		return err
  1571  	}
  1572  
  1573  	pop := vfs.PathOperation{
  1574  		Root:  parent.upperVD,
  1575  		Start: parent.upperVD,
  1576  		Path:  fspath.Parse(name),
  1577  	}
  1578  	if childLayer == lookupLayerUpper {
  1579  		// Remove the existing file on the upper layer.
  1580  		if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil {
  1581  			if child != nil {
  1582  				vfsObj.AbortDeleteDentry(&child.vfsd)
  1583  			}
  1584  			return err
  1585  		}
  1586  	}
  1587  	if err := fs.createWhiteout(ctx, vfsObj, &pop); err != nil {
  1588  		panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to create whiteout during UnlinkAt: %v", err))
  1589  	}
  1590  
  1591  	var cw *vfs.Watches
  1592  	if child != nil {
  1593  		vfsObj.CommitDeleteDentry(ctx, &child.vfsd)
  1594  		delete(parent.children, name)
  1595  		ds = appendDentry(ds, child)
  1596  		cw = &child.watches
  1597  	}
  1598  	vfs.InotifyRemoveChild(ctx, cw, &parent.watches, name)
  1599  	parent.dirents = nil
  1600  	return nil
  1601  }
  1602  
  1603  // isOverlayXattr returns whether the given extended attribute configures the
  1604  // overlay.
  1605  func isOverlayXattr(name string) bool {
  1606  	return strings.HasPrefix(name, _OVL_XATTR_PREFIX)
  1607  }
  1608  
  1609  // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
  1610  func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
  1611  	var ds *[]*dentry
  1612  	fs.renameMu.RLock()
  1613  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1614  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1615  	if err != nil {
  1616  		return nil, err
  1617  	}
  1618  
  1619  	return fs.listXattr(ctx, d, size)
  1620  }
  1621  
  1622  func (fs *filesystem) listXattr(ctx context.Context, d *dentry, size uint64) ([]string, error) {
  1623  	vfsObj := d.fs.vfsfs.VirtualFilesystem()
  1624  	top := d.topLayer()
  1625  	names, err := vfsObj.ListXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, size)
  1626  	if err != nil {
  1627  		return nil, err
  1628  	}
  1629  
  1630  	// Filter out all overlay attributes.
  1631  	n := 0
  1632  	for _, name := range names {
  1633  		if !isOverlayXattr(name) {
  1634  			names[n] = name
  1635  			n++
  1636  		}
  1637  	}
  1638  	return names[:n], err
  1639  }
  1640  
  1641  // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
  1642  func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
  1643  	var ds *[]*dentry
  1644  	fs.renameMu.RLock()
  1645  	defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1646  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1647  	if err != nil {
  1648  		return "", err
  1649  	}
  1650  
  1651  	return fs.getXattr(ctx, d, rp.Credentials(), &opts)
  1652  }
  1653  
  1654  func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) {
  1655  	if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil {
  1656  		return "", err
  1657  	}
  1658  
  1659  	// Return EOPNOTSUPP when fetching an overlay attribute.
  1660  	// See fs/overlayfs/super.c:ovl_own_xattr_get().
  1661  	if isOverlayXattr(opts.Name) {
  1662  		return "", syserror.EOPNOTSUPP
  1663  	}
  1664  
  1665  	// Analogous to fs/overlayfs/super.c:ovl_other_xattr_get().
  1666  	vfsObj := d.fs.vfsfs.VirtualFilesystem()
  1667  	top := d.topLayer()
  1668  	return vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, opts)
  1669  }
  1670  
  1671  // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
  1672  func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
  1673  	var ds *[]*dentry
  1674  	fs.renameMu.RLock()
  1675  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1676  	if err != nil {
  1677  		fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1678  		return err
  1679  	}
  1680  
  1681  	err = fs.setXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), &opts)
  1682  	fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1683  	if err != nil {
  1684  		return err
  1685  	}
  1686  
  1687  	d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent)
  1688  	return nil
  1689  }
  1690  
  1691  // Precondition: fs.renameMu must be locked.
  1692  func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, opts *vfs.SetXattrOptions) error {
  1693  	if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil {
  1694  		return err
  1695  	}
  1696  
  1697  	// Return EOPNOTSUPP when setting an overlay attribute.
  1698  	// See fs/overlayfs/super.c:ovl_own_xattr_set().
  1699  	if isOverlayXattr(opts.Name) {
  1700  		return syserror.EOPNOTSUPP
  1701  	}
  1702  
  1703  	// Analogous to fs/overlayfs/super.c:ovl_other_xattr_set().
  1704  	if err := mnt.CheckBeginWrite(); err != nil {
  1705  		return err
  1706  	}
  1707  	defer mnt.EndWrite()
  1708  	if err := d.copyUpLocked(ctx); err != nil {
  1709  		return err
  1710  	}
  1711  	vfsObj := d.fs.vfsfs.VirtualFilesystem()
  1712  	return vfsObj.SetXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, opts)
  1713  }
  1714  
  1715  // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
  1716  func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
  1717  	var ds *[]*dentry
  1718  	fs.renameMu.RLock()
  1719  	d, err := fs.resolveLocked(ctx, rp, &ds)
  1720  	if err != nil {
  1721  		fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1722  		return err
  1723  	}
  1724  
  1725  	err = fs.removeXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), name)
  1726  	fs.renameMuRUnlockAndCheckDrop(ctx, &ds)
  1727  	if err != nil {
  1728  		return err
  1729  	}
  1730  
  1731  	d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent)
  1732  	return nil
  1733  }
  1734  
  1735  // Precondition: fs.renameMu must be locked.
  1736  func (fs *filesystem) removeXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, name string) error {
  1737  	if err := d.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil {
  1738  		return err
  1739  	}
  1740  
  1741  	// Like SetXattrAt, return EOPNOTSUPP when removing an overlay attribute.
  1742  	// Linux passes the remove request to xattr_handler->set.
  1743  	// See fs/xattr.c:vfs_removexattr().
  1744  	if isOverlayXattr(name) {
  1745  		return syserror.EOPNOTSUPP
  1746  	}
  1747  
  1748  	if err := mnt.CheckBeginWrite(); err != nil {
  1749  		return err
  1750  	}
  1751  	defer mnt.EndWrite()
  1752  	if err := d.copyUpLocked(ctx); err != nil {
  1753  		return err
  1754  	}
  1755  	vfsObj := d.fs.vfsfs.VirtualFilesystem()
  1756  	return vfsObj.RemoveXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, name)
  1757  }
  1758  
  1759  // PrependPath implements vfs.FilesystemImpl.PrependPath.
  1760  func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
  1761  	fs.renameMu.RLock()
  1762  	defer fs.renameMu.RUnlock()
  1763  	return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
  1764  }
  1765  
  1766  // MountOptions implements vfs.FilesystemImpl.MountOptions.
  1767  func (fs *filesystem) MountOptions() string {
  1768  	// Return the mount options from the topmost layer.
  1769  	var vd vfs.VirtualDentry
  1770  	if fs.opts.UpperRoot.Ok() {
  1771  		vd = fs.opts.UpperRoot
  1772  	} else {
  1773  		vd = fs.opts.LowerRoots[0]
  1774  	}
  1775  	return vd.Mount().Filesystem().Impl().MountOptions()
  1776  }