gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/fsimpl/overlay/filesystem.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package overlay 16 17 import ( 18 "fmt" 19 "strings" 20 21 "gvisor.dev/gvisor/pkg/abi/linux" 22 "gvisor.dev/gvisor/pkg/atomicbitops" 23 "gvisor.dev/gvisor/pkg/context" 24 "gvisor.dev/gvisor/pkg/errors/linuxerr" 25 "gvisor.dev/gvisor/pkg/fspath" 26 "gvisor.dev/gvisor/pkg/log" 27 "gvisor.dev/gvisor/pkg/refs" 28 "gvisor.dev/gvisor/pkg/sentry/kernel/auth" 29 "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" 30 "gvisor.dev/gvisor/pkg/sentry/vfs" 31 "gvisor.dev/gvisor/pkg/sync" 32 ) 33 34 // _OVL_XATTR_PREFIX is an extended attribute key prefix to identify overlayfs 35 // attributes. 36 // Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_PREFIX 37 const _OVL_XATTR_PREFIX = linux.XATTR_TRUSTED_PREFIX + "overlay." 38 39 // _OVL_XATTR_OPAQUE is an extended attribute key whose value is set to "y" for 40 // opaque directories. 41 // Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_OPAQUE 42 const _OVL_XATTR_OPAQUE = _OVL_XATTR_PREFIX + "opaque" 43 44 func isWhiteout(stat *linux.Statx) bool { 45 return stat.Mode&linux.S_IFMT == linux.S_IFCHR && stat.RdevMajor == 0 && stat.RdevMinor == 0 46 } 47 48 // Sync implements vfs.FilesystemImpl.Sync. 
49 func (fs *filesystem) Sync(ctx context.Context) error { 50 if fs.opts.UpperRoot.Ok() { 51 return fs.opts.UpperRoot.Mount().Filesystem().Impl().Sync(ctx) 52 } 53 return nil 54 } 55 56 var dentrySlicePool = sync.Pool{ 57 New: func() any { 58 ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity 59 return &ds 60 }, 61 } 62 63 func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry { 64 if ds == nil { 65 ds = dentrySlicePool.Get().(*[]*dentry) 66 } 67 *ds = append(*ds, d) 68 return ds 69 } 70 71 // Preconditions: ds != nil. 72 func putDentrySlice(ds *[]*dentry) { 73 // Allow dentries to be GC'd. 74 for i := range *ds { 75 (*ds)[i] = nil 76 } 77 *ds = (*ds)[:0] 78 dentrySlicePool.Put(ds) 79 } 80 81 // renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls 82 // dentry.checkDropLocked on all dentries in *dsp with fs.renameMu locked for 83 // writing. 84 // 85 // dsp is a pointer-to-pointer since defer evaluates its arguments immediately, 86 // but dentry slices are allocated lazily, and it's much easier to say "defer 87 // fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() { 88 // fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this. 89 // 90 // +checklocksreleaseread:fs.renameMu 91 func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]*dentry) { 92 fs.renameMu.RUnlock() 93 if *dsp == nil { 94 return 95 } 96 ds := **dsp 97 // Only go through calling dentry.checkDropLocked() (which requires 98 // re-locking renameMu) if we actually have any dentries with zero refs. 
99 checkAny := false 100 for i := range ds { 101 if ds[i].refs.Load() == 0 { 102 checkAny = true 103 break 104 } 105 } 106 if checkAny { 107 fs.renameMu.Lock() 108 for _, d := range ds { 109 d.checkDropLocked(ctx) 110 } 111 fs.renameMu.Unlock() 112 } 113 putDentrySlice(*dsp) 114 } 115 116 // +checklocksrelease:fs.renameMu 117 func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { 118 if *ds == nil { 119 fs.renameMu.Unlock() 120 return 121 } 122 for _, d := range **ds { 123 d.checkDropLocked(ctx) 124 } 125 fs.renameMu.Unlock() 126 putDentrySlice(*ds) 127 } 128 129 // stepLocked resolves rp.Component() to an existing file, starting from the 130 // given directory. 131 // 132 // Dentries which may have a reference count of zero, and which therefore 133 // should be dropped once traversal is complete, are appended to ds. 134 // 135 // Preconditions: 136 // - fs.renameMu must be locked. 137 // - d.dirMu must be locked. 138 // - !rp.Done(). 139 func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, lookupLayer, bool, error) { 140 if !d.isDir() { 141 return nil, lookupLayerNone, false, linuxerr.ENOTDIR 142 } 143 if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 144 return nil, lookupLayerNone, false, err 145 } 146 name := rp.Component() 147 if name == "." { 148 rp.Advance() 149 return d, d.topLookupLayer(), false, nil 150 } 151 if name == ".." 
{ 152 if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil { 153 return nil, lookupLayerNone, false, err 154 } else if isRoot || d.parent.Load() == nil { 155 rp.Advance() 156 return d, d.topLookupLayer(), false, nil 157 } 158 if err := rp.CheckMount(ctx, &d.parent.Load().vfsd); err != nil { 159 return nil, lookupLayerNone, false, err 160 } 161 rp.Advance() 162 parent := d.parent.Load() 163 return parent, parent.topLookupLayer(), false, nil 164 } 165 if uint64(len(name)) > fs.maxFilenameLen { 166 return nil, lookupLayerNone, false, linuxerr.ENAMETOOLONG 167 } 168 child, topLookupLayer, err := fs.getChildLocked(ctx, d, name, ds) 169 if err != nil { 170 return nil, topLookupLayer, false, err 171 } 172 if err := rp.CheckMount(ctx, &child.vfsd); err != nil { 173 return nil, lookupLayerNone, false, err 174 } 175 if child.isSymlink() && rp.ShouldFollowSymlink() { 176 target, err := child.readlink(ctx) 177 if err != nil { 178 return nil, lookupLayerNone, false, err 179 } 180 followedSymlink, err := rp.HandleSymlink(target) 181 return d, topLookupLayer, followedSymlink, err 182 } 183 rp.Advance() 184 return child, topLookupLayer, false, nil 185 } 186 187 // Preconditions: 188 // - fs.renameMu must be locked. 189 // - d.dirMu must be locked. 190 func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, lookupLayer, error) { 191 if child, ok := parent.children[name]; ok { 192 return child, child.topLookupLayer(), nil 193 } 194 child, topLookupLayer, err := fs.lookupLocked(ctx, parent, name) 195 if err != nil { 196 return nil, topLookupLayer, err 197 } 198 if parent.children == nil { 199 parent.children = make(map[string]*dentry) 200 } 201 parent.children[name] = child 202 // child's refcount is initially 0, so it may be dropped after traversal. 203 *ds = appendDentry(*ds, child) 204 return child, topLookupLayer, nil 205 } 206 207 // Preconditions: 208 // - fs.renameMu must be locked. 
//   - parent.dirMu must be locked.
//
// lookupLocked builds a new dentry for the child of parent called name by
// visiting parent's layers through parent.iterLayers and recording every layer
// on which the child exists.
//
// On success it returns the new dentry, which holds a reference on parent,
// together with the topmost layer on which the file was found. On failure the
// partially built dentry is destroyed and the returned dentry is nil. The
// lookupLayer is still returned in that case, so a caller can see
// lookupLayerUpperWhiteout alongside ENOENT. The error is ENOENT when the file
// does not exist in the overlay, and otherwise whatever the failing operation
// returned.
func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name string) (*dentry, lookupLayer, error) {
	childPath := fspath.Parse(name)
	child := fs.newDentry()
	// topLookupLayer and lookupErr are written by the callback below and read
	// after iteration completes.
	topLookupLayer := lookupLayerNone
	var lookupErr error

	vfsObj := fs.vfsfs.VirtualFilesystem()
	// The callback returns true to continue with the next layer and false to
	// stop iterating.
	parent.iterLayers(func(parentVD vfs.VirtualDentry, isUpper bool) bool {
		// Resolve name relative to this layer's copy of the parent directory.
		childVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{
			Root:  parentVD,
			Start: parentVD,
			Path:  childPath,
		}, &vfs.GetDentryOptions{})
		if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENAMETOOLONG, err) {
			// The file doesn't exist on this layer. Proceed to the next one.
			return true
		}
		if err != nil {
			lookupErr = err
			return false
		}
		// Drop the reference obtained from GetDentryAt when this callback
		// returns. A separate reference is taken below if the layer is kept.
		defer childVD.DecRef(ctx)

		mask := uint32(linux.STATX_TYPE)
		if topLookupLayer == lookupLayerNone {
			// Mode, UID, GID, and (for non-directories) inode number come from
			// the topmost layer on which the file exists.
			mask |= linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO
		}
		stat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{
			Root:  childVD,
			Start: childVD,
		}, &vfs.StatOptions{
			Mask: mask,
		})
		if err != nil {
			lookupErr = err
			return false
		}
		// Fail with EREMOTE if the layer did not supply every requested field.
		if stat.Mask&mask != mask {
			lookupErr = linuxerr.EREMOTE
			return false
		}

		if isWhiteout(&stat) {
			// This is a whiteout, so it "doesn't exist" on this layer, and
			// layers below this one are ignored.
			if isUpper {
				topLookupLayer = lookupLayerUpperWhiteout
			}
			return false
		}
		isDir := stat.Mode&linux.S_IFMT == linux.S_IFDIR
		if topLookupLayer != lookupLayerNone && !isDir {
			// Directories are not merged with non-directory files from lower
			// layers; instead, layers including and below the first
			// non-directory file are ignored. (This file must be a directory
			// on previous layers, since lower layers aren't searched for
			// non-directory files.)
			return false
		}

		// Update child to include this layer.
		childVD.IncRef()
		if isUpper {
			child.upperVD = childVD
			child.copiedUp = atomicbitops.FromUint32(1)
		} else {
			child.lowerVDs = append(child.lowerVDs, childVD)
		}
		if topLookupLayer == lookupLayerNone {
			// This is the topmost layer holding the file: record which kind of
			// layer it is and take the file's metadata from it.
			if isUpper {
				topLookupLayer = lookupLayerUpper
			} else {
				topLookupLayer = lookupLayerLower
			}
			child.mode = atomicbitops.FromUint32(uint32(stat.Mode))
			child.uid = atomicbitops.FromUint32(stat.UID)
			child.gid = atomicbitops.FromUint32(stat.GID)
			child.devMajor = atomicbitops.FromUint32(stat.DevMajor)
			child.devMinor = atomicbitops.FromUint32(stat.DevMinor)
			child.ino = atomicbitops.FromUint64(stat.Ino)
		}

		// For non-directory files, only the topmost layer that contains a file
		// matters.
		if !isDir {
			return false
		}

		// Directories use the lowest layer inode and device numbers to generate a
		// filesystem local inode number. This way the inode number does not change
		// after copy ups.
		child.devMajor = atomicbitops.FromUint32(stat.DevMajor)
		child.devMinor = atomicbitops.FromUint32(stat.DevMinor)
		child.ino = atomicbitops.FromUint64(stat.Ino)

		// Directories are merged with directories from lower layers if they
		// are not explicitly opaque.
		opaqueVal, err := vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{
			Root:  childVD,
			Start: childVD,
		}, &vfs.GetXattrOptions{
			Name: _OVL_XATTR_OPAQUE,
			Size: 1,
		})
		// Keep descending unless the xattr was read successfully and is "y".
		// A failed read is treated the same as "not opaque".
		return !(err == nil && opaqueVal == "y")
	})

	if lookupErr != nil {
		child.destroyLocked(ctx)
		return nil, topLookupLayer, lookupErr
	}
	if !topLookupLayer.existsInOverlay() {
		// Either no layer has the file or an upper whiteout hides it.
		child.destroyLocked(ctx)
		return nil, topLookupLayer, linuxerr.ENOENT
	}

	// Device and inode numbers were copied from the topmost layer above for
	// non-directories. They were copied from the bottommost layer for
	// directories. Override them if necessary. We can use RacyLoad() because
	// child is still being initialized.
	if child.isDir() {
		child.ino.Store(fs.newDirIno(child.devMajor.RacyLoad(), child.devMinor.RacyLoad(), child.ino.RacyLoad()))
		child.devMajor = atomicbitops.FromUint32(linux.UNNAMED_MAJOR)
		child.devMinor = atomicbitops.FromUint32(fs.dirDevMinor)
	} else if !child.upperVD.Ok() {
		// Non-directory that exists only on a lower layer: map the lower
		// layer's device number to an overlay-specific minor number.
		childDevMinor, err := fs.getLowerDevMinor(child.devMajor.RacyLoad(), child.devMinor.RacyLoad())
		if err != nil {
			ctx.Infof("overlay.filesystem.lookupLocked: failed to map lower layer device number (%d, %d) to an overlay-specific device number: %v", child.devMajor.RacyLoad(), child.devMinor.RacyLoad(), err)
			child.destroyLocked(ctx)
			return nil, topLookupLayer, err
		}
		child.devMajor = atomicbitops.FromUint32(linux.UNNAMED_MAJOR)
		child.devMinor = atomicbitops.FromUint32(childDevMinor)
	}

	// The child holds a reference on its parent.
	parent.IncRef()
	child.parent.Store(parent)
	child.name = name
	return child, topLookupLayer, nil
}

// lookupLayerLocked is similar to lookupLocked, but only returns information
// about the file rather than a dentry.
//
// Preconditions:
//   - fs.renameMu must be locked.
//   - parent.dirMu must be locked.
359 func (fs *filesystem) lookupLayerLocked(ctx context.Context, parent *dentry, name string) (lookupLayer, error) { 360 childPath := fspath.Parse(name) 361 lookupLayer := lookupLayerNone 362 var lookupErr error 363 364 parent.iterLayers(func(parentVD vfs.VirtualDentry, isUpper bool) bool { 365 stat, err := fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{ 366 Root: parentVD, 367 Start: parentVD, 368 Path: childPath, 369 }, &vfs.StatOptions{ 370 Mask: linux.STATX_TYPE, 371 }) 372 if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENAMETOOLONG, err) { 373 // The file doesn't exist on this layer. Proceed to the next 374 // one. 375 return true 376 } 377 if err != nil { 378 lookupErr = err 379 return false 380 } 381 if stat.Mask&linux.STATX_TYPE == 0 { 382 // Linux's overlayfs tends to return EREMOTE in cases where a file 383 // is unusable for reasons that are not better captured by another 384 // errno. 385 lookupErr = linuxerr.EREMOTE 386 return false 387 } 388 if isWhiteout(&stat) { 389 // This is a whiteout, so it "doesn't exist" on this layer, and 390 // layers below this one are ignored. 391 if isUpper { 392 lookupLayer = lookupLayerUpperWhiteout 393 } 394 return false 395 } 396 // The file exists; we can stop searching. 397 if isUpper { 398 lookupLayer = lookupLayerUpper 399 } else { 400 lookupLayer = lookupLayerLower 401 } 402 return false 403 }) 404 405 return lookupLayer, lookupErr 406 } 407 408 type lookupLayer int 409 410 const ( 411 // lookupLayerNone indicates that no file exists at the given path on the 412 // upper layer, and is either whited out or does not exist on lower layers. 413 // Therefore, the file does not exist in the overlay filesystem, and file 414 // creation may proceed normally (if an upper layer exists). 415 lookupLayerNone lookupLayer = iota 416 417 // lookupLayerLower indicates that no file exists at the given path on the 418 // upper layer, but exists on a lower layer. 
Therefore, the file exists in 419 // the overlay filesystem, but must be copied-up before mutation. 420 lookupLayerLower 421 422 // lookupLayerUpper indicates that a non-whiteout file exists at the given 423 // path on the upper layer. Therefore, the file exists in the overlay 424 // filesystem, and is already copied-up. 425 lookupLayerUpper 426 427 // lookupLayerUpperWhiteout indicates that a whiteout exists at the given 428 // path on the upper layer. Therefore, the file does not exist in the 429 // overlay filesystem, and file creation must remove the whiteout before 430 // proceeding. 431 lookupLayerUpperWhiteout 432 ) 433 434 func (ll lookupLayer) existsInOverlay() bool { 435 return ll == lookupLayerLower || ll == lookupLayerUpper 436 } 437 438 // walkParentDirLocked resolves all but the last path component of rp to an 439 // existing directory, starting from the given directory (which is usually 440 // rp.Start().Impl().(*dentry)). It does not check that the returned directory 441 // is searchable by the provider of rp. 442 // 443 // Preconditions: 444 // - fs.renameMu must be locked. 445 // - !rp.Done(). 446 func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) { 447 for !rp.Final() { 448 d.dirMu.Lock() 449 next, _, _, err := fs.stepLocked(ctx, rp, d, ds) 450 d.dirMu.Unlock() 451 if err != nil { 452 return nil, err 453 } 454 d = next 455 } 456 if !d.isDir() { 457 return nil, linuxerr.ENOTDIR 458 } 459 return d, nil 460 } 461 462 // resolveLocked resolves rp to an existing file. 463 // 464 // Preconditions: fs.renameMu must be locked. 
465 func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) { 466 d := rp.Start().Impl().(*dentry) 467 for !rp.Done() { 468 d.dirMu.Lock() 469 next, _, _, err := fs.stepLocked(ctx, rp, d, ds) 470 d.dirMu.Unlock() 471 if err != nil { 472 return nil, err 473 } 474 d = next 475 } 476 if rp.MustBeDir() && !d.isDir() { 477 return nil, linuxerr.ENOTDIR 478 } 479 return d, nil 480 } 481 482 type createType int 483 484 const ( 485 createNonDirectory createType = iota 486 createDirectory 487 createSyntheticMountpoint 488 ) 489 490 // doCreateAt checks that creating a file at rp is permitted, then invokes 491 // create to do so. 492 // 493 // Preconditions: 494 // - !rp.Done(). 495 // - For the final path component in rp, !rp.ShouldFollowSymlink(). 496 func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, ct createType, create func(parent *dentry, name string, haveUpperWhiteout bool) error) error { 497 var ds *[]*dentry 498 fs.renameMu.RLock() 499 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 500 start := rp.Start().Impl().(*dentry) 501 parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 502 if err != nil { 503 return err 504 } 505 name := rp.Component() 506 if name == "." || name == ".." { 507 return linuxerr.EEXIST 508 } 509 if uint64(len(name)) > fs.maxFilenameLen { 510 return linuxerr.ENAMETOOLONG 511 } 512 if parent.vfsd.IsDead() { 513 return linuxerr.ENOENT 514 } 515 516 if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 517 return err 518 } 519 520 parent.dirMu.Lock() 521 defer parent.dirMu.Unlock() 522 523 // Determine if a file already exists at name. 
524 if _, ok := parent.children[name]; ok { 525 return linuxerr.EEXIST 526 } 527 childLayer, err := fs.lookupLayerLocked(ctx, parent, name) 528 if err != nil { 529 return err 530 } 531 if childLayer.existsInOverlay() { 532 return linuxerr.EEXIST 533 } 534 535 if ct == createNonDirectory && rp.MustBeDir() { 536 return linuxerr.ENOENT 537 } 538 539 mnt := rp.Mount() 540 if err := mnt.CheckBeginWrite(); err != nil { 541 return err 542 } 543 defer mnt.EndWrite() 544 if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { 545 return err 546 } 547 // Ensure that the parent directory is copied-up so that we can create the 548 // new file in the upper layer. 549 if err := parent.copyUpMaybeSyntheticMountpointLocked(ctx, ct == createSyntheticMountpoint); err != nil { 550 return err 551 } 552 553 // Finally create the new file. 554 if err := create(parent, name, childLayer == lookupLayerUpperWhiteout); err != nil { 555 return err 556 } 557 558 parent.dirents = nil 559 ev := linux.IN_CREATE 560 if ct != createNonDirectory { 561 ev |= linux.IN_ISDIR 562 } 563 parent.watches.Notify(ctx, name, uint32(ev), 0 /* cookie */, vfs.InodeEvent, false /* unlinked */) 564 return nil 565 } 566 567 // CreateWhiteout creates a whiteout at pop. Whiteouts are created with 568 // character devices with device ID = 0. 569 // 570 // Preconditions: pop's parent directory has been copied up. 
571 func CreateWhiteout(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, pop *vfs.PathOperation) error { 572 return vfsObj.MknodAt(ctx, creds, pop, &vfs.MknodOptions{ 573 Mode: linux.S_IFCHR, // permissions == include/linux/fs.h:WHITEOUT_MODE == 0 574 // DevMajor == DevMinor == 0, from include/linux/fs.h:WHITEOUT_DEV 575 }) 576 } 577 578 func (fs *filesystem) cleanupRecreateWhiteout(ctx context.Context, vfsObj *vfs.VirtualFilesystem, pop *vfs.PathOperation) { 579 if err := CreateWhiteout(ctx, vfsObj, fs.creds, pop); err != nil { 580 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate whiteout after failed file creation: %v", err)) 581 } 582 } 583 584 // AccessAt implements vfs.Filesystem.Impl.AccessAt. 585 func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { 586 var ds *[]*dentry 587 fs.renameMu.RLock() 588 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 589 d, err := fs.resolveLocked(ctx, rp, &ds) 590 if err != nil { 591 return err 592 } 593 if err := d.checkPermissions(creds, ats); err != nil { 594 return err 595 } 596 if !ats.MayWrite() { 597 // Not requesting write permission. Allow it. 598 return nil 599 } 600 if rp.Mount().ReadOnly() { 601 return linuxerr.EROFS 602 } 603 if !d.upperVD.Ok() && !d.canBeCopiedUp() { 604 // A lower layer file that can not be copied up, can not be written to. 605 // Error out here. Don't give the application false hopes. 606 return linuxerr.EACCES 607 } 608 return nil 609 } 610 611 // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. 
612 func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { 613 var ds *[]*dentry 614 fs.renameMu.RLock() 615 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 616 d, err := fs.resolveLocked(ctx, rp, &ds) 617 if err != nil { 618 return nil, err 619 } 620 if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { 621 return nil, err 622 } 623 layerVD := d.topLayer() 624 return fs.vfsfs.VirtualFilesystem().BoundEndpointAt(ctx, fs.creds, &vfs.PathOperation{ 625 Root: layerVD, 626 Start: layerVD, 627 }, &opts) 628 } 629 630 // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. 631 func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { 632 var ds *[]*dentry 633 fs.renameMu.RLock() 634 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 635 d, err := fs.resolveLocked(ctx, rp, &ds) 636 if err != nil { 637 return nil, err 638 } 639 if opts.CheckSearchable { 640 if !d.isDir() { 641 return nil, linuxerr.ENOTDIR 642 } 643 if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 644 return nil, err 645 } 646 } 647 d.IncRef() 648 return &d.vfsd, nil 649 } 650 651 // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. 652 func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { 653 var ds *[]*dentry 654 fs.renameMu.RLock() 655 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 656 start := rp.Start().Impl().(*dentry) 657 d, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 658 if err != nil { 659 return nil, err 660 } 661 d.IncRef() 662 return &d.vfsd, nil 663 } 664 665 // LinkAt implements vfs.FilesystemImpl.LinkAt. 
666 func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { 667 return fs.doCreateAt(ctx, rp, createNonDirectory, func(parent *dentry, childName string, haveUpperWhiteout bool) error { 668 if rp.Mount() != vd.Mount() { 669 return linuxerr.EXDEV 670 } 671 old := vd.Dentry().Impl().(*dentry) 672 if old.isDir() { 673 return linuxerr.EPERM 674 } 675 if err := old.copyUpLocked(ctx); err != nil { 676 return err 677 } 678 vfsObj := fs.vfsfs.VirtualFilesystem() 679 newpop := vfs.PathOperation{ 680 Root: parent.upperVD, 681 Start: parent.upperVD, 682 Path: fspath.Parse(childName), 683 } 684 if haveUpperWhiteout { 685 if err := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); err != nil { 686 return err 687 } 688 } 689 if err := vfsObj.LinkAt(ctx, fs.creds, &vfs.PathOperation{ 690 Root: old.upperVD, 691 Start: old.upperVD, 692 }, &newpop); err != nil { 693 if haveUpperWhiteout { 694 fs.cleanupRecreateWhiteout(ctx, vfsObj, &newpop) 695 } 696 return err 697 } 698 creds := rp.Credentials() 699 if err := vfsObj.SetStatAt(ctx, fs.creds, &newpop, &vfs.SetStatOptions{ 700 Stat: linux.Statx{ 701 Mask: linux.STATX_UID | linux.STATX_GID, 702 UID: uint32(creds.EffectiveKUID), 703 GID: uint32(creds.EffectiveKGID), 704 }, 705 }); err != nil { 706 if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); cleanupErr != nil { 707 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after LinkAt metadata update failure: %v", cleanupErr)) 708 } else if haveUpperWhiteout { 709 fs.cleanupRecreateWhiteout(ctx, vfsObj, &newpop) 710 } 711 return err 712 } 713 old.watches.Notify(ctx, "", linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent, false /* unlinked */) 714 return nil 715 }) 716 } 717 718 // MkdirAt implements vfs.FilesystemImpl.MkdirAt. 
719 func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { 720 ct := createDirectory 721 if opts.ForSyntheticMountpoint { 722 ct = createSyntheticMountpoint 723 } 724 return fs.doCreateAt(ctx, rp, ct, func(parent *dentry, childName string, haveUpperWhiteout bool) error { 725 vfsObj := fs.vfsfs.VirtualFilesystem() 726 pop := vfs.PathOperation{ 727 Root: parent.upperVD, 728 Start: parent.upperVD, 729 Path: fspath.Parse(childName), 730 } 731 if haveUpperWhiteout { 732 if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil { 733 return err 734 } 735 } 736 if err := vfsObj.MkdirAt(ctx, fs.creds, &pop, &opts); err != nil { 737 if haveUpperWhiteout { 738 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 739 } 740 return err 741 } 742 743 if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{ 744 Stat: parent.newChildOwnerStat(opts.Mode, rp.Credentials()), 745 }); err != nil { 746 if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil { 747 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt metadata update failure: %v", cleanupErr)) 748 } else if haveUpperWhiteout { 749 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 750 } 751 return err 752 } 753 if haveUpperWhiteout { 754 // A whiteout is being replaced with this new directory. There may be 755 // directories on lower layers (previously hidden by the whiteout) that 756 // the new directory should not be merged with, so mark as opaque. 757 // See fs/overlayfs/dir.c:ovl_create_over_whiteout() -> ovl_set_opaque(). 
758 if err := vfsObj.SetXattrAt(ctx, fs.creds, &pop, &vfs.SetXattrOptions{ 759 Name: _OVL_XATTR_OPAQUE, 760 Value: "y", 761 }); err != nil { 762 if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil { 763 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt set-opaque failure: %v", cleanupErr)) 764 } else { 765 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 766 } 767 return err 768 } 769 } else if len(parent.lowerVDs) > 0 { 770 // If haveUpperWhiteout is false and the parent is merged, then we should 771 // apply an optimization. We know that nothing exists on the parent's 772 // lower layers. Otherwise doCreateAt() would have failed with EEXIST. 773 // Mark the new directory opaque to avoid unnecessary lower lookups in 774 // fs.lookupLocked(). Allow it to fail since this is an optimization. 775 // See fs/overlayfs/dir.c:ovl_create_upper() -> ovl_set_opaque(). 776 _ = vfsObj.SetXattrAt(ctx, fs.creds, &pop, &vfs.SetXattrOptions{ 777 Name: _OVL_XATTR_OPAQUE, 778 Value: "y", 779 }) 780 } 781 return nil 782 }) 783 } 784 785 // MknodAt implements vfs.FilesystemImpl.MknodAt. 786 func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { 787 return fs.doCreateAt(ctx, rp, createNonDirectory, func(parent *dentry, childName string, haveUpperWhiteout bool) error { 788 // Disallow attempts to create whiteouts. 
789 if opts.Mode&linux.S_IFMT == linux.S_IFCHR && opts.DevMajor == 0 && opts.DevMinor == 0 { 790 return linuxerr.EPERM 791 } 792 vfsObj := fs.vfsfs.VirtualFilesystem() 793 pop := vfs.PathOperation{ 794 Root: parent.upperVD, 795 Start: parent.upperVD, 796 Path: fspath.Parse(childName), 797 } 798 if haveUpperWhiteout { 799 if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil { 800 return err 801 } 802 } 803 if err := vfsObj.MknodAt(ctx, fs.creds, &pop, &opts); err != nil { 804 if haveUpperWhiteout { 805 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 806 } 807 return err 808 } 809 creds := rp.Credentials() 810 if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{ 811 Stat: parent.newChildOwnerStat(opts.Mode, creds), 812 }); err != nil { 813 if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil { 814 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after MknodAt metadata update failure: %v", cleanupErr)) 815 } else if haveUpperWhiteout { 816 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 817 } 818 return err 819 } 820 return nil 821 }) 822 } 823 824 // OpenAt implements vfs.FilesystemImpl.OpenAt. 
825 func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 826 mayCreate := opts.Flags&linux.O_CREAT != 0 827 mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL) 828 829 var ds *[]*dentry 830 fs.renameMu.RLock() 831 unlocked := false 832 unlock := func() { 833 if !unlocked { 834 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 835 unlocked = true 836 } 837 } 838 defer unlock() 839 840 start := rp.Start().Impl().(*dentry) 841 if rp.Done() { 842 if mayCreate && rp.MustBeDir() { 843 return nil, linuxerr.EISDIR 844 } 845 if mustCreate { 846 return nil, linuxerr.EEXIST 847 } 848 if err := start.ensureOpenableLocked(ctx, rp, &opts); err != nil { 849 return nil, err 850 } 851 start.IncRef() 852 defer start.DecRef(ctx) 853 unlock() 854 return start.openCopiedUp(ctx, rp, &opts) 855 } 856 857 afterTrailingSymlink: 858 parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 859 if err != nil { 860 return nil, err 861 } 862 // Check for search permission in the parent directory. 863 if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 864 return nil, err 865 } 866 // Reject attempts to open directories with O_CREAT. 867 if mayCreate && rp.MustBeDir() { 868 return nil, linuxerr.EISDIR 869 } 870 // Determine whether or not we need to create a file. 871 parent.dirMu.Lock() 872 child, topLookupLayer, followedSymlink, err := fs.stepLocked(ctx, rp, parent, &ds) 873 if followedSymlink { 874 parent.dirMu.Unlock() 875 if mustCreate { 876 // EEXIST must be returned if an existing symlink is opened with O_EXCL. 877 return nil, linuxerr.EEXIST 878 } 879 if err != nil { 880 // If followedSymlink && err != nil, then this symlink resolution error 881 // must be handled by the VFS layer. 
882 return nil, err 883 } 884 start = parent 885 goto afterTrailingSymlink 886 } 887 if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate { 888 fd, err := fs.createAndOpenLocked(ctx, rp, parent, &opts, &ds, topLookupLayer == lookupLayerUpperWhiteout) 889 parent.dirMu.Unlock() 890 return fd, err 891 } 892 parent.dirMu.Unlock() 893 if err != nil { 894 return nil, err 895 } 896 if mustCreate { 897 return nil, linuxerr.EEXIST 898 } 899 if rp.MustBeDir() && !child.isDir() { 900 return nil, linuxerr.ENOTDIR 901 } 902 if err := child.ensureOpenableLocked(ctx, rp, &opts); err != nil { 903 return nil, err 904 } 905 child.IncRef() 906 defer child.DecRef(ctx) 907 unlock() 908 return child.openCopiedUp(ctx, rp, &opts) 909 } 910 911 // Preconditions: filesystem.renameMu must be locked. 912 func (d *dentry) ensureOpenableLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) error { 913 ats := vfs.AccessTypesForOpenFlags(opts) 914 if err := d.checkPermissions(rp.Credentials(), ats); err != nil { 915 return err 916 } 917 if d.isDir() { 918 if ats.MayWrite() { 919 return linuxerr.EISDIR 920 } 921 if opts.Flags&linux.O_CREAT != 0 { 922 return linuxerr.EISDIR 923 } 924 if opts.Flags&linux.O_DIRECT != 0 { 925 return linuxerr.EINVAL 926 } 927 return nil 928 } 929 930 if !ats.MayWrite() { 931 return nil 932 } 933 934 // Copy up! 935 if err := rp.Mount().CheckBeginWrite(); err != nil { 936 return err 937 } 938 defer rp.Mount().EndWrite() 939 return d.copyUpLocked(ctx) 940 } 941 942 // Preconditions: If vfs.AccessTypesForOpenFlags(opts).MayWrite(), then d has 943 // been copied up. 944 func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { 945 mnt := rp.Mount() 946 947 // Directory FDs open FDs from each layer when directory entries are read, 948 // so they don't require opening an FD from d.topLayer() up front. 
949 ftype := d.mode.Load() & linux.S_IFMT 950 if ftype == linux.S_IFDIR { 951 fd := &directoryFD{} 952 fd.LockFD.Init(&d.locks) 953 if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ 954 UseDentryMetadata: true, 955 }); err != nil { 956 return nil, err 957 } 958 return &fd.vfsfd, nil 959 } 960 961 layerVD, isUpper := d.topLayerInfo() 962 layerFD, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ 963 Root: layerVD, 964 Start: layerVD, 965 }, opts) 966 if err != nil { 967 return nil, err 968 } 969 if ftype != linux.S_IFREG { 970 return layerFD, nil 971 } 972 layerFlags := layerFD.StatusFlags() 973 fd := ®ularFileFD{ 974 copiedUp: isUpper, 975 cachedFD: layerFD, 976 cachedFlags: layerFlags, 977 } 978 fd.LockFD.Init(&d.locks) 979 layerFDOpts := layerFD.Options() 980 if err := fd.vfsfd.Init(fd, layerFlags, mnt, &d.vfsd, &layerFDOpts); err != nil { 981 layerFD.DecRef(ctx) 982 return nil, err 983 } 984 return &fd.vfsfd, nil 985 } 986 987 // Preconditions: 988 // - parent.dirMu must be locked. 989 // - parent does not already contain a child named rp.Component(). 990 func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.ResolvingPath, parent *dentry, opts *vfs.OpenOptions, ds **[]*dentry, haveUpperWhiteout bool) (*vfs.FileDescription, error) { 991 creds := rp.Credentials() 992 if err := parent.checkPermissions(creds, vfs.MayWrite); err != nil { 993 return nil, err 994 } 995 if parent.vfsd.IsDead() { 996 return nil, linuxerr.ENOENT 997 } 998 mnt := rp.Mount() 999 if err := mnt.CheckBeginWrite(); err != nil { 1000 return nil, err 1001 } 1002 defer mnt.EndWrite() 1003 1004 if err := parent.copyUpLocked(ctx); err != nil { 1005 return nil, err 1006 } 1007 1008 vfsObj := fs.vfsfs.VirtualFilesystem() 1009 childName := rp.Component() 1010 pop := vfs.PathOperation{ 1011 Root: parent.upperVD, 1012 Start: parent.upperVD, 1013 Path: fspath.Parse(childName), 1014 } 1015 // Unlink the whiteout if it exists. 
1016 if haveUpperWhiteout { 1017 if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil { 1018 log.Warningf("overlay.filesystem.createAndOpenLocked: failed to unlink whiteout: %v", err) 1019 return nil, err 1020 } 1021 } 1022 // Create the file on the upper layer, and get an FD representing it. 1023 upperFD, err := vfsObj.OpenAt(ctx, fs.creds, &pop, &vfs.OpenOptions{ 1024 Flags: opts.Flags&^vfs.FileCreationFlags | linux.O_CREAT | linux.O_EXCL, 1025 Mode: opts.Mode, 1026 }) 1027 if err != nil { 1028 if haveUpperWhiteout { 1029 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 1030 } 1031 return nil, err 1032 } 1033 1034 // Change the file's owner to the caller. We can't use upperFD.SetStat() 1035 // because it will pick up creds from ctx. 1036 if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{ 1037 Stat: parent.newChildOwnerStat(opts.Mode, creds), 1038 }); err != nil { 1039 if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil { 1040 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) metadata update failure: %v", cleanupErr)) 1041 } else if haveUpperWhiteout { 1042 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 1043 } 1044 return nil, err 1045 } 1046 // Re-lookup to get a dentry representing the new file, which is needed for 1047 // the returned FD. 1048 child, _, err := fs.getChildLocked(ctx, parent, childName, ds) 1049 if err != nil { 1050 if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil { 1051 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) dentry lookup failure: %v", cleanupErr)) 1052 } else if haveUpperWhiteout { 1053 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 1054 } 1055 return nil, err 1056 } 1057 // Finally construct the overlay FD. 
Below this point, we don't perform 1058 // cleanup (the file was created successfully even if we can no longer open 1059 // it for some reason). 1060 parent.dirents = nil 1061 upperFlags := upperFD.StatusFlags() 1062 fd := ®ularFileFD{ 1063 copiedUp: true, 1064 cachedFD: upperFD, 1065 cachedFlags: upperFlags, 1066 } 1067 fd.LockFD.Init(&child.locks) 1068 upperFDOpts := upperFD.Options() 1069 if err := fd.vfsfd.Init(fd, upperFlags, mnt, &child.vfsd, &upperFDOpts); err != nil { 1070 upperFD.DecRef(ctx) 1071 return nil, err 1072 } 1073 parent.watches.Notify(ctx, childName, linux.IN_CREATE, 0 /* cookie */, vfs.PathEvent, false /* unlinked */) 1074 return &fd.vfsfd, nil 1075 } 1076 1077 // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. 1078 func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { 1079 var ds *[]*dentry 1080 fs.renameMu.RLock() 1081 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1082 d, err := fs.resolveLocked(ctx, rp, &ds) 1083 if err != nil { 1084 return "", err 1085 } 1086 layerVD := d.topLayer() 1087 return fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{ 1088 Root: layerVD, 1089 Start: layerVD, 1090 }) 1091 } 1092 1093 // RenameAt implements vfs.FilesystemImpl.RenameAt. 1094 func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { 1095 // Resolve newParent first to verify that it's on this Mount. 1096 var ds *[]*dentry 1097 fs.renameMu.Lock() 1098 // We need to DecRef outside of fs.mu because forgetting a dead mountpoint 1099 // could result in this filesystem being released which acquires fs.mu. 
1100 var toDecRef []refs.RefCounter 1101 defer func() { 1102 for _, ref := range toDecRef { 1103 ref.DecRef(ctx) 1104 } 1105 }() 1106 defer fs.renameMuUnlockAndCheckDrop(ctx, &ds) 1107 newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds) 1108 if err != nil { 1109 return err 1110 } 1111 1112 if opts.Flags&^linux.RENAME_NOREPLACE != 0 { 1113 return linuxerr.EINVAL 1114 } 1115 1116 newName := rp.Component() 1117 if newName == "." || newName == ".." { 1118 if opts.Flags&linux.RENAME_NOREPLACE != 0 { 1119 return linuxerr.EEXIST 1120 } 1121 return linuxerr.EBUSY 1122 } 1123 if uint64(len(newName)) > fs.maxFilenameLen { 1124 return linuxerr.ENAMETOOLONG 1125 } 1126 // Do not check for newName length, since different filesystem 1127 // implementations impose different name limits. upperfs.RenameAt() will fail 1128 // appropriately if it has to. 1129 mnt := rp.Mount() 1130 if mnt != oldParentVD.Mount() { 1131 return linuxerr.EXDEV 1132 } 1133 if err := mnt.CheckBeginWrite(); err != nil { 1134 return err 1135 } 1136 defer mnt.EndWrite() 1137 1138 oldParent := oldParentVD.Dentry().Impl().(*dentry) 1139 creds := rp.Credentials() 1140 if err := oldParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil { 1141 return err 1142 } 1143 // We need a dentry representing the renamed file since, if it's a 1144 // directory, we need to check for write permission on it. 
1145 oldParent.dirMu.Lock() 1146 defer oldParent.dirMu.Unlock() 1147 renamed, _, err := fs.getChildLocked(ctx, oldParent, oldName, &ds) 1148 if err != nil { 1149 return err 1150 } 1151 if err := oldParent.mayDelete(creds, renamed); err != nil { 1152 return err 1153 } 1154 if renamed.isDir() { 1155 if renamed == newParent || genericIsAncestorDentry(renamed, newParent) { 1156 return linuxerr.EINVAL 1157 } 1158 if oldParent != newParent { 1159 if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil { 1160 return err 1161 } 1162 } 1163 } else { 1164 if opts.MustBeDir || rp.MustBeDir() { 1165 return linuxerr.ENOTDIR 1166 } 1167 } 1168 1169 if oldParent != newParent { 1170 if err := newParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil { 1171 return err 1172 } 1173 newParent.dirMu.NestedLock(dirLockNew) 1174 defer newParent.dirMu.NestedUnlock(dirLockNew) 1175 } 1176 if newParent.vfsd.IsDead() { 1177 return linuxerr.ENOENT 1178 } 1179 var ( 1180 replaced *dentry 1181 replacedVFSD *vfs.Dentry 1182 replacedLayer lookupLayer 1183 whiteouts map[string]bool 1184 ) 1185 replaced, replacedLayer, err = fs.getChildLocked(ctx, newParent, newName, &ds) 1186 if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) { 1187 return err 1188 } 1189 if replaced != nil { 1190 if opts.Flags&linux.RENAME_NOREPLACE != 0 { 1191 return linuxerr.EEXIST 1192 } 1193 replacedVFSD = &replaced.vfsd 1194 if replaced.isDir() { 1195 if !renamed.isDir() { 1196 return linuxerr.EISDIR 1197 } 1198 if genericIsAncestorDentry(replaced, renamed) { 1199 return linuxerr.ENOTEMPTY 1200 } 1201 replaced.dirMu.NestedLock(dirLockReplaced) 1202 defer replaced.dirMu.NestedUnlock(dirLockReplaced) 1203 whiteouts, err = replaced.collectWhiteoutsForRmdirLocked(ctx) 1204 if err != nil { 1205 return err 1206 } 1207 } else { 1208 if rp.MustBeDir() || renamed.isDir() { 1209 return linuxerr.ENOTDIR 1210 } 1211 } 1212 } 1213 1214 if oldParent == newParent && oldName == newName { 1215 return nil 1216 } 
1217 1218 // renamed and oldParent need to be copied-up before they're renamed on the 1219 // upper layer. 1220 if err := renamed.copyUpLocked(ctx); err != nil { 1221 return err 1222 } 1223 // If renamed is a directory, all of its descendants need to be copied-up 1224 // before they're renamed on the upper layer. 1225 if renamed.isDir() { 1226 if err := renamed.copyUpDescendantsLocked(ctx, &ds); err != nil { 1227 return err 1228 } 1229 } 1230 // newParent must be copied-up before it can contain renamed on the upper 1231 // layer. 1232 if err := newParent.copyUpLocked(ctx); err != nil { 1233 return err 1234 } 1235 // If replaced exists, it doesn't need to be copied-up, but we do need to 1236 // serialize with copy-up. Holding renameMu for writing should be 1237 // sufficient, but out of an abundance of caution... 1238 if replaced != nil { 1239 replaced.copyMu.RLock() 1240 defer replaced.copyMu.RUnlock() 1241 } 1242 1243 vfsObj := rp.VirtualFilesystem() 1244 mntns := vfs.MountNamespaceFromContext(ctx) 1245 defer mntns.DecRef(ctx) 1246 if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil { 1247 return err 1248 } 1249 1250 newpop := vfs.PathOperation{ 1251 Root: newParent.upperVD, 1252 Start: newParent.upperVD, 1253 Path: fspath.Parse(newName), 1254 } 1255 1256 needRecreateWhiteouts := false 1257 cleanupRecreateWhiteouts := func() { 1258 if !needRecreateWhiteouts { 1259 return 1260 } 1261 for whiteoutName, whiteoutUpper := range whiteouts { 1262 if !whiteoutUpper { 1263 continue 1264 } 1265 if err := CreateWhiteout(ctx, vfsObj, fs.creds, &vfs.PathOperation{ 1266 Root: replaced.upperVD, 1267 Start: replaced.upperVD, 1268 Path: fspath.Parse(whiteoutName), 1269 }); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) { 1270 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RenameAt failure: %v", err)) 1271 } 1272 } 1273 } 1274 if renamed.isDir() { 1275 if replacedLayer == lookupLayerUpper { 
1276 // Remove whiteouts from the directory being replaced. 1277 needRecreateWhiteouts = true 1278 for whiteoutName, whiteoutUpper := range whiteouts { 1279 if !whiteoutUpper { 1280 continue 1281 } 1282 if err := vfsObj.UnlinkAt(ctx, fs.creds, &vfs.PathOperation{ 1283 Root: replaced.upperVD, 1284 Start: replaced.upperVD, 1285 Path: fspath.Parse(whiteoutName), 1286 }); err != nil { 1287 vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD) 1288 cleanupRecreateWhiteouts() 1289 return err 1290 } 1291 } 1292 } else if replacedLayer == lookupLayerUpperWhiteout { 1293 // We need to explicitly remove the whiteout since otherwise rename 1294 // on the upper layer will fail with ENOTDIR. 1295 if err := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); err != nil { 1296 vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD) 1297 return err 1298 } 1299 } 1300 } 1301 1302 // Essentially no gVisor filesystem supports RENAME_WHITEOUT, so just do a 1303 // regular rename and create the whiteout at the origin manually. Unlike 1304 // RENAME_WHITEOUT, this isn't atomic with respect to other users of the 1305 // upper filesystem, but this is already the case for virtually all other 1306 // overlay filesystem operations too. 1307 oldpop := vfs.PathOperation{ 1308 Root: oldParent.upperVD, 1309 Start: oldParent.upperVD, 1310 Path: fspath.Parse(oldName), 1311 } 1312 if err := vfsObj.RenameAt(ctx, creds, &oldpop, &newpop, &opts); err != nil { 1313 vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD) 1314 cleanupRecreateWhiteouts() 1315 return err 1316 } 1317 1318 // Below this point, the renamed dentry is now at newpop, and anything we 1319 // replaced is gone forever. Commit the rename, update the overlay 1320 // filesystem tree, and abandon attempts to recover from errors. 
1321 toDecRef = vfsObj.CommitRenameReplaceDentry(ctx, &renamed.vfsd, replacedVFSD) 1322 delete(oldParent.children, oldName) 1323 if replaced != nil { 1324 // Lower dentries of replaced are not reachable from the overlay anymore. 1325 // NOTE(b/237573779): Ask lower filesystem to release resources for this 1326 // dentry whenever possible to reduce resource usage. 1327 for _, replaceLower := range replaced.lowerVDs { 1328 replaceLower.Dentry().MarkEvictable() 1329 } 1330 ds = appendDentry(ds, replaced) 1331 } 1332 if oldParent != newParent { 1333 newParent.dirents = nil 1334 // This can't drop the last reference on oldParent because one is held 1335 // by oldParentVD, so lock recursion is impossible. 1336 oldParent.DecRef(ctx) 1337 ds = appendDentry(ds, oldParent) 1338 newParent.IncRef() 1339 renamed.parent.Store(newParent) 1340 } 1341 renamed.name = newName 1342 if newParent.children == nil { 1343 newParent.children = make(map[string]*dentry) 1344 } 1345 newParent.children[newName] = renamed 1346 oldParent.dirents = nil 1347 1348 if err := CreateWhiteout(ctx, vfsObj, fs.creds, &oldpop); err != nil { 1349 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to create whiteout at origin after RenameAt: %v", err)) 1350 } 1351 if renamed.isDir() { 1352 if err := vfsObj.SetXattrAt(ctx, fs.creds, &newpop, &vfs.SetXattrOptions{ 1353 Name: _OVL_XATTR_OPAQUE, 1354 Value: "y", 1355 }); err != nil { 1356 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to make renamed directory opaque: %v", err)) 1357 } 1358 } 1359 1360 vfs.InotifyRename(ctx, &renamed.watches, &oldParent.watches, &newParent.watches, oldName, newName, renamed.isDir()) 1361 return nil 1362 } 1363 1364 // RmdirAt implements vfs.FilesystemImpl.RmdirAt. 
1365 func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { 1366 var ds *[]*dentry 1367 fs.renameMu.RLock() 1368 // We need to DecRef outside of fs.mu because forgetting a dead mountpoint 1369 // could result in this filesystem being released which acquires fs.mu. 1370 var toDecRef []refs.RefCounter 1371 defer func() { 1372 for _, ref := range toDecRef { 1373 ref.DecRef(ctx) 1374 } 1375 }() 1376 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1377 start := rp.Start().Impl().(*dentry) 1378 parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 1379 if err != nil { 1380 return err 1381 } 1382 if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { 1383 return err 1384 } 1385 if err := rp.Mount().CheckBeginWrite(); err != nil { 1386 return err 1387 } 1388 defer rp.Mount().EndWrite() 1389 name := rp.Component() 1390 if name == "." { 1391 return linuxerr.EINVAL 1392 } 1393 if name == ".." { 1394 return linuxerr.ENOTEMPTY 1395 } 1396 vfsObj := rp.VirtualFilesystem() 1397 mntns := vfs.MountNamespaceFromContext(ctx) 1398 defer mntns.DecRef(ctx) 1399 parent.dirMu.Lock() 1400 defer parent.dirMu.Unlock() 1401 1402 // Ensure that parent is copied-up before potentially holding child.copyMu 1403 // below. 1404 if err := parent.copyUpLocked(ctx); err != nil { 1405 return err 1406 } 1407 1408 // We need a dentry representing the child directory being removed in order 1409 // to verify that it's empty. 
1410 child, _, err := fs.getChildLocked(ctx, parent, name, &ds) 1411 if err != nil { 1412 return err 1413 } 1414 if !child.isDir() { 1415 return linuxerr.ENOTDIR 1416 } 1417 if err := parent.mayDelete(rp.Credentials(), child); err != nil { 1418 return err 1419 } 1420 child.dirMu.NestedLock(dirLockChild) 1421 defer child.dirMu.NestedUnlock(dirLockChild) 1422 whiteouts, err := child.collectWhiteoutsForRmdirLocked(ctx) 1423 if err != nil { 1424 return err 1425 } 1426 child.copyMu.RLock() 1427 defer child.copyMu.RUnlock() 1428 if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil { 1429 return err 1430 } 1431 1432 pop := vfs.PathOperation{ 1433 Root: parent.upperVD, 1434 Start: parent.upperVD, 1435 Path: fspath.Parse(name), 1436 } 1437 if child.upperVD.Ok() { 1438 cleanupRecreateWhiteouts := func() { 1439 if !child.upperVD.Ok() { 1440 return 1441 } 1442 for whiteoutName, whiteoutUpper := range whiteouts { 1443 if !whiteoutUpper { 1444 continue 1445 } 1446 if err := CreateWhiteout(ctx, vfsObj, fs.creds, &vfs.PathOperation{ 1447 Root: child.upperVD, 1448 Start: child.upperVD, 1449 Path: fspath.Parse(whiteoutName), 1450 }); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) { 1451 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RmdirAt failure: %v", err)) 1452 } 1453 } 1454 } 1455 // Remove existing whiteouts on the upper layer. 1456 for whiteoutName, whiteoutUpper := range whiteouts { 1457 if !whiteoutUpper { 1458 continue 1459 } 1460 if err := vfsObj.UnlinkAt(ctx, fs.creds, &vfs.PathOperation{ 1461 Root: child.upperVD, 1462 Start: child.upperVD, 1463 Path: fspath.Parse(whiteoutName), 1464 }); err != nil { 1465 vfsObj.AbortDeleteDentry(&child.vfsd) 1466 cleanupRecreateWhiteouts() 1467 return err 1468 } 1469 } 1470 // Remove the existing directory on the upper layer. 
1471 if err := vfsObj.RmdirAt(ctx, fs.creds, &pop); err != nil { 1472 vfsObj.AbortDeleteDentry(&child.vfsd) 1473 cleanupRecreateWhiteouts() 1474 return err 1475 } 1476 } 1477 if err := CreateWhiteout(ctx, vfsObj, fs.creds, &pop); err != nil { 1478 vfsObj.AbortDeleteDentry(&child.vfsd) 1479 if child.upperVD.Ok() { 1480 // Don't attempt to recover from this: the original directory is 1481 // already gone, so any dentries representing it are invalid, and 1482 // creating a new directory won't undo that. 1483 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to create whiteout after removing upper layer directory during RmdirAt: %v", err)) 1484 } 1485 return err 1486 } 1487 1488 toDecRef = vfsObj.CommitDeleteDentry(ctx, &child.vfsd) 1489 delete(parent.children, name) 1490 ds = appendDentry(ds, child) 1491 parent.dirents = nil 1492 parent.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0 /* cookie */, vfs.InodeEvent, true /* unlinked */) 1493 return nil 1494 } 1495 1496 // SetStatAt implements vfs.FilesystemImpl.SetStatAt. 1497 func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { 1498 var ds *[]*dentry 1499 fs.renameMu.RLock() 1500 d, err := fs.resolveLocked(ctx, rp, &ds) 1501 if err != nil { 1502 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1503 return err 1504 } 1505 err = d.setStatLocked(ctx, rp, opts) 1506 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1507 if err != nil { 1508 return err 1509 } 1510 1511 if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { 1512 d.InotifyWithParent(ctx, ev, 0 /* cookie */, vfs.InodeEvent) 1513 } 1514 return nil 1515 } 1516 1517 // Precondition: d.fs.renameMu must be held for reading. 
1518 func (d *dentry) setStatLocked(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { 1519 mode := linux.FileMode(d.mode.Load()) 1520 if err := vfs.CheckSetStat(ctx, rp.Credentials(), &opts, mode, auth.KUID(d.uid.Load()), auth.KGID(d.gid.Load())); err != nil { 1521 return err 1522 } 1523 mnt := rp.Mount() 1524 if err := mnt.CheckBeginWrite(); err != nil { 1525 return err 1526 } 1527 defer mnt.EndWrite() 1528 if err := d.copyUpLocked(ctx); err != nil { 1529 return err 1530 } 1531 // Changes to d's attributes are serialized by d.copyMu. 1532 d.copyMu.Lock() 1533 defer d.copyMu.Unlock() 1534 if err := d.fs.vfsfs.VirtualFilesystem().SetStatAt(ctx, d.fs.creds, &vfs.PathOperation{ 1535 Root: d.upperVD, 1536 Start: d.upperVD, 1537 }, &opts); err != nil { 1538 return err 1539 } 1540 d.updateAfterSetStatLocked(&opts) 1541 return nil 1542 } 1543 1544 // StatAt implements vfs.FilesystemImpl.StatAt. 1545 func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { 1546 var ds *[]*dentry 1547 fs.renameMu.RLock() 1548 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1549 d, err := fs.resolveLocked(ctx, rp, &ds) 1550 if err != nil { 1551 return linux.Statx{}, err 1552 } 1553 1554 var stat linux.Statx 1555 if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 { 1556 layerVD := d.topLayer() 1557 stat, err = fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{ 1558 Root: layerVD, 1559 Start: layerVD, 1560 }, &vfs.StatOptions{ 1561 Mask: layerMask, 1562 Sync: opts.Sync, 1563 }) 1564 if err != nil { 1565 return linux.Statx{}, err 1566 } 1567 } 1568 d.statInternalTo(ctx, &opts, &stat) 1569 return stat, nil 1570 } 1571 1572 // StatFSAt implements vfs.FilesystemImpl.StatFSAt. 
1573 func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { 1574 var ds *[]*dentry 1575 fs.renameMu.RLock() 1576 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1577 _, err := fs.resolveLocked(ctx, rp, &ds) 1578 if err != nil { 1579 return linux.Statfs{}, err 1580 } 1581 return fs.statFS(ctx) 1582 } 1583 1584 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. 1585 func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { 1586 return fs.doCreateAt(ctx, rp, createNonDirectory, func(parent *dentry, childName string, haveUpperWhiteout bool) error { 1587 vfsObj := fs.vfsfs.VirtualFilesystem() 1588 pop := vfs.PathOperation{ 1589 Root: parent.upperVD, 1590 Start: parent.upperVD, 1591 Path: fspath.Parse(childName), 1592 } 1593 if haveUpperWhiteout { 1594 if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil { 1595 return err 1596 } 1597 } 1598 if err := vfsObj.SymlinkAt(ctx, fs.creds, &pop, target); err != nil { 1599 if haveUpperWhiteout { 1600 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 1601 } 1602 return err 1603 } 1604 creds := rp.Credentials() 1605 if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{ 1606 Stat: linux.Statx{ 1607 Mask: linux.STATX_UID | linux.STATX_GID, 1608 UID: uint32(creds.EffectiveKUID), 1609 GID: uint32(creds.EffectiveKGID), 1610 }, 1611 }); err != nil { 1612 if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil { 1613 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after SymlinkAt metadata update failure: %v", cleanupErr)) 1614 } else if haveUpperWhiteout { 1615 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 1616 } 1617 return err 1618 } 1619 return nil 1620 }) 1621 } 1622 1623 // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. 
1624 func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { 1625 var ds *[]*dentry 1626 fs.renameMu.RLock() 1627 // We need to DecRef outside of fs.renameMu because forgetting a dead 1628 // mountpoint could result in this filesystem being released which acquires 1629 // fs.renameMu. 1630 var toDecRef []refs.RefCounter 1631 defer func() { 1632 for _, ref := range toDecRef { 1633 ref.DecRef(ctx) 1634 } 1635 }() 1636 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1637 start := rp.Start().Impl().(*dentry) 1638 parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 1639 if err != nil { 1640 return err 1641 } 1642 if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { 1643 return err 1644 } 1645 if err := rp.Mount().CheckBeginWrite(); err != nil { 1646 return err 1647 } 1648 defer rp.Mount().EndWrite() 1649 name := rp.Component() 1650 if name == "." || name == ".." { 1651 return linuxerr.EISDIR 1652 } 1653 if rp.MustBeDir() { 1654 return linuxerr.ENOTDIR 1655 } 1656 vfsObj := rp.VirtualFilesystem() 1657 mntns := vfs.MountNamespaceFromContext(ctx) 1658 defer mntns.DecRef(ctx) 1659 parent.dirMu.Lock() 1660 defer parent.dirMu.Unlock() 1661 1662 // Ensure that parent is copied-up before potentially holding child.copyMu 1663 // below. 1664 if err := parent.copyUpLocked(ctx); err != nil { 1665 return err 1666 } 1667 1668 // We need a dentry representing the child being removed in order to verify 1669 // that it's not a directory. 1670 child, childLayer, err := fs.getChildLocked(ctx, parent, name, &ds) 1671 if err != nil { 1672 return err 1673 } 1674 if child.isDir() { 1675 return linuxerr.EISDIR 1676 } 1677 if err := parent.mayDelete(rp.Credentials(), child); err != nil { 1678 return err 1679 } 1680 // Hold child.copyMu to prevent it from being copied-up during 1681 // deletion. 
1682 child.copyMu.RLock() 1683 defer child.copyMu.RUnlock() 1684 if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil { 1685 return err 1686 } 1687 1688 pop := vfs.PathOperation{ 1689 Root: parent.upperVD, 1690 Start: parent.upperVD, 1691 Path: fspath.Parse(name), 1692 } 1693 if childLayer == lookupLayerUpper { 1694 // Remove the existing file on the upper layer. 1695 if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil { 1696 vfsObj.AbortDeleteDentry(&child.vfsd) 1697 return err 1698 } 1699 } 1700 if err := CreateWhiteout(ctx, vfsObj, fs.creds, &pop); err != nil { 1701 vfsObj.AbortDeleteDentry(&child.vfsd) 1702 if childLayer == lookupLayerUpper { 1703 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to create whiteout after unlinking upper layer file during UnlinkAt: %v", err)) 1704 } 1705 return err 1706 } 1707 1708 toDecRef = vfsObj.CommitDeleteDentry(ctx, &child.vfsd) 1709 delete(parent.children, name) 1710 if !child.isDir() { 1711 // Once a whiteout is created, non-directory dentries on the lower layers 1712 // are no longer reachable from the overlayfs. Ask filesystems to release 1713 // their resources whenever possible. 1714 for _, lowerDentry := range child.lowerVDs { 1715 lowerDentry.Dentry().MarkEvictable() 1716 } 1717 } 1718 ds = appendDentry(ds, child) 1719 vfs.InotifyRemoveChild(ctx, &child.watches, &parent.watches, name) 1720 parent.dirents = nil 1721 return nil 1722 } 1723 1724 // isOverlayXattr returns whether the given extended attribute configures the 1725 // overlay. 1726 func isOverlayXattr(name string) bool { 1727 return strings.HasPrefix(name, _OVL_XATTR_PREFIX) 1728 } 1729 1730 // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. 
1731 func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { 1732 var ds *[]*dentry 1733 fs.renameMu.RLock() 1734 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1735 d, err := fs.resolveLocked(ctx, rp, &ds) 1736 if err != nil { 1737 return nil, err 1738 } 1739 1740 return fs.listXattr(ctx, d, size) 1741 } 1742 1743 func (fs *filesystem) listXattr(ctx context.Context, d *dentry, size uint64) ([]string, error) { 1744 vfsObj := d.fs.vfsfs.VirtualFilesystem() 1745 top := d.topLayer() 1746 names, err := vfsObj.ListXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, size) 1747 if err != nil { 1748 return nil, err 1749 } 1750 1751 // Filter out all overlay attributes. 1752 n := 0 1753 for _, name := range names { 1754 if !isOverlayXattr(name) { 1755 names[n] = name 1756 n++ 1757 } 1758 } 1759 return names[:n], err 1760 } 1761 1762 // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. 1763 func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { 1764 var ds *[]*dentry 1765 fs.renameMu.RLock() 1766 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1767 d, err := fs.resolveLocked(ctx, rp, &ds) 1768 if err != nil { 1769 return "", err 1770 } 1771 1772 return fs.getXattr(ctx, d, rp.Credentials(), &opts) 1773 } 1774 1775 func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) { 1776 if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil { 1777 return "", err 1778 } 1779 1780 // Return EOPNOTSUPP when fetching an overlay attribute. 1781 // See fs/overlayfs/super.c:ovl_own_xattr_get(). 1782 if isOverlayXattr(opts.Name) { 1783 return "", linuxerr.EOPNOTSUPP 1784 } 1785 1786 // Analogous to fs/overlayfs/super.c:ovl_other_xattr_get(). 
1787 vfsObj := d.fs.vfsfs.VirtualFilesystem() 1788 top := d.topLayer() 1789 return vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, opts) 1790 } 1791 1792 // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. 1793 func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { 1794 var ds *[]*dentry 1795 fs.renameMu.RLock() 1796 d, err := fs.resolveLocked(ctx, rp, &ds) 1797 if err != nil { 1798 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1799 return err 1800 } 1801 1802 err = fs.setXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), &opts) 1803 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1804 if err != nil { 1805 return err 1806 } 1807 1808 d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent) 1809 return nil 1810 } 1811 1812 // Precondition: fs.renameMu must be locked. 1813 func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, opts *vfs.SetXattrOptions) error { 1814 if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil { 1815 return err 1816 } 1817 1818 // Return EOPNOTSUPP when setting an overlay attribute. 1819 // See fs/overlayfs/super.c:ovl_own_xattr_set(). 1820 if isOverlayXattr(opts.Name) { 1821 return linuxerr.EOPNOTSUPP 1822 } 1823 1824 // Analogous to fs/overlayfs/super.c:ovl_other_xattr_set(). 1825 if err := mnt.CheckBeginWrite(); err != nil { 1826 return err 1827 } 1828 defer mnt.EndWrite() 1829 if err := d.copyUpLocked(ctx); err != nil { 1830 return err 1831 } 1832 vfsObj := d.fs.vfsfs.VirtualFilesystem() 1833 return vfsObj.SetXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, opts) 1834 } 1835 1836 // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. 
1837 func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { 1838 var ds *[]*dentry 1839 fs.renameMu.RLock() 1840 d, err := fs.resolveLocked(ctx, rp, &ds) 1841 if err != nil { 1842 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1843 return err 1844 } 1845 1846 err = fs.removeXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), name) 1847 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1848 if err != nil { 1849 return err 1850 } 1851 1852 d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent) 1853 return nil 1854 } 1855 1856 // Precondition: fs.renameMu must be locked. 1857 func (fs *filesystem) removeXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, name string) error { 1858 if err := d.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil { 1859 return err 1860 } 1861 1862 // Like SetXattrAt, return EOPNOTSUPP when removing an overlay attribute. 1863 // Linux passes the remove request to xattr_handler->set. 1864 // See fs/xattr.c:vfs_removexattr(). 1865 if isOverlayXattr(name) { 1866 return linuxerr.EOPNOTSUPP 1867 } 1868 1869 if err := mnt.CheckBeginWrite(); err != nil { 1870 return err 1871 } 1872 defer mnt.EndWrite() 1873 if err := d.copyUpLocked(ctx); err != nil { 1874 return err 1875 } 1876 vfsObj := d.fs.vfsfs.VirtualFilesystem() 1877 return vfsObj.RemoveXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, name) 1878 } 1879 1880 // PrependPath implements vfs.FilesystemImpl.PrependPath. 1881 func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { 1882 fs.renameMu.RLock() 1883 defer fs.renameMu.RUnlock() 1884 return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) 1885 } 1886 1887 // MountOptions implements vfs.FilesystemImpl.MountOptions. 1888 func (fs *filesystem) MountOptions() string { 1889 // Return the mount options from the topmost layer. 
1890 var vd vfs.VirtualDentry 1891 if fs.opts.UpperRoot.Ok() { 1892 vd = fs.opts.UpperRoot 1893 } else { 1894 vd = fs.opts.LowerRoots[0] 1895 } 1896 return vd.Mount().Filesystem().Impl().MountOptions() 1897 }