github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/sentry/fsimpl/overlay/filesystem.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package overlay 16 17 import ( 18 "fmt" 19 "strings" 20 21 "github.com/ttpreport/gvisor-ligolo/pkg/abi/linux" 22 "github.com/ttpreport/gvisor-ligolo/pkg/atomicbitops" 23 "github.com/ttpreport/gvisor-ligolo/pkg/context" 24 "github.com/ttpreport/gvisor-ligolo/pkg/errors/linuxerr" 25 "github.com/ttpreport/gvisor-ligolo/pkg/fspath" 26 "github.com/ttpreport/gvisor-ligolo/pkg/log" 27 "github.com/ttpreport/gvisor-ligolo/pkg/sentry/kernel/auth" 28 "github.com/ttpreport/gvisor-ligolo/pkg/sentry/socket/unix/transport" 29 "github.com/ttpreport/gvisor-ligolo/pkg/sentry/vfs" 30 "github.com/ttpreport/gvisor-ligolo/pkg/sync" 31 ) 32 33 // _OVL_XATTR_PREFIX is an extended attribute key prefix to identify overlayfs 34 // attributes. 35 // Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_PREFIX 36 const _OVL_XATTR_PREFIX = linux.XATTR_TRUSTED_PREFIX + "overlay." 37 38 // _OVL_XATTR_OPAQUE is an extended attribute key whose value is set to "y" for 39 // opaque directories. 40 // Linux: fs/overlayfs/overlayfs.h:OVL_XATTR_OPAQUE 41 const _OVL_XATTR_OPAQUE = _OVL_XATTR_PREFIX + "opaque" 42 43 func isWhiteout(stat *linux.Statx) bool { 44 return stat.Mode&linux.S_IFMT == linux.S_IFCHR && stat.RdevMajor == 0 && stat.RdevMinor == 0 45 } 46 47 // Sync implements vfs.FilesystemImpl.Sync. 48 func (fs *filesystem) Sync(ctx context.Context) error { 49 if fs.opts.UpperRoot.Ok() { 50 return fs.opts.UpperRoot.Mount().Filesystem().Impl().Sync(ctx) 51 } 52 return nil 53 } 54 55 var dentrySlicePool = sync.Pool{ 56 New: func() any { 57 ds := make([]*dentry, 0, 4) // arbitrary non-zero initial capacity 58 return &ds 59 }, 60 } 61 62 func appendDentry(ds *[]*dentry, d *dentry) *[]*dentry { 63 if ds == nil { 64 ds = dentrySlicePool.Get().(*[]*dentry) 65 } 66 *ds = append(*ds, d) 67 return ds 68 } 69 70 // Preconditions: ds != nil. 71 func putDentrySlice(ds *[]*dentry) { 72 // Allow dentries to be GC'd. 73 for i := range *ds { 74 (*ds)[i] = nil 75 } 76 *ds = (*ds)[:0] 77 dentrySlicePool.Put(ds) 78 } 79 80 // renameMuRUnlockAndCheckDrop calls fs.renameMu.RUnlock(), then calls 81 // dentry.checkDropLocked on all dentries in *dsp with fs.renameMu locked for 82 // writing. 83 // 84 // dsp is a pointer-to-pointer since defer evaluates its arguments immediately, 85 // but dentry slices are allocated lazily, and it's much easier to say "defer 86 // fs.renameMuRUnlockAndCheckDrop(&ds)" than "defer func() { 87 // fs.renameMuRUnlockAndCheckDrop(ds) }()" to work around this. 88 // 89 // +checklocksreleaseread:fs.renameMu 90 func (fs *filesystem) renameMuRUnlockAndCheckDrop(ctx context.Context, dsp **[]*dentry) { 91 fs.renameMu.RUnlock() 92 if *dsp == nil { 93 return 94 } 95 ds := **dsp 96 // Only go through calling dentry.checkDropLocked() (which requires 97 // re-locking renameMu) if we actually have any dentries with zero refs. 98 checkAny := false 99 for i := range ds { 100 if ds[i].refs.Load() == 0 { 101 checkAny = true 102 break 103 } 104 } 105 if checkAny { 106 fs.renameMu.Lock() 107 for _, d := range ds { 108 d.checkDropLocked(ctx) 109 } 110 fs.renameMu.Unlock() 111 } 112 putDentrySlice(*dsp) 113 } 114 115 // +checklocksrelease:fs.renameMu 116 func (fs *filesystem) renameMuUnlockAndCheckDrop(ctx context.Context, ds **[]*dentry) { 117 if *ds == nil { 118 fs.renameMu.Unlock() 119 return 120 } 121 for _, d := range **ds { 122 d.checkDropLocked(ctx) 123 } 124 fs.renameMu.Unlock() 125 putDentrySlice(*ds) 126 } 127 128 // stepLocked resolves rp.Component() to an existing file, starting from the 129 // given directory. 130 // 131 // Dentries which may have a reference count of zero, and which therefore 132 // should be dropped once traversal is complete, are appended to ds. 133 // 134 // Preconditions: 135 // - fs.renameMu must be locked. 136 // - d.dirMu must be locked. 137 // - !rp.Done(). 138 func (fs *filesystem) stepLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, lookupLayer, bool, error) { 139 if !d.isDir() { 140 return nil, lookupLayerNone, false, linuxerr.ENOTDIR 141 } 142 if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 143 return nil, lookupLayerNone, false, err 144 } 145 name := rp.Component() 146 if name == "." { 147 rp.Advance() 148 return d, d.topLookupLayer(), false, nil 149 } 150 if name == ".." { 151 if isRoot, err := rp.CheckRoot(ctx, &d.vfsd); err != nil { 152 return nil, lookupLayerNone, false, err 153 } else if isRoot || d.parent == nil { 154 rp.Advance() 155 return d, d.topLookupLayer(), false, nil 156 } 157 if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil { 158 return nil, lookupLayerNone, false, err 159 } 160 rp.Advance() 161 return d.parent, d.parent.topLookupLayer(), false, nil 162 } 163 if uint64(len(name)) > fs.maxFilenameLen { 164 return nil, lookupLayerNone, false, linuxerr.ENAMETOOLONG 165 } 166 child, topLookupLayer, err := fs.getChildLocked(ctx, d, name, ds) 167 if err != nil { 168 return nil, topLookupLayer, false, err 169 } 170 if err := rp.CheckMount(ctx, &child.vfsd); err != nil { 171 return nil, lookupLayerNone, false, err 172 } 173 if child.isSymlink() && rp.ShouldFollowSymlink() { 174 target, err := child.readlink(ctx) 175 if err != nil { 176 return nil, lookupLayerNone, false, err 177 } 178 followedSymlink, err := rp.HandleSymlink(target) 179 return d, topLookupLayer, followedSymlink, err 180 } 181 rp.Advance() 182 return child, topLookupLayer, false, nil 183 } 184 185 // Preconditions: 186 // - fs.renameMu must be locked. 187 // - d.dirMu must be locked. 188 func (fs *filesystem) getChildLocked(ctx context.Context, parent *dentry, name string, ds **[]*dentry) (*dentry, lookupLayer, error) { 189 if child, ok := parent.children[name]; ok { 190 return child, child.topLookupLayer(), nil 191 } 192 child, topLookupLayer, err := fs.lookupLocked(ctx, parent, name) 193 if err != nil { 194 return nil, topLookupLayer, err 195 } 196 if parent.children == nil { 197 parent.children = make(map[string]*dentry) 198 } 199 parent.children[name] = child 200 // child's refcount is initially 0, so it may be dropped after traversal. 201 *ds = appendDentry(*ds, child) 202 return child, topLookupLayer, nil 203 } 204 205 // Preconditions: 206 // - fs.renameMu must be locked. 207 // - parent.dirMu must be locked. 208 func (fs *filesystem) lookupLocked(ctx context.Context, parent *dentry, name string) (*dentry, lookupLayer, error) { 209 childPath := fspath.Parse(name) 210 child := fs.newDentry() 211 topLookupLayer := lookupLayerNone 212 var lookupErr error 213 214 vfsObj := fs.vfsfs.VirtualFilesystem() 215 parent.iterLayers(func(parentVD vfs.VirtualDentry, isUpper bool) bool { 216 childVD, err := vfsObj.GetDentryAt(ctx, fs.creds, &vfs.PathOperation{ 217 Root: parentVD, 218 Start: parentVD, 219 Path: childPath, 220 }, &vfs.GetDentryOptions{}) 221 if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENAMETOOLONG, err) { 222 // The file doesn't exist on this layer. Proceed to the next one. 223 return true 224 } 225 if err != nil { 226 lookupErr = err 227 return false 228 } 229 defer childVD.DecRef(ctx) 230 231 mask := uint32(linux.STATX_TYPE) 232 if topLookupLayer == lookupLayerNone { 233 // Mode, UID, GID, and (for non-directories) inode number come from 234 // the topmost layer on which the file exists. 235 mask |= linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO 236 } 237 stat, err := vfsObj.StatAt(ctx, fs.creds, &vfs.PathOperation{ 238 Root: childVD, 239 Start: childVD, 240 }, &vfs.StatOptions{ 241 Mask: mask, 242 }) 243 if err != nil { 244 lookupErr = err 245 return false 246 } 247 if stat.Mask&mask != mask { 248 lookupErr = linuxerr.EREMOTE 249 return false 250 } 251 252 if isWhiteout(&stat) { 253 // This is a whiteout, so it "doesn't exist" on this layer, and 254 // layers below this one are ignored. 255 if isUpper { 256 topLookupLayer = lookupLayerUpperWhiteout 257 } 258 return false 259 } 260 isDir := stat.Mode&linux.S_IFMT == linux.S_IFDIR 261 if topLookupLayer != lookupLayerNone && !isDir { 262 // Directories are not merged with non-directory files from lower 263 // layers; instead, layers including and below the first 264 // non-directory file are ignored. (This file must be a directory 265 // on previous layers, since lower layers aren't searched for 266 // non-directory files.) 267 return false 268 } 269 270 // Update child to include this layer. 271 childVD.IncRef() 272 if isUpper { 273 child.upperVD = childVD 274 child.copiedUp = atomicbitops.FromUint32(1) 275 } else { 276 child.lowerVDs = append(child.lowerVDs, childVD) 277 } 278 if topLookupLayer == lookupLayerNone { 279 if isUpper { 280 topLookupLayer = lookupLayerUpper 281 } else { 282 topLookupLayer = lookupLayerLower 283 } 284 child.mode = atomicbitops.FromUint32(uint32(stat.Mode)) 285 child.uid = atomicbitops.FromUint32(stat.UID) 286 child.gid = atomicbitops.FromUint32(stat.GID) 287 child.devMajor = atomicbitops.FromUint32(stat.DevMajor) 288 child.devMinor = atomicbitops.FromUint32(stat.DevMinor) 289 child.ino = atomicbitops.FromUint64(stat.Ino) 290 } 291 292 // For non-directory files, only the topmost layer that contains a file 293 // matters. 294 if !isDir { 295 return false 296 } 297 298 // Directories use the lowest layer inode and device numbers to generate a 299 // filesystem local inode number. This way the inode number does not change 300 // after copy ups. 301 child.devMajor = atomicbitops.FromUint32(stat.DevMajor) 302 child.devMinor = atomicbitops.FromUint32(stat.DevMinor) 303 child.ino = atomicbitops.FromUint64(stat.Ino) 304 305 // Directories are merged with directories from lower layers if they 306 // are not explicitly opaque. 307 opaqueVal, err := vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{ 308 Root: childVD, 309 Start: childVD, 310 }, &vfs.GetXattrOptions{ 311 Name: _OVL_XATTR_OPAQUE, 312 Size: 1, 313 }) 314 return !(err == nil && opaqueVal == "y") 315 }) 316 317 if lookupErr != nil { 318 child.destroyLocked(ctx) 319 return nil, topLookupLayer, lookupErr 320 } 321 if !topLookupLayer.existsInOverlay() { 322 child.destroyLocked(ctx) 323 return nil, topLookupLayer, linuxerr.ENOENT 324 } 325 326 // Device and inode numbers were copied from the topmost layer above for 327 // non-directories. They were copied from the bottommost layer for 328 // directories. Override them if necessary. We can use RacyLoad() because 329 // child is still being initialized. 330 if child.isDir() { 331 child.ino.Store(fs.newDirIno(child.devMajor.RacyLoad(), child.devMinor.RacyLoad(), child.ino.RacyLoad())) 332 child.devMajor = atomicbitops.FromUint32(linux.UNNAMED_MAJOR) 333 child.devMinor = atomicbitops.FromUint32(fs.dirDevMinor) 334 } else if !child.upperVD.Ok() { 335 childDevMinor, err := fs.getLowerDevMinor(child.devMajor.RacyLoad(), child.devMinor.RacyLoad()) 336 if err != nil { 337 ctx.Infof("overlay.filesystem.lookupLocked: failed to map lower layer device number (%d, %d) to an overlay-specific device number: %v", child.devMajor.RacyLoad(), child.devMinor.RacyLoad(), err) 338 child.destroyLocked(ctx) 339 return nil, topLookupLayer, err 340 } 341 child.devMajor = atomicbitops.FromUint32(linux.UNNAMED_MAJOR) 342 child.devMinor = atomicbitops.FromUint32(childDevMinor) 343 } 344 345 parent.IncRef() 346 child.parent = parent 347 child.name = name 348 return child, topLookupLayer, nil 349 } 350 351 // lookupLayerLocked is similar to lookupLocked, but only returns information 352 // about the file rather than a dentry. 353 // 354 // Preconditions: 355 // - fs.renameMu must be locked. 356 // - parent.dirMu must be locked. 357 func (fs *filesystem) lookupLayerLocked(ctx context.Context, parent *dentry, name string) (lookupLayer, error) { 358 childPath := fspath.Parse(name) 359 lookupLayer := lookupLayerNone 360 var lookupErr error 361 362 parent.iterLayers(func(parentVD vfs.VirtualDentry, isUpper bool) bool { 363 stat, err := fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{ 364 Root: parentVD, 365 Start: parentVD, 366 Path: childPath, 367 }, &vfs.StatOptions{ 368 Mask: linux.STATX_TYPE, 369 }) 370 if linuxerr.Equals(linuxerr.ENOENT, err) || linuxerr.Equals(linuxerr.ENAMETOOLONG, err) { 371 // The file doesn't exist on this layer. Proceed to the next 372 // one. 373 return true 374 } 375 if err != nil { 376 lookupErr = err 377 return false 378 } 379 if stat.Mask&linux.STATX_TYPE == 0 { 380 // Linux's overlayfs tends to return EREMOTE in cases where a file 381 // is unusable for reasons that are not better captured by another 382 // errno. 383 lookupErr = linuxerr.EREMOTE 384 return false 385 } 386 if isWhiteout(&stat) { 387 // This is a whiteout, so it "doesn't exist" on this layer, and 388 // layers below this one are ignored. 389 if isUpper { 390 lookupLayer = lookupLayerUpperWhiteout 391 } 392 return false 393 } 394 // The file exists; we can stop searching. 395 if isUpper { 396 lookupLayer = lookupLayerUpper 397 } else { 398 lookupLayer = lookupLayerLower 399 } 400 return false 401 }) 402 403 return lookupLayer, lookupErr 404 } 405 406 type lookupLayer int 407 408 const ( 409 // lookupLayerNone indicates that no file exists at the given path on the 410 // upper layer, and is either whited out or does not exist on lower layers. 411 // Therefore, the file does not exist in the overlay filesystem, and file 412 // creation may proceed normally (if an upper layer exists). 413 lookupLayerNone lookupLayer = iota 414 415 // lookupLayerLower indicates that no file exists at the given path on the 416 // upper layer, but exists on a lower layer. Therefore, the file exists in 417 // the overlay filesystem, but must be copied-up before mutation. 418 lookupLayerLower 419 420 // lookupLayerUpper indicates that a non-whiteout file exists at the given 421 // path on the upper layer. Therefore, the file exists in the overlay 422 // filesystem, and is already copied-up. 423 lookupLayerUpper 424 425 // lookupLayerUpperWhiteout indicates that a whiteout exists at the given 426 // path on the upper layer. Therefore, the file does not exist in the 427 // overlay filesystem, and file creation must remove the whiteout before 428 // proceeding. 429 lookupLayerUpperWhiteout 430 ) 431 432 func (ll lookupLayer) existsInOverlay() bool { 433 return ll == lookupLayerLower || ll == lookupLayerUpper 434 } 435 436 // walkParentDirLocked resolves all but the last path component of rp to an 437 // existing directory, starting from the given directory (which is usually 438 // rp.Start().Impl().(*dentry)). It does not check that the returned directory 439 // is searchable by the provider of rp. 440 // 441 // Preconditions: 442 // - fs.renameMu must be locked. 443 // - !rp.Done(). 444 func (fs *filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *dentry, ds **[]*dentry) (*dentry, error) { 445 for !rp.Final() { 446 d.dirMu.Lock() 447 next, _, _, err := fs.stepLocked(ctx, rp, d, ds) 448 d.dirMu.Unlock() 449 if err != nil { 450 return nil, err 451 } 452 d = next 453 } 454 if !d.isDir() { 455 return nil, linuxerr.ENOTDIR 456 } 457 return d, nil 458 } 459 460 // resolveLocked resolves rp to an existing file. 461 // 462 // Preconditions: fs.renameMu must be locked. 463 func (fs *filesystem) resolveLocked(ctx context.Context, rp *vfs.ResolvingPath, ds **[]*dentry) (*dentry, error) { 464 d := rp.Start().Impl().(*dentry) 465 for !rp.Done() { 466 d.dirMu.Lock() 467 next, _, _, err := fs.stepLocked(ctx, rp, d, ds) 468 d.dirMu.Unlock() 469 if err != nil { 470 return nil, err 471 } 472 d = next 473 } 474 if rp.MustBeDir() && !d.isDir() { 475 return nil, linuxerr.ENOTDIR 476 } 477 return d, nil 478 } 479 480 type createType int 481 482 const ( 483 createNonDirectory createType = iota 484 createDirectory 485 createSyntheticMountpoint 486 ) 487 488 // doCreateAt checks that creating a file at rp is permitted, then invokes 489 // create to do so. 490 // 491 // Preconditions: 492 // - !rp.Done(). 493 // - For the final path component in rp, !rp.ShouldFollowSymlink(). 494 func (fs *filesystem) doCreateAt(ctx context.Context, rp *vfs.ResolvingPath, ct createType, create func(parent *dentry, name string, haveUpperWhiteout bool) error) error { 495 var ds *[]*dentry 496 fs.renameMu.RLock() 497 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 498 start := rp.Start().Impl().(*dentry) 499 parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 500 if err != nil { 501 return err 502 } 503 name := rp.Component() 504 if name == "." || name == ".." { 505 return linuxerr.EEXIST 506 } 507 if uint64(len(name)) > fs.maxFilenameLen { 508 return linuxerr.ENAMETOOLONG 509 } 510 if parent.vfsd.IsDead() { 511 return linuxerr.ENOENT 512 } 513 514 if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 515 return err 516 } 517 518 parent.dirMu.Lock() 519 defer parent.dirMu.Unlock() 520 521 // Determine if a file already exists at name. 522 if _, ok := parent.children[name]; ok { 523 return linuxerr.EEXIST 524 } 525 childLayer, err := fs.lookupLayerLocked(ctx, parent, name) 526 if err != nil { 527 return err 528 } 529 if childLayer.existsInOverlay() { 530 return linuxerr.EEXIST 531 } 532 533 if ct == createNonDirectory && rp.MustBeDir() { 534 return linuxerr.ENOENT 535 } 536 537 mnt := rp.Mount() 538 if err := mnt.CheckBeginWrite(); err != nil { 539 return err 540 } 541 defer mnt.EndWrite() 542 if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { 543 return err 544 } 545 // Ensure that the parent directory is copied-up so that we can create the 546 // new file in the upper layer. 547 if err := parent.copyUpMaybeSyntheticMountpointLocked(ctx, ct == createSyntheticMountpoint); err != nil { 548 return err 549 } 550 551 // Finally create the new file. 552 if err := create(parent, name, childLayer == lookupLayerUpperWhiteout); err != nil { 553 return err 554 } 555 556 parent.dirents = nil 557 ev := linux.IN_CREATE 558 if ct != createNonDirectory { 559 ev |= linux.IN_ISDIR 560 } 561 parent.watches.Notify(ctx, name, uint32(ev), 0 /* cookie */, vfs.InodeEvent, false /* unlinked */) 562 return nil 563 } 564 565 // CreateWhiteout creates a whiteout at pop. Whiteouts are created with 566 // character devices with device ID = 0. 567 // 568 // Preconditions: pop's parent directory has been copied up. 569 func CreateWhiteout(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, pop *vfs.PathOperation) error { 570 return vfsObj.MknodAt(ctx, creds, pop, &vfs.MknodOptions{ 571 Mode: linux.S_IFCHR, // permissions == include/linux/fs.h:WHITEOUT_MODE == 0 572 // DevMajor == DevMinor == 0, from include/linux/fs.h:WHITEOUT_DEV 573 }) 574 } 575 576 func (fs *filesystem) cleanupRecreateWhiteout(ctx context.Context, vfsObj *vfs.VirtualFilesystem, pop *vfs.PathOperation) { 577 if err := CreateWhiteout(ctx, vfsObj, fs.creds, pop); err != nil { 578 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate whiteout after failed file creation: %v", err)) 579 } 580 } 581 582 // AccessAt implements vfs.Filesystem.Impl.AccessAt. 583 func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { 584 var ds *[]*dentry 585 fs.renameMu.RLock() 586 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 587 d, err := fs.resolveLocked(ctx, rp, &ds) 588 if err != nil { 589 return err 590 } 591 if err := d.checkPermissions(creds, ats); err != nil { 592 return err 593 } 594 if !ats.MayWrite() { 595 // Not requesting write permission. Allow it. 596 return nil 597 } 598 if rp.Mount().ReadOnly() { 599 return linuxerr.EROFS 600 } 601 if !d.upperVD.Ok() && !d.canBeCopiedUp() { 602 // A lower layer file that can not be copied up, can not be written to. 603 // Error out here. Don't give the application false hopes. 604 return linuxerr.EACCES 605 } 606 return nil 607 } 608 609 // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. 610 func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { 611 var ds *[]*dentry 612 fs.renameMu.RLock() 613 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 614 d, err := fs.resolveLocked(ctx, rp, &ds) 615 if err != nil { 616 return nil, err 617 } 618 if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { 619 return nil, err 620 } 621 layerVD := d.topLayer() 622 return fs.vfsfs.VirtualFilesystem().BoundEndpointAt(ctx, fs.creds, &vfs.PathOperation{ 623 Root: layerVD, 624 Start: layerVD, 625 }, &opts) 626 } 627 628 // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. 629 func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { 630 var ds *[]*dentry 631 fs.renameMu.RLock() 632 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 633 d, err := fs.resolveLocked(ctx, rp, &ds) 634 if err != nil { 635 return nil, err 636 } 637 if opts.CheckSearchable { 638 if !d.isDir() { 639 return nil, linuxerr.ENOTDIR 640 } 641 if err := d.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 642 return nil, err 643 } 644 } 645 d.IncRef() 646 return &d.vfsd, nil 647 } 648 649 // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. 650 func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { 651 var ds *[]*dentry 652 fs.renameMu.RLock() 653 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 654 start := rp.Start().Impl().(*dentry) 655 d, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 656 if err != nil { 657 return nil, err 658 } 659 d.IncRef() 660 return &d.vfsd, nil 661 } 662 663 // LinkAt implements vfs.FilesystemImpl.LinkAt. 664 func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { 665 return fs.doCreateAt(ctx, rp, createNonDirectory, func(parent *dentry, childName string, haveUpperWhiteout bool) error { 666 if rp.Mount() != vd.Mount() { 667 return linuxerr.EXDEV 668 } 669 old := vd.Dentry().Impl().(*dentry) 670 if old.isDir() { 671 return linuxerr.EPERM 672 } 673 if err := old.copyUpLocked(ctx); err != nil { 674 return err 675 } 676 vfsObj := fs.vfsfs.VirtualFilesystem() 677 newpop := vfs.PathOperation{ 678 Root: parent.upperVD, 679 Start: parent.upperVD, 680 Path: fspath.Parse(childName), 681 } 682 if haveUpperWhiteout { 683 if err := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); err != nil { 684 return err 685 } 686 } 687 if err := vfsObj.LinkAt(ctx, fs.creds, &vfs.PathOperation{ 688 Root: old.upperVD, 689 Start: old.upperVD, 690 }, &newpop); err != nil { 691 if haveUpperWhiteout { 692 fs.cleanupRecreateWhiteout(ctx, vfsObj, &newpop) 693 } 694 return err 695 } 696 creds := rp.Credentials() 697 if err := vfsObj.SetStatAt(ctx, fs.creds, &newpop, &vfs.SetStatOptions{ 698 Stat: linux.Statx{ 699 Mask: linux.STATX_UID | linux.STATX_GID, 700 UID: uint32(creds.EffectiveKUID), 701 GID: uint32(creds.EffectiveKGID), 702 }, 703 }); err != nil { 704 if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); cleanupErr != nil { 705 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after LinkAt metadata update failure: %v", cleanupErr)) 706 } else if haveUpperWhiteout { 707 fs.cleanupRecreateWhiteout(ctx, vfsObj, &newpop) 708 } 709 return err 710 } 711 old.watches.Notify(ctx, "", linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent, false /* unlinked */) 712 return nil 713 }) 714 } 715 716 // MkdirAt implements vfs.FilesystemImpl.MkdirAt. 717 func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { 718 ct := createDirectory 719 if opts.ForSyntheticMountpoint { 720 ct = createSyntheticMountpoint 721 } 722 return fs.doCreateAt(ctx, rp, ct, func(parent *dentry, childName string, haveUpperWhiteout bool) error { 723 vfsObj := fs.vfsfs.VirtualFilesystem() 724 pop := vfs.PathOperation{ 725 Root: parent.upperVD, 726 Start: parent.upperVD, 727 Path: fspath.Parse(childName), 728 } 729 if haveUpperWhiteout { 730 if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil { 731 return err 732 } 733 } 734 if err := vfsObj.MkdirAt(ctx, fs.creds, &pop, &opts); err != nil { 735 if haveUpperWhiteout { 736 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 737 } 738 return err 739 } 740 741 if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{ 742 Stat: parent.newChildOwnerStat(opts.Mode, rp.Credentials()), 743 }); err != nil { 744 if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil { 745 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt metadata update failure: %v", cleanupErr)) 746 } else if haveUpperWhiteout { 747 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 748 } 749 return err 750 } 751 if haveUpperWhiteout { 752 // A whiteout is being replaced with this new directory. There may be 753 // directories on lower layers (previously hidden by the whiteout) that 754 // the new directory should not be merged with, so mark as opaque. 755 // See fs/overlayfs/dir.c:ovl_create_over_whiteout() -> ovl_set_opaque(). 756 if err := vfsObj.SetXattrAt(ctx, fs.creds, &pop, &vfs.SetXattrOptions{ 757 Name: _OVL_XATTR_OPAQUE, 758 Value: "y", 759 }); err != nil { 760 if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil { 761 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt set-opaque failure: %v", cleanupErr)) 762 } else { 763 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 764 } 765 return err 766 } 767 } else if len(parent.lowerVDs) > 0 { 768 // If haveUpperWhiteout is false and the parent is merged, then we should 769 // apply an optimization. We know that nothing exists on the parent's 770 // lower layers. Otherwise doCreateAt() would have failed with EEXIST. 771 // Mark the new directory opaque to avoid unnecessary lower lookups in 772 // fs.lookupLocked(). Allow it to fail since this is an optimization. 773 // See fs/overlayfs/dir.c:ovl_create_upper() -> ovl_set_opaque(). 774 _ = vfsObj.SetXattrAt(ctx, fs.creds, &pop, &vfs.SetXattrOptions{ 775 Name: _OVL_XATTR_OPAQUE, 776 Value: "y", 777 }) 778 } 779 return nil 780 }) 781 } 782 783 // MknodAt implements vfs.FilesystemImpl.MknodAt. 784 func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { 785 return fs.doCreateAt(ctx, rp, createNonDirectory, func(parent *dentry, childName string, haveUpperWhiteout bool) error { 786 // Disallow attempts to create whiteouts. 787 if opts.Mode&linux.S_IFMT == linux.S_IFCHR && opts.DevMajor == 0 && opts.DevMinor == 0 { 788 return linuxerr.EPERM 789 } 790 vfsObj := fs.vfsfs.VirtualFilesystem() 791 pop := vfs.PathOperation{ 792 Root: parent.upperVD, 793 Start: parent.upperVD, 794 Path: fspath.Parse(childName), 795 } 796 if haveUpperWhiteout { 797 if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil { 798 return err 799 } 800 } 801 if err := vfsObj.MknodAt(ctx, fs.creds, &pop, &opts); err != nil { 802 if haveUpperWhiteout { 803 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 804 } 805 return err 806 } 807 creds := rp.Credentials() 808 if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{ 809 Stat: parent.newChildOwnerStat(opts.Mode, creds), 810 }); err != nil { 811 if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil { 812 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after MknodAt metadata update failure: %v", cleanupErr)) 813 } else if haveUpperWhiteout { 814 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 815 } 816 return err 817 } 818 return nil 819 }) 820 } 821 822 // OpenAt implements vfs.FilesystemImpl.OpenAt. 823 func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 824 mayCreate := opts.Flags&linux.O_CREAT != 0 825 mustCreate := opts.Flags&(linux.O_CREAT|linux.O_EXCL) == (linux.O_CREAT | linux.O_EXCL) 826 827 var ds *[]*dentry 828 fs.renameMu.RLock() 829 unlocked := false 830 unlock := func() { 831 if !unlocked { 832 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 833 unlocked = true 834 } 835 } 836 defer unlock() 837 838 start := rp.Start().Impl().(*dentry) 839 if rp.Done() { 840 if mayCreate && rp.MustBeDir() { 841 return nil, linuxerr.EISDIR 842 } 843 if mustCreate { 844 return nil, linuxerr.EEXIST 845 } 846 if err := start.ensureOpenableLocked(ctx, rp, &opts); err != nil { 847 return nil, err 848 } 849 start.IncRef() 850 defer start.DecRef(ctx) 851 unlock() 852 return start.openCopiedUp(ctx, rp, &opts) 853 } 854 855 afterTrailingSymlink: 856 parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 857 if err != nil { 858 return nil, err 859 } 860 // Check for search permission in the parent directory. 861 if err := parent.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 862 return nil, err 863 } 864 // Reject attempts to open directories with O_CREAT. 865 if mayCreate && rp.MustBeDir() { 866 return nil, linuxerr.EISDIR 867 } 868 // Determine whether or not we need to create a file. 869 parent.dirMu.Lock() 870 child, topLookupLayer, followedSymlink, err := fs.stepLocked(ctx, rp, parent, &ds) 871 if followedSymlink { 872 parent.dirMu.Unlock() 873 if mustCreate { 874 // EEXIST must be returned if an existing symlink is opened with O_EXCL. 875 return nil, linuxerr.EEXIST 876 } 877 if err != nil { 878 // If followedSymlink && err != nil, then this symlink resolution error 879 // must be handled by the VFS layer. 880 return nil, err 881 } 882 start = parent 883 goto afterTrailingSymlink 884 } 885 if linuxerr.Equals(linuxerr.ENOENT, err) && mayCreate { 886 fd, err := fs.createAndOpenLocked(ctx, rp, parent, &opts, &ds, topLookupLayer == lookupLayerUpperWhiteout) 887 parent.dirMu.Unlock() 888 return fd, err 889 } 890 parent.dirMu.Unlock() 891 if err != nil { 892 return nil, err 893 } 894 if mustCreate { 895 return nil, linuxerr.EEXIST 896 } 897 if rp.MustBeDir() && !child.isDir() { 898 return nil, linuxerr.ENOTDIR 899 } 900 if err := child.ensureOpenableLocked(ctx, rp, &opts); err != nil { 901 return nil, err 902 } 903 child.IncRef() 904 defer child.DecRef(ctx) 905 unlock() 906 return child.openCopiedUp(ctx, rp, &opts) 907 } 908 909 // Preconditions: filesystem.renameMu must be locked. 910 func (d *dentry) ensureOpenableLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) error { 911 ats := vfs.AccessTypesForOpenFlags(opts) 912 if err := d.checkPermissions(rp.Credentials(), ats); err != nil { 913 return err 914 } 915 if d.isDir() { 916 if ats.MayWrite() { 917 return linuxerr.EISDIR 918 } 919 if opts.Flags&linux.O_CREAT != 0 { 920 return linuxerr.EISDIR 921 } 922 if opts.Flags&linux.O_DIRECT != 0 { 923 return linuxerr.EINVAL 924 } 925 return nil 926 } 927 928 if !ats.MayWrite() { 929 return nil 930 } 931 932 // Copy up! 933 if err := rp.Mount().CheckBeginWrite(); err != nil { 934 return err 935 } 936 defer rp.Mount().EndWrite() 937 return d.copyUpLocked(ctx) 938 } 939 940 // Preconditions: If vfs.AccessTypesForOpenFlags(opts).MayWrite(), then d has 941 // been copied up. 942 func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { 943 mnt := rp.Mount() 944 945 // Directory FDs open FDs from each layer when directory entries are read, 946 // so they don't require opening an FD from d.topLayer() up front. 947 ftype := d.mode.Load() & linux.S_IFMT 948 if ftype == linux.S_IFDIR { 949 fd := &directoryFD{} 950 fd.LockFD.Init(&d.locks) 951 if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ 952 UseDentryMetadata: true, 953 }); err != nil { 954 return nil, err 955 } 956 return &fd.vfsfd, nil 957 } 958 959 layerVD, isUpper := d.topLayerInfo() 960 layerFD, err := rp.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ 961 Root: layerVD, 962 Start: layerVD, 963 }, opts) 964 if err != nil { 965 return nil, err 966 } 967 if ftype != linux.S_IFREG { 968 return layerFD, nil 969 } 970 layerFlags := layerFD.StatusFlags() 971 fd := ®ularFileFD{ 972 copiedUp: isUpper, 973 cachedFD: layerFD, 974 cachedFlags: layerFlags, 975 } 976 fd.LockFD.Init(&d.locks) 977 layerFDOpts := layerFD.Options() 978 if err := fd.vfsfd.Init(fd, layerFlags, mnt, &d.vfsd, &layerFDOpts); err != nil { 979 layerFD.DecRef(ctx) 980 return nil, err 981 } 982 return &fd.vfsfd, nil 983 } 984 985 // Preconditions: 986 // - parent.dirMu must be locked. 987 // - parent does not already contain a child named rp.Component(). 988 func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.ResolvingPath, parent *dentry, opts *vfs.OpenOptions, ds **[]*dentry, haveUpperWhiteout bool) (*vfs.FileDescription, error) { 989 creds := rp.Credentials() 990 if err := parent.checkPermissions(creds, vfs.MayWrite); err != nil { 991 return nil, err 992 } 993 if parent.vfsd.IsDead() { 994 return nil, linuxerr.ENOENT 995 } 996 mnt := rp.Mount() 997 if err := mnt.CheckBeginWrite(); err != nil { 998 return nil, err 999 } 1000 defer mnt.EndWrite() 1001 1002 if err := parent.copyUpLocked(ctx); err != nil { 1003 return nil, err 1004 } 1005 1006 vfsObj := fs.vfsfs.VirtualFilesystem() 1007 childName := rp.Component() 1008 pop := vfs.PathOperation{ 1009 Root: parent.upperVD, 1010 Start: parent.upperVD, 1011 Path: fspath.Parse(childName), 1012 } 1013 // Unlink the whiteout if it exists. 1014 if haveUpperWhiteout { 1015 if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil { 1016 log.Warningf("overlay.filesystem.createAndOpenLocked: failed to unlink whiteout: %v", err) 1017 return nil, err 1018 } 1019 } 1020 // Create the file on the upper layer, and get an FD representing it. 1021 upperFD, err := vfsObj.OpenAt(ctx, fs.creds, &pop, &vfs.OpenOptions{ 1022 Flags: opts.Flags&^vfs.FileCreationFlags | linux.O_CREAT | linux.O_EXCL, 1023 Mode: opts.Mode, 1024 }) 1025 if err != nil { 1026 if haveUpperWhiteout { 1027 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 1028 } 1029 return nil, err 1030 } 1031 1032 // Change the file's owner to the caller. We can't use upperFD.SetStat() 1033 // because it will pick up creds from ctx. 1034 if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{ 1035 Stat: parent.newChildOwnerStat(opts.Mode, creds), 1036 }); err != nil { 1037 if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil { 1038 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) metadata update failure: %v", cleanupErr)) 1039 } else if haveUpperWhiteout { 1040 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 1041 } 1042 return nil, err 1043 } 1044 // Re-lookup to get a dentry representing the new file, which is needed for 1045 // the returned FD. 1046 child, _, err := fs.getChildLocked(ctx, parent, childName, ds) 1047 if err != nil { 1048 if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil { 1049 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) dentry lookup failure: %v", cleanupErr)) 1050 } else if haveUpperWhiteout { 1051 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 1052 } 1053 return nil, err 1054 } 1055 // Finally construct the overlay FD. Below this point, we don't perform 1056 // cleanup (the file was created successfully even if we can no longer open 1057 // it for some reason). 1058 parent.dirents = nil 1059 upperFlags := upperFD.StatusFlags() 1060 fd := ®ularFileFD{ 1061 copiedUp: true, 1062 cachedFD: upperFD, 1063 cachedFlags: upperFlags, 1064 } 1065 fd.LockFD.Init(&child.locks) 1066 upperFDOpts := upperFD.Options() 1067 if err := fd.vfsfd.Init(fd, upperFlags, mnt, &child.vfsd, &upperFDOpts); err != nil { 1068 upperFD.DecRef(ctx) 1069 return nil, err 1070 } 1071 parent.watches.Notify(ctx, childName, linux.IN_CREATE, 0 /* cookie */, vfs.PathEvent, false /* unlinked */) 1072 return &fd.vfsfd, nil 1073 } 1074 1075 // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. 1076 func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { 1077 var ds *[]*dentry 1078 fs.renameMu.RLock() 1079 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1080 d, err := fs.resolveLocked(ctx, rp, &ds) 1081 if err != nil { 1082 return "", err 1083 } 1084 layerVD := d.topLayer() 1085 return fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{ 1086 Root: layerVD, 1087 Start: layerVD, 1088 }) 1089 } 1090 1091 // RenameAt implements vfs.FilesystemImpl.RenameAt. 1092 func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { 1093 // Resolve newParent first to verify that it's on this Mount. 1094 var ds *[]*dentry 1095 fs.renameMu.Lock() 1096 defer fs.renameMuUnlockAndCheckDrop(ctx, &ds) 1097 newParent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*dentry), &ds) 1098 if err != nil { 1099 return err 1100 } 1101 1102 if opts.Flags&^linux.RENAME_NOREPLACE != 0 { 1103 return linuxerr.EINVAL 1104 } 1105 1106 newName := rp.Component() 1107 if newName == "." || newName == ".." { 1108 if opts.Flags&linux.RENAME_NOREPLACE != 0 { 1109 return linuxerr.EEXIST 1110 } 1111 return linuxerr.EBUSY 1112 } 1113 if uint64(len(newName)) > fs.maxFilenameLen { 1114 return linuxerr.ENAMETOOLONG 1115 } 1116 // Do not check for newName length, since different filesystem 1117 // implementations impose different name limits. upperfs.RenameAt() will fail 1118 // appropriately if it has to. 1119 mnt := rp.Mount() 1120 if mnt != oldParentVD.Mount() { 1121 return linuxerr.EXDEV 1122 } 1123 if err := mnt.CheckBeginWrite(); err != nil { 1124 return err 1125 } 1126 defer mnt.EndWrite() 1127 1128 oldParent := oldParentVD.Dentry().Impl().(*dentry) 1129 creds := rp.Credentials() 1130 if err := oldParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil { 1131 return err 1132 } 1133 // We need a dentry representing the renamed file since, if it's a 1134 // directory, we need to check for write permission on it. 1135 oldParent.dirMu.Lock() 1136 defer oldParent.dirMu.Unlock() 1137 renamed, _, err := fs.getChildLocked(ctx, oldParent, oldName, &ds) 1138 if err != nil { 1139 return err 1140 } 1141 if err := oldParent.mayDelete(creds, renamed); err != nil { 1142 return err 1143 } 1144 if renamed.isDir() { 1145 if renamed == newParent || genericIsAncestorDentry(renamed, newParent) { 1146 return linuxerr.EINVAL 1147 } 1148 if oldParent != newParent { 1149 if err := renamed.checkPermissions(creds, vfs.MayWrite); err != nil { 1150 return err 1151 } 1152 } 1153 } else { 1154 if opts.MustBeDir || rp.MustBeDir() { 1155 return linuxerr.ENOTDIR 1156 } 1157 } 1158 1159 if oldParent != newParent { 1160 if err := newParent.checkPermissions(creds, vfs.MayWrite|vfs.MayExec); err != nil { 1161 return err 1162 } 1163 newParent.dirMu.NestedLock(dirLockNew) 1164 defer newParent.dirMu.NestedUnlock(dirLockNew) 1165 } 1166 if newParent.vfsd.IsDead() { 1167 return linuxerr.ENOENT 1168 } 1169 var ( 1170 replaced *dentry 1171 replacedVFSD *vfs.Dentry 1172 replacedLayer lookupLayer 1173 whiteouts map[string]bool 1174 ) 1175 replaced, replacedLayer, err = fs.getChildLocked(ctx, newParent, newName, &ds) 1176 if err != nil && !linuxerr.Equals(linuxerr.ENOENT, err) { 1177 return err 1178 } 1179 if replaced != nil { 1180 if opts.Flags&linux.RENAME_NOREPLACE != 0 { 1181 return linuxerr.EEXIST 1182 } 1183 replacedVFSD = &replaced.vfsd 1184 if replaced.isDir() { 1185 if !renamed.isDir() { 1186 return linuxerr.EISDIR 1187 } 1188 if genericIsAncestorDentry(replaced, renamed) { 1189 return linuxerr.ENOTEMPTY 1190 } 1191 replaced.dirMu.NestedLock(dirLockReplaced) 1192 defer replaced.dirMu.NestedUnlock(dirLockReplaced) 1193 whiteouts, err = replaced.collectWhiteoutsForRmdirLocked(ctx) 1194 if err != nil { 1195 return err 1196 } 1197 } else { 1198 if rp.MustBeDir() || renamed.isDir() { 1199 return linuxerr.ENOTDIR 1200 } 1201 } 1202 } 1203 1204 if oldParent == newParent && oldName == newName { 1205 return nil 1206 } 1207 1208 // renamed and oldParent need to be copied-up before they're renamed on the 1209 // upper layer. 1210 if err := renamed.copyUpLocked(ctx); err != nil { 1211 return err 1212 } 1213 // If renamed is a directory, all of its descendants need to be copied-up 1214 // before they're renamed on the upper layer. 1215 if renamed.isDir() { 1216 if err := renamed.copyUpDescendantsLocked(ctx, &ds); err != nil { 1217 return err 1218 } 1219 } 1220 // newParent must be copied-up before it can contain renamed on the upper 1221 // layer. 1222 if err := newParent.copyUpLocked(ctx); err != nil { 1223 return err 1224 } 1225 // If replaced exists, it doesn't need to be copied-up, but we do need to 1226 // serialize with copy-up. Holding renameMu for writing should be 1227 // sufficient, but out of an abundance of caution... 1228 if replaced != nil { 1229 replaced.copyMu.RLock() 1230 defer replaced.copyMu.RUnlock() 1231 } 1232 1233 vfsObj := rp.VirtualFilesystem() 1234 mntns := vfs.MountNamespaceFromContext(ctx) 1235 defer mntns.DecRef(ctx) 1236 if err := vfsObj.PrepareRenameDentry(mntns, &renamed.vfsd, replacedVFSD); err != nil { 1237 return err 1238 } 1239 1240 newpop := vfs.PathOperation{ 1241 Root: newParent.upperVD, 1242 Start: newParent.upperVD, 1243 Path: fspath.Parse(newName), 1244 } 1245 1246 needRecreateWhiteouts := false 1247 cleanupRecreateWhiteouts := func() { 1248 if !needRecreateWhiteouts { 1249 return 1250 } 1251 for whiteoutName, whiteoutUpper := range whiteouts { 1252 if !whiteoutUpper { 1253 continue 1254 } 1255 if err := CreateWhiteout(ctx, vfsObj, fs.creds, &vfs.PathOperation{ 1256 Root: replaced.upperVD, 1257 Start: replaced.upperVD, 1258 Path: fspath.Parse(whiteoutName), 1259 }); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) { 1260 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RenameAt failure: %v", err)) 1261 } 1262 } 1263 } 1264 if renamed.isDir() { 1265 if replacedLayer == lookupLayerUpper { 1266 // Remove whiteouts from the directory being replaced. 1267 needRecreateWhiteouts = true 1268 for whiteoutName, whiteoutUpper := range whiteouts { 1269 if !whiteoutUpper { 1270 continue 1271 } 1272 if err := vfsObj.UnlinkAt(ctx, fs.creds, &vfs.PathOperation{ 1273 Root: replaced.upperVD, 1274 Start: replaced.upperVD, 1275 Path: fspath.Parse(whiteoutName), 1276 }); err != nil { 1277 vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD) 1278 cleanupRecreateWhiteouts() 1279 return err 1280 } 1281 } 1282 } else if replacedLayer == lookupLayerUpperWhiteout { 1283 // We need to explicitly remove the whiteout since otherwise rename 1284 // on the upper layer will fail with ENOTDIR. 1285 if err := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); err != nil { 1286 vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD) 1287 return err 1288 } 1289 } 1290 } 1291 1292 // Essentially no gVisor filesystem supports RENAME_WHITEOUT, so just do a 1293 // regular rename and create the whiteout at the origin manually. Unlike 1294 // RENAME_WHITEOUT, this isn't atomic with respect to other users of the 1295 // upper filesystem, but this is already the case for virtually all other 1296 // overlay filesystem operations too. 1297 oldpop := vfs.PathOperation{ 1298 Root: oldParent.upperVD, 1299 Start: oldParent.upperVD, 1300 Path: fspath.Parse(oldName), 1301 } 1302 if err := vfsObj.RenameAt(ctx, creds, &oldpop, &newpop, &opts); err != nil { 1303 vfsObj.AbortRenameDentry(&renamed.vfsd, replacedVFSD) 1304 cleanupRecreateWhiteouts() 1305 return err 1306 } 1307 1308 // Below this point, the renamed dentry is now at newpop, and anything we 1309 // replaced is gone forever. Commit the rename, update the overlay 1310 // filesystem tree, and abandon attempts to recover from errors. 1311 vfsObj.CommitRenameReplaceDentry(ctx, &renamed.vfsd, replacedVFSD) 1312 delete(oldParent.children, oldName) 1313 if replaced != nil { 1314 // Lower dentries of replaced are not reachable from the overlay anymore. 1315 // NOTE(b/237573779): Ask lower filesystem to release resources for this 1316 // dentry whenever possible to reduce resource usage. 1317 for _, replaceLower := range replaced.lowerVDs { 1318 replaceLower.Dentry().MarkEvictable() 1319 } 1320 ds = appendDentry(ds, replaced) 1321 } 1322 if oldParent != newParent { 1323 newParent.dirents = nil 1324 // This can't drop the last reference on oldParent because one is held 1325 // by oldParentVD, so lock recursion is impossible. 1326 oldParent.DecRef(ctx) 1327 ds = appendDentry(ds, oldParent) 1328 newParent.IncRef() 1329 renamed.parent = newParent 1330 } 1331 renamed.name = newName 1332 if newParent.children == nil { 1333 newParent.children = make(map[string]*dentry) 1334 } 1335 newParent.children[newName] = renamed 1336 oldParent.dirents = nil 1337 1338 if err := CreateWhiteout(ctx, vfsObj, fs.creds, &oldpop); err != nil { 1339 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to create whiteout at origin after RenameAt: %v", err)) 1340 } 1341 if renamed.isDir() { 1342 if err := vfsObj.SetXattrAt(ctx, fs.creds, &newpop, &vfs.SetXattrOptions{ 1343 Name: _OVL_XATTR_OPAQUE, 1344 Value: "y", 1345 }); err != nil { 1346 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to make renamed directory opaque: %v", err)) 1347 } 1348 } 1349 1350 vfs.InotifyRename(ctx, &renamed.watches, &oldParent.watches, &newParent.watches, oldName, newName, renamed.isDir()) 1351 return nil 1352 } 1353 1354 // RmdirAt implements vfs.FilesystemImpl.RmdirAt. 1355 func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { 1356 var ds *[]*dentry 1357 fs.renameMu.RLock() 1358 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1359 start := rp.Start().Impl().(*dentry) 1360 parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 1361 if err != nil { 1362 return err 1363 } 1364 if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { 1365 return err 1366 } 1367 if err := rp.Mount().CheckBeginWrite(); err != nil { 1368 return err 1369 } 1370 defer rp.Mount().EndWrite() 1371 name := rp.Component() 1372 if name == "." { 1373 return linuxerr.EINVAL 1374 } 1375 if name == ".." { 1376 return linuxerr.ENOTEMPTY 1377 } 1378 vfsObj := rp.VirtualFilesystem() 1379 mntns := vfs.MountNamespaceFromContext(ctx) 1380 defer mntns.DecRef(ctx) 1381 parent.dirMu.Lock() 1382 defer parent.dirMu.Unlock() 1383 1384 // Ensure that parent is copied-up before potentially holding child.copyMu 1385 // below. 1386 if err := parent.copyUpLocked(ctx); err != nil { 1387 return err 1388 } 1389 1390 // We need a dentry representing the child directory being removed in order 1391 // to verify that it's empty. 1392 child, _, err := fs.getChildLocked(ctx, parent, name, &ds) 1393 if err != nil { 1394 return err 1395 } 1396 if !child.isDir() { 1397 return linuxerr.ENOTDIR 1398 } 1399 if err := parent.mayDelete(rp.Credentials(), child); err != nil { 1400 return err 1401 } 1402 child.dirMu.NestedLock(dirLockChild) 1403 defer child.dirMu.NestedUnlock(dirLockChild) 1404 whiteouts, err := child.collectWhiteoutsForRmdirLocked(ctx) 1405 if err != nil { 1406 return err 1407 } 1408 child.copyMu.RLock() 1409 defer child.copyMu.RUnlock() 1410 if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil { 1411 return err 1412 } 1413 1414 pop := vfs.PathOperation{ 1415 Root: parent.upperVD, 1416 Start: parent.upperVD, 1417 Path: fspath.Parse(name), 1418 } 1419 if child.upperVD.Ok() { 1420 cleanupRecreateWhiteouts := func() { 1421 if !child.upperVD.Ok() { 1422 return 1423 } 1424 for whiteoutName, whiteoutUpper := range whiteouts { 1425 if !whiteoutUpper { 1426 continue 1427 } 1428 if err := CreateWhiteout(ctx, vfsObj, fs.creds, &vfs.PathOperation{ 1429 Root: child.upperVD, 1430 Start: child.upperVD, 1431 Path: fspath.Parse(whiteoutName), 1432 }); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) { 1433 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to recreate deleted whiteout after RmdirAt failure: %v", err)) 1434 } 1435 } 1436 } 1437 // Remove existing whiteouts on the upper layer. 1438 for whiteoutName, whiteoutUpper := range whiteouts { 1439 if !whiteoutUpper { 1440 continue 1441 } 1442 if err := vfsObj.UnlinkAt(ctx, fs.creds, &vfs.PathOperation{ 1443 Root: child.upperVD, 1444 Start: child.upperVD, 1445 Path: fspath.Parse(whiteoutName), 1446 }); err != nil { 1447 vfsObj.AbortDeleteDentry(&child.vfsd) 1448 cleanupRecreateWhiteouts() 1449 return err 1450 } 1451 } 1452 // Remove the existing directory on the upper layer. 1453 if err := vfsObj.RmdirAt(ctx, fs.creds, &pop); err != nil { 1454 vfsObj.AbortDeleteDentry(&child.vfsd) 1455 cleanupRecreateWhiteouts() 1456 return err 1457 } 1458 } 1459 if err := CreateWhiteout(ctx, vfsObj, fs.creds, &pop); err != nil { 1460 vfsObj.AbortDeleteDentry(&child.vfsd) 1461 if child.upperVD.Ok() { 1462 // Don't attempt to recover from this: the original directory is 1463 // already gone, so any dentries representing it are invalid, and 1464 // creating a new directory won't undo that. 1465 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to create whiteout after removing upper layer directory during RmdirAt: %v", err)) 1466 } 1467 return err 1468 } 1469 1470 vfsObj.CommitDeleteDentry(ctx, &child.vfsd) 1471 delete(parent.children, name) 1472 ds = appendDentry(ds, child) 1473 parent.dirents = nil 1474 parent.watches.Notify(ctx, name, linux.IN_DELETE|linux.IN_ISDIR, 0 /* cookie */, vfs.InodeEvent, true /* unlinked */) 1475 return nil 1476 } 1477 1478 // SetStatAt implements vfs.FilesystemImpl.SetStatAt. 1479 func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { 1480 var ds *[]*dentry 1481 fs.renameMu.RLock() 1482 d, err := fs.resolveLocked(ctx, rp, &ds) 1483 if err != nil { 1484 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1485 return err 1486 } 1487 err = d.setStatLocked(ctx, rp, opts) 1488 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1489 if err != nil { 1490 return err 1491 } 1492 1493 if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { 1494 d.InotifyWithParent(ctx, ev, 0 /* cookie */, vfs.InodeEvent) 1495 } 1496 return nil 1497 } 1498 1499 // Precondition: d.fs.renameMu must be held for reading. 1500 func (d *dentry) setStatLocked(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { 1501 mode := linux.FileMode(d.mode.Load()) 1502 if err := vfs.CheckSetStat(ctx, rp.Credentials(), &opts, mode, auth.KUID(d.uid.Load()), auth.KGID(d.gid.Load())); err != nil { 1503 return err 1504 } 1505 mnt := rp.Mount() 1506 if err := mnt.CheckBeginWrite(); err != nil { 1507 return err 1508 } 1509 defer mnt.EndWrite() 1510 if err := d.copyUpLocked(ctx); err != nil { 1511 return err 1512 } 1513 // Changes to d's attributes are serialized by d.copyMu. 1514 d.copyMu.Lock() 1515 defer d.copyMu.Unlock() 1516 if err := d.fs.vfsfs.VirtualFilesystem().SetStatAt(ctx, d.fs.creds, &vfs.PathOperation{ 1517 Root: d.upperVD, 1518 Start: d.upperVD, 1519 }, &opts); err != nil { 1520 return err 1521 } 1522 d.updateAfterSetStatLocked(&opts) 1523 return nil 1524 } 1525 1526 // StatAt implements vfs.FilesystemImpl.StatAt. 1527 func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { 1528 var ds *[]*dentry 1529 fs.renameMu.RLock() 1530 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1531 d, err := fs.resolveLocked(ctx, rp, &ds) 1532 if err != nil { 1533 return linux.Statx{}, err 1534 } 1535 1536 var stat linux.Statx 1537 if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 { 1538 layerVD := d.topLayer() 1539 stat, err = fs.vfsfs.VirtualFilesystem().StatAt(ctx, fs.creds, &vfs.PathOperation{ 1540 Root: layerVD, 1541 Start: layerVD, 1542 }, &vfs.StatOptions{ 1543 Mask: layerMask, 1544 Sync: opts.Sync, 1545 }) 1546 if err != nil { 1547 return linux.Statx{}, err 1548 } 1549 } 1550 d.statInternalTo(ctx, &opts, &stat) 1551 return stat, nil 1552 } 1553 1554 // StatFSAt implements vfs.FilesystemImpl.StatFSAt. 1555 func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { 1556 var ds *[]*dentry 1557 fs.renameMu.RLock() 1558 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1559 _, err := fs.resolveLocked(ctx, rp, &ds) 1560 if err != nil { 1561 return linux.Statfs{}, err 1562 } 1563 return fs.statFS(ctx) 1564 } 1565 1566 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. 1567 func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { 1568 return fs.doCreateAt(ctx, rp, createNonDirectory, func(parent *dentry, childName string, haveUpperWhiteout bool) error { 1569 vfsObj := fs.vfsfs.VirtualFilesystem() 1570 pop := vfs.PathOperation{ 1571 Root: parent.upperVD, 1572 Start: parent.upperVD, 1573 Path: fspath.Parse(childName), 1574 } 1575 if haveUpperWhiteout { 1576 if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil { 1577 return err 1578 } 1579 } 1580 if err := vfsObj.SymlinkAt(ctx, fs.creds, &pop, target); err != nil { 1581 if haveUpperWhiteout { 1582 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 1583 } 1584 return err 1585 } 1586 creds := rp.Credentials() 1587 if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{ 1588 Stat: linux.Statx{ 1589 Mask: linux.STATX_UID | linux.STATX_GID, 1590 UID: uint32(creds.EffectiveKUID), 1591 GID: uint32(creds.EffectiveKGID), 1592 }, 1593 }); err != nil { 1594 if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil { 1595 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after SymlinkAt metadata update failure: %v", cleanupErr)) 1596 } else if haveUpperWhiteout { 1597 fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop) 1598 } 1599 return err 1600 } 1601 return nil 1602 }) 1603 } 1604 1605 // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. 1606 func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { 1607 var ds *[]*dentry 1608 fs.renameMu.RLock() 1609 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1610 start := rp.Start().Impl().(*dentry) 1611 parent, err := fs.walkParentDirLocked(ctx, rp, start, &ds) 1612 if err != nil { 1613 return err 1614 } 1615 if err := parent.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { 1616 return err 1617 } 1618 if err := rp.Mount().CheckBeginWrite(); err != nil { 1619 return err 1620 } 1621 defer rp.Mount().EndWrite() 1622 name := rp.Component() 1623 if name == "." || name == ".." { 1624 return linuxerr.EISDIR 1625 } 1626 if rp.MustBeDir() { 1627 return linuxerr.ENOTDIR 1628 } 1629 vfsObj := rp.VirtualFilesystem() 1630 mntns := vfs.MountNamespaceFromContext(ctx) 1631 defer mntns.DecRef(ctx) 1632 parent.dirMu.Lock() 1633 defer parent.dirMu.Unlock() 1634 1635 // Ensure that parent is copied-up before potentially holding child.copyMu 1636 // below. 1637 if err := parent.copyUpLocked(ctx); err != nil { 1638 return err 1639 } 1640 1641 // We need a dentry representing the child being removed in order to verify 1642 // that it's not a directory. 1643 child, childLayer, err := fs.getChildLocked(ctx, parent, name, &ds) 1644 if err != nil { 1645 return err 1646 } 1647 if child.isDir() { 1648 return linuxerr.EISDIR 1649 } 1650 if err := parent.mayDelete(rp.Credentials(), child); err != nil { 1651 return err 1652 } 1653 // Hold child.copyMu to prevent it from being copied-up during 1654 // deletion. 1655 child.copyMu.RLock() 1656 defer child.copyMu.RUnlock() 1657 if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil { 1658 return err 1659 } 1660 1661 pop := vfs.PathOperation{ 1662 Root: parent.upperVD, 1663 Start: parent.upperVD, 1664 Path: fspath.Parse(name), 1665 } 1666 if childLayer == lookupLayerUpper { 1667 // Remove the existing file on the upper layer. 1668 if err := vfsObj.UnlinkAt(ctx, fs.creds, &pop); err != nil { 1669 vfsObj.AbortDeleteDentry(&child.vfsd) 1670 return err 1671 } 1672 } 1673 if err := CreateWhiteout(ctx, vfsObj, fs.creds, &pop); err != nil { 1674 vfsObj.AbortDeleteDentry(&child.vfsd) 1675 if childLayer == lookupLayerUpper { 1676 panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to create whiteout after unlinking upper layer file during UnlinkAt: %v", err)) 1677 } 1678 return err 1679 } 1680 1681 vfsObj.CommitDeleteDentry(ctx, &child.vfsd) 1682 delete(parent.children, name) 1683 if !child.isDir() { 1684 // Once a whiteout is created, non-directory dentries on the lower layers 1685 // are no longer reachable from the overlayfs. Ask filesystems to release 1686 // their resources whenever possible. 1687 for _, lowerDentry := range child.lowerVDs { 1688 lowerDentry.Dentry().MarkEvictable() 1689 } 1690 } 1691 ds = appendDentry(ds, child) 1692 vfs.InotifyRemoveChild(ctx, &child.watches, &parent.watches, name) 1693 parent.dirents = nil 1694 return nil 1695 } 1696 1697 // isOverlayXattr returns whether the given extended attribute configures the 1698 // overlay. 1699 func isOverlayXattr(name string) bool { 1700 return strings.HasPrefix(name, _OVL_XATTR_PREFIX) 1701 } 1702 1703 // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. 1704 func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { 1705 var ds *[]*dentry 1706 fs.renameMu.RLock() 1707 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1708 d, err := fs.resolveLocked(ctx, rp, &ds) 1709 if err != nil { 1710 return nil, err 1711 } 1712 1713 return fs.listXattr(ctx, d, size) 1714 } 1715 1716 func (fs *filesystem) listXattr(ctx context.Context, d *dentry, size uint64) ([]string, error) { 1717 vfsObj := d.fs.vfsfs.VirtualFilesystem() 1718 top := d.topLayer() 1719 names, err := vfsObj.ListXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, size) 1720 if err != nil { 1721 return nil, err 1722 } 1723 1724 // Filter out all overlay attributes. 1725 n := 0 1726 for _, name := range names { 1727 if !isOverlayXattr(name) { 1728 names[n] = name 1729 n++ 1730 } 1731 } 1732 return names[:n], err 1733 } 1734 1735 // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. 1736 func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { 1737 var ds *[]*dentry 1738 fs.renameMu.RLock() 1739 defer fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1740 d, err := fs.resolveLocked(ctx, rp, &ds) 1741 if err != nil { 1742 return "", err 1743 } 1744 1745 return fs.getXattr(ctx, d, rp.Credentials(), &opts) 1746 } 1747 1748 func (fs *filesystem) getXattr(ctx context.Context, d *dentry, creds *auth.Credentials, opts *vfs.GetXattrOptions) (string, error) { 1749 if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayRead); err != nil { 1750 return "", err 1751 } 1752 1753 // Return EOPNOTSUPP when fetching an overlay attribute. 1754 // See fs/overlayfs/super.c:ovl_own_xattr_get(). 1755 if isOverlayXattr(opts.Name) { 1756 return "", linuxerr.EOPNOTSUPP 1757 } 1758 1759 // Analogous to fs/overlayfs/super.c:ovl_other_xattr_get(). 1760 vfsObj := d.fs.vfsfs.VirtualFilesystem() 1761 top := d.topLayer() 1762 return vfsObj.GetXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: top, Start: top}, opts) 1763 } 1764 1765 // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. 1766 func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { 1767 var ds *[]*dentry 1768 fs.renameMu.RLock() 1769 d, err := fs.resolveLocked(ctx, rp, &ds) 1770 if err != nil { 1771 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1772 return err 1773 } 1774 1775 err = fs.setXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), &opts) 1776 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1777 if err != nil { 1778 return err 1779 } 1780 1781 d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent) 1782 return nil 1783 } 1784 1785 // Precondition: fs.renameMu must be locked. 1786 func (fs *filesystem) setXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, opts *vfs.SetXattrOptions) error { 1787 if err := d.checkXattrPermissions(creds, opts.Name, vfs.MayWrite); err != nil { 1788 return err 1789 } 1790 1791 // Return EOPNOTSUPP when setting an overlay attribute. 1792 // See fs/overlayfs/super.c:ovl_own_xattr_set(). 1793 if isOverlayXattr(opts.Name) { 1794 return linuxerr.EOPNOTSUPP 1795 } 1796 1797 // Analogous to fs/overlayfs/super.c:ovl_other_xattr_set(). 1798 if err := mnt.CheckBeginWrite(); err != nil { 1799 return err 1800 } 1801 defer mnt.EndWrite() 1802 if err := d.copyUpLocked(ctx); err != nil { 1803 return err 1804 } 1805 vfsObj := d.fs.vfsfs.VirtualFilesystem() 1806 return vfsObj.SetXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, opts) 1807 } 1808 1809 // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. 1810 func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { 1811 var ds *[]*dentry 1812 fs.renameMu.RLock() 1813 d, err := fs.resolveLocked(ctx, rp, &ds) 1814 if err != nil { 1815 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1816 return err 1817 } 1818 1819 err = fs.removeXattrLocked(ctx, d, rp.Mount(), rp.Credentials(), name) 1820 fs.renameMuRUnlockAndCheckDrop(ctx, &ds) 1821 if err != nil { 1822 return err 1823 } 1824 1825 d.InotifyWithParent(ctx, linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent) 1826 return nil 1827 } 1828 1829 // Precondition: fs.renameMu must be locked. 1830 func (fs *filesystem) removeXattrLocked(ctx context.Context, d *dentry, mnt *vfs.Mount, creds *auth.Credentials, name string) error { 1831 if err := d.checkXattrPermissions(creds, name, vfs.MayWrite); err != nil { 1832 return err 1833 } 1834 1835 // Like SetXattrAt, return EOPNOTSUPP when removing an overlay attribute. 1836 // Linux passes the remove request to xattr_handler->set. 1837 // See fs/xattr.c:vfs_removexattr(). 1838 if isOverlayXattr(name) { 1839 return linuxerr.EOPNOTSUPP 1840 } 1841 1842 if err := mnt.CheckBeginWrite(); err != nil { 1843 return err 1844 } 1845 defer mnt.EndWrite() 1846 if err := d.copyUpLocked(ctx); err != nil { 1847 return err 1848 } 1849 vfsObj := d.fs.vfsfs.VirtualFilesystem() 1850 return vfsObj.RemoveXattrAt(ctx, fs.creds, &vfs.PathOperation{Root: d.upperVD, Start: d.upperVD}, name) 1851 } 1852 1853 // PrependPath implements vfs.FilesystemImpl.PrependPath. 1854 func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { 1855 fs.renameMu.RLock() 1856 defer fs.renameMu.RUnlock() 1857 return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) 1858 } 1859 1860 // MountOptions implements vfs.FilesystemImpl.MountOptions. 1861 func (fs *filesystem) MountOptions() string { 1862 // Return the mount options from the topmost layer. 1863 var vd vfs.VirtualDentry 1864 if fs.opts.UpperRoot.Ok() { 1865 vd = fs.opts.UpperRoot 1866 } else { 1867 vd = fs.opts.LowerRoots[0] 1868 } 1869 return vd.Mount().Filesystem().Impl().MountOptions() 1870 }