// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kernfs

// This file implements vfs.FilesystemImpl for kernfs.

import (
	"fmt"

	"github.com/metacubex/gvisor/pkg/abi/linux"
	"github.com/metacubex/gvisor/pkg/context"
	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
	"github.com/metacubex/gvisor/pkg/fspath"
	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
	"github.com/metacubex/gvisor/pkg/sentry/socket/unix/transport"
	"github.com/metacubex/gvisor/pkg/sentry/vfs"
)

// stepExistingLocked resolves rp.Component() in parent directory d.
//
// stepExistingLocked is loosely analogous to fs/namei.c:walk_component().
//
// It returns the dentry from which resolution should continue, a flag
// reporting whether a symlink (or jump) was followed, and an error. When a
// symlink is handled, the returned dentry is d itself rather than the
// symlink's dentry.
//
// Preconditions:
//   - Filesystem.mu must be locked for at least reading.
//   - !rp.Done().
//
// Postcondition: Caller must call fs.processDeferredDecRefs*.
func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry) (*Dentry, bool, error) {
	if !d.isDir() {
		return nil, false, linuxerr.ENOTDIR
	}
	// Directory searchable?
	if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
		return nil, false, err
	}
	name := rp.Component()
	// Revalidation must be skipped if name is "." or ".."; d or its parent
	// respectively can't be expected to transition from invalidated back to
	// valid, so detecting invalidation and retrying would loop forever. This
	// is consistent with Linux: fs/namei.c:walk_component() => lookup_fast()
	// calls d_revalidate(), but walk_component() => handle_dots() does not.
	if name == "." {
		rp.Advance()
		return d, false, nil
	}
	if name == ".." {
		// Stay on d when it is the resolution root or has no parent dentry.
		if isRoot, err := rp.CheckRoot(ctx, d.VFSDentry()); err != nil {
			return nil, false, err
		} else if isRoot || d.parent.Load() == nil {
			rp.Advance()
			return d, false, nil
		}
		if err := rp.CheckMount(ctx, d.Parent().VFSDentry()); err != nil {
			return nil, false, err
		}
		rp.Advance()
		return d.parent.Load(), false, nil
	}
	if len(name) > linux.NAME_MAX {
		return nil, false, linuxerr.ENAMETOOLONG
	}
	// d.dirMu is held only around the child lookup/revalidation, which reads
	// d.children and may insert into or delete from it.
	d.dirMu.Lock()
	next, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, d.children[name])
	d.dirMu.Unlock()
	if err != nil {
		return nil, false, err
	}
	if err := rp.CheckMount(ctx, next.VFSDentry()); err != nil {
		return nil, false, err
	}
	// Resolve any symlink at current path component.
	if rp.ShouldFollowSymlink() && next.isSymlink() {
		targetVD, targetPathname, err := next.inode.Getlink(ctx, rp.Mount())
		if err != nil {
			return nil, false, err
		}
		if targetVD.Ok() {
			// Getlink produced a concrete target dentry; hand it to rp and
			// release the reference through deferDecRefVD, which avoids an
			// immediate DecRef on dentries of this filesystem while fs.mu
			// may be held.
			followedTarget, err := rp.HandleJump(targetVD)
			fs.deferDecRefVD(ctx, targetVD)
			return d, followedTarget, err
		}
		// Otherwise the target is a pathname for rp to splice in.
		followedSymlink, err := rp.HandleSymlink(targetPathname)
		return d, followedSymlink, err
	}
	rp.Advance()
	return next, false, nil
}

// revalidateChildLocked must be called after a call to parent.vfsd.Child(name)
// or vfs.ResolvingPath.ResolveChild(name) returns childVFSD (which may be
// nil) to verify that the returned child (or lack thereof) is correct.
//
// Preconditions:
//   - Filesystem.mu must be locked for at least reading.
107 // - parent.dirMu must be locked. 108 // - parent.isDir(). 109 // - name is not "." or "..". 110 // 111 // Postconditions: Caller must call fs.processDeferredDecRefs*. 112 func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *Dentry, name string, child *Dentry) (*Dentry, error) { 113 if child != nil { 114 // Cached dentry exists, revalidate. 115 if !child.inode.Valid(ctx) { 116 delete(parent.children, name) 117 if child.inode.Keep() { 118 // Drop the ref owned by kernfs. 119 fs.deferDecRef(child) 120 } 121 rcs := vfsObj.InvalidateDentry(ctx, child.VFSDentry()) 122 for _, rc := range rcs { 123 fs.deferDecRef(rc) 124 } 125 child = nil 126 } 127 } 128 if child == nil { 129 // Dentry isn't cached; it either doesn't exist or failed revalidation. 130 // Attempt to resolve it via Lookup. 131 childInode, err := parent.inode.Lookup(ctx, name) 132 if err != nil { 133 return nil, err 134 } 135 var newChild Dentry 136 newChild.Init(fs, childInode) // childInode's ref is transferred to newChild. 137 parent.insertChildLocked(name, &newChild) 138 child = &newChild 139 140 // Drop the ref on newChild. This will cause the dentry to get pruned 141 // from the dentry tree by the end of current filesystem operation 142 // (before returning to the VFS layer) if another ref is not picked on 143 // this dentry. 144 if !childInode.Keep() { 145 fs.deferDecRef(&newChild) 146 } 147 } 148 return child, nil 149 } 150 151 // walkExistingLocked resolves rp to an existing file. 152 // 153 // walkExistingLocked is loosely analogous to Linux's 154 // fs/namei.c:path_lookupat(). 155 // 156 // Preconditions: Filesystem.mu must be locked for at least reading. 157 // 158 // Postconditions: Caller must call fs.processDeferredDecRefs*. 
159 func (fs *Filesystem) walkExistingLocked(ctx context.Context, rp *vfs.ResolvingPath) (*Dentry, error) { 160 d := rp.Start().Impl().(*Dentry) 161 for !rp.Done() { 162 var err error 163 d, _, err = fs.stepExistingLocked(ctx, rp, d) 164 if err != nil { 165 return nil, err 166 } 167 } 168 if rp.MustBeDir() && !d.isDir() { 169 return nil, linuxerr.ENOTDIR 170 } 171 return d, nil 172 } 173 174 // walkParentDirLocked resolves all but the last path component of rp to an 175 // existing directory. It does not check that the returned directory is 176 // searchable by the provider of rp. 177 // 178 // walkParentDirLocked is loosely analogous to Linux's 179 // fs/namei.c:path_parentat(). 180 // 181 // Preconditions: 182 // - Filesystem.mu must be locked for at least reading. 183 // - !rp.Done(). 184 // 185 // Postconditions: Caller must call fs.processDeferredDecRefs*. 186 func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry) (*Dentry, error) { 187 for !rp.Final() { 188 var err error 189 d, _, err = fs.stepExistingLocked(ctx, rp, d) 190 if err != nil { 191 return nil, err 192 } 193 } 194 if !d.isDir() { 195 return nil, linuxerr.ENOTDIR 196 } 197 return d, nil 198 } 199 200 // checkCreateLocked checks that a file named rp.Component() may be created in 201 // directory parent, then returns rp.Component(). 202 // 203 // Preconditions: 204 // - Filesystem.mu must be locked for at least reading. 205 // - isDir(parentInode) == true. 206 func checkCreateLocked(ctx context.Context, creds *auth.Credentials, name string, parent *Dentry) error { 207 // Order of checks is important. First check if parent directory can be 208 // executed, then check for existence, and lastly check if mount is writable. 209 if err := parent.inode.CheckPermissions(ctx, creds, vfs.MayExec); err != nil { 210 return err 211 } 212 if name == "." || name == ".." 
{ 213 return linuxerr.EEXIST 214 } 215 if len(name) > linux.NAME_MAX { 216 return linuxerr.ENAMETOOLONG 217 } 218 if _, ok := parent.children[name]; ok { 219 return linuxerr.EEXIST 220 } 221 if parent.VFSDentry().IsDead() { 222 return linuxerr.ENOENT 223 } 224 if err := parent.inode.CheckPermissions(ctx, creds, vfs.MayWrite); err != nil { 225 return err 226 } 227 return nil 228 } 229 230 // checkDeleteLocked checks that the file represented by vfsd may be deleted. 231 // 232 // Preconditions: Filesystem.mu must be locked for at least reading. 233 func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry) error { 234 parent := d.parent.Load() 235 if parent == nil { 236 return linuxerr.EBUSY 237 } 238 if parent.vfsd.IsDead() { 239 return linuxerr.ENOENT 240 } 241 if d.vfsd.IsDead() { 242 // This implies a duplicate unlink on an orphaned dentry, where the path 243 // resolution was successful. This is possible when the orphan is 244 // replaced by a new node of the same name (so the path resolution 245 // succeeds), and the orphan is unlinked again through a dirfd using 246 // unlinkat(2) (so the unlink refers to the orphan and not the new 247 // node). See Linux, fs/namei.c:do_rmdir(). 248 return linuxerr.EINVAL 249 } 250 if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { 251 return err 252 } 253 return nil 254 } 255 256 // Release implements vfs.FilesystemImpl.Release. 257 func (fs *Filesystem) Release(ctx context.Context) { 258 root := fs.root 259 if root == nil { 260 return 261 } 262 fs.mu.Lock() 263 root.releaseKeptDentriesLocked(ctx) 264 for fs.cachedDentriesLen != 0 { 265 fs.evictCachedDentryLocked(ctx) 266 } 267 fs.mu.Unlock() 268 // Drop ref acquired in Dentry.InitRoot(). 269 root.DecRef(ctx) 270 } 271 272 // releaseKeptDentriesLocked recursively drops all dentry references created by 273 // Lookup when Dentry.inode.Keep() is true. 274 // 275 // Precondition: Filesystem.mu is held. 
276 func (d *Dentry) releaseKeptDentriesLocked(ctx context.Context) { 277 if d.inode.Keep() && d != d.fs.root { 278 d.decRefLocked(ctx) 279 } 280 281 if d.isDir() { 282 var children []*Dentry 283 d.dirMu.Lock() 284 for _, child := range d.children { 285 children = append(children, child) 286 } 287 d.dirMu.Unlock() 288 for _, child := range children { 289 child.releaseKeptDentriesLocked(ctx) 290 } 291 } 292 } 293 294 // Sync implements vfs.FilesystemImpl.Sync. 295 func (fs *Filesystem) Sync(ctx context.Context) error { 296 // All filesystem state is in-memory. 297 return nil 298 } 299 300 // AccessAt implements vfs.Filesystem.Impl.AccessAt. 301 func (fs *Filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { 302 fs.mu.RLock() 303 defer fs.processDeferredDecRefs(ctx) 304 defer fs.mu.RUnlock() 305 306 d, err := fs.walkExistingLocked(ctx, rp) 307 if err != nil { 308 return err 309 } 310 if err := d.inode.CheckPermissions(ctx, creds, ats); err != nil { 311 return err 312 } 313 if ats.MayWrite() && rp.Mount().ReadOnly() { 314 return linuxerr.EROFS 315 } 316 return nil 317 } 318 319 // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. 320 func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { 321 fs.mu.RLock() 322 defer fs.processDeferredDecRefs(ctx) 323 defer fs.mu.RUnlock() 324 d, err := fs.walkExistingLocked(ctx, rp) 325 if err != nil { 326 return nil, err 327 } 328 329 if opts.CheckSearchable { 330 if !d.isDir() { 331 return nil, linuxerr.ENOTDIR 332 } 333 if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil { 334 return nil, err 335 } 336 } 337 vfsd := d.VFSDentry() 338 vfsd.IncRef() // Ownership transferred to caller. 339 return vfsd, nil 340 } 341 342 // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. 
343 func (fs *Filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { 344 fs.mu.RLock() 345 defer fs.processDeferredDecRefs(ctx) 346 defer fs.mu.RUnlock() 347 d, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*Dentry)) 348 if err != nil { 349 return nil, err 350 } 351 d.IncRef() // Ownership transferred to caller. 352 return d.VFSDentry(), nil 353 } 354 355 // LinkAt implements vfs.FilesystemImpl.LinkAt. 356 func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { 357 if rp.Done() { 358 return linuxerr.EEXIST 359 } 360 fs.mu.Lock() 361 defer fs.processDeferredDecRefs(ctx) 362 defer fs.mu.Unlock() 363 parent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*Dentry)) 364 if err != nil { 365 return err 366 } 367 368 if rp.Mount() != vd.Mount() { 369 return linuxerr.EXDEV 370 } 371 inode := vd.Dentry().Impl().(*Dentry).Inode() 372 if inode.Mode().IsDir() { 373 return linuxerr.EPERM 374 } 375 if err := vfs.MayLink(rp.Credentials(), inode.Mode(), inode.UID(), inode.GID()); err != nil { 376 return err 377 } 378 parent.dirMu.Lock() 379 defer parent.dirMu.Unlock() 380 pc := rp.Component() 381 if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil { 382 return err 383 } 384 if rp.MustBeDir() { 385 return linuxerr.ENOENT 386 } 387 if err := rp.Mount().CheckBeginWrite(); err != nil { 388 return err 389 } 390 defer rp.Mount().EndWrite() 391 392 childI, err := parent.inode.NewLink(ctx, pc, inode) 393 if err != nil { 394 return err 395 } 396 parent.inode.Watches().Notify(ctx, pc, linux.IN_CREATE, 0, vfs.InodeEvent, false /* unlinked */) 397 inode.Watches().Notify(ctx, "", linux.IN_ATTRIB, 0, vfs.InodeEvent, false /* unlinked */) 398 var child Dentry 399 child.Init(fs, childI) 400 parent.insertChildLocked(pc, &child) 401 return nil 402 } 403 404 // MkdirAt implements vfs.FilesystemImpl.MkdirAt. 
405 func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { 406 if rp.Done() { 407 return linuxerr.EEXIST 408 } 409 fs.mu.Lock() 410 defer fs.processDeferredDecRefs(ctx) 411 defer fs.mu.Unlock() 412 parent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*Dentry)) 413 if err != nil { 414 return err 415 } 416 417 parent.dirMu.Lock() 418 defer parent.dirMu.Unlock() 419 pc := rp.Component() 420 if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil { 421 return err 422 } 423 if err := rp.Mount().CheckBeginWrite(); err != nil { 424 return err 425 } 426 defer rp.Mount().EndWrite() 427 childI, err := parent.inode.NewDir(ctx, pc, opts) 428 if err != nil { 429 if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) { 430 return err 431 } 432 childI = newSyntheticDirectory(ctx, rp.Credentials(), opts.Mode) 433 } 434 var child Dentry 435 child.Init(fs, childI) 436 parent.inode.Watches().Notify(ctx, pc, linux.IN_CREATE|linux.IN_ISDIR, 0, vfs.InodeEvent, false /* unlinked */) 437 parent.insertChildLocked(pc, &child) 438 return nil 439 } 440 441 // MknodAt implements vfs.FilesystemImpl.MknodAt. 
func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error {
	// With no path component left there is no name to create.
	if rp.Done() {
		return linuxerr.EEXIST
	}
	// Deferred calls run in reverse order: fs.mu is released before the
	// deferred DecRefs are processed.
	fs.mu.Lock()
	defer fs.processDeferredDecRefs(ctx)
	defer fs.mu.Unlock()
	parent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*Dentry))
	if err != nil {
		return err
	}

	parent.dirMu.Lock()
	defer parent.dirMu.Unlock()
	pc := rp.Component()
	if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil {
		return err
	}
	if rp.MustBeDir() {
		return linuxerr.ENOENT
	}
	if err := rp.Mount().CheckBeginWrite(); err != nil {
		return err
	}
	defer rp.Mount().EndWrite()
	newI, err := parent.inode.NewNode(ctx, pc, opts)
	if err != nil {
		return err
	}
	parent.inode.Watches().Notify(ctx, pc, linux.IN_CREATE, 0, vfs.InodeEvent, false /* unlinked */)
	var newD Dentry
	newD.Init(fs, newI)
	parent.insertChildLocked(pc, &newD)
	return nil
}

// OpenAt implements vfs.FilesystemImpl.OpenAt.
//
// Without O_CREAT the path is resolved under fs.mu held for reading and the
// existing file is opened. With O_CREAT the path is resolved under fs.mu held
// for writing; a missing final component is created via the parent inode's
// NewFile, and a trailing symlink restarts resolution from the parent
// directory. In every path fs.mu is released before inode.Open is called.
func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
	ats := vfs.AccessTypesForOpenFlags(&opts)

	// Do not create new file.
	if opts.Flags&linux.O_CREAT == 0 {
		fs.mu.RLock()
		defer fs.processDeferredDecRefs(ctx)
		d, err := fs.walkExistingLocked(ctx, rp)
		if err != nil {
			fs.mu.RUnlock()
			return nil, err
		}
		if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
			fs.mu.RUnlock()
			return nil, err
		}
		// Open may block so we need to unlock fs.mu. IncRef d to prevent
		// its destruction while fs.mu is unlocked.
		d.IncRef()
		fs.mu.RUnlock()
		fd, err := d.inode.Open(ctx, rp, d, opts)
		d.DecRef(ctx)
		return fd, err
	}

	// May create new file.
	mustCreate := opts.Flags&linux.O_EXCL != 0
	start := rp.Start().Impl().(*Dentry)
	fs.mu.Lock()
	// unlock releases fs.mu at most once, so it can be called explicitly
	// before a blocking Open and again from the deferred call below.
	unlocked := false
	unlock := func() {
		if !unlocked {
			fs.mu.Unlock()
			unlocked = true
		}
	}
	// Process all to-be-decref'd dentries at the end at once.
	// Since we defer unlock() AFTER this, fs.mu is guaranteed to be unlocked
	// when this is executed.
	defer fs.processDeferredDecRefs(ctx)
	defer unlock()
	if rp.Done() {
		// The path names the start dentry itself; nothing can be created.
		if rp.MustBeDir() {
			return nil, linuxerr.EISDIR
		}
		if mustCreate {
			return nil, linuxerr.EEXIST
		}
		if err := start.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
			return nil, err
		}
		// Open may block so we need to unlock fs.mu. IncRef start to prevent
		// its destruction while fs.mu is unlocked.
		start.IncRef()
		unlock()
		fd, err := start.inode.Open(ctx, rp, start, opts)
		start.DecRef(ctx)
		return fd, err
	}
afterTrailingSymlink:
	parent, err := fs.walkParentDirLocked(ctx, rp, start)
	if err != nil {
		return nil, err
	}
	// Check for search permission in the parent directory.
	if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
		return nil, err
	}
	// Reject attempts to open directories with O_CREAT.
	if rp.MustBeDir() {
		return nil, linuxerr.EISDIR
	}
	pc := rp.Component()
	if pc == "." || pc == ".." {
		return nil, linuxerr.EISDIR
	}
	if len(pc) > linux.NAME_MAX {
		return nil, linuxerr.ENAMETOOLONG
	}
	if parent.VFSDentry().IsDead() {
		return nil, linuxerr.ENOENT
	}
	// Determine whether or not we need to create a file.
	child, followedSymlink, err := fs.stepExistingLocked(ctx, rp, parent)
	if followedSymlink {
		if mustCreate {
			// EEXIST must be returned if an existing symlink is opened with O_EXCL.
			return nil, linuxerr.EEXIST
		}
		if err != nil {
			// If followedSymlink && err != nil, then this symlink resolution error
			// must be handled by the VFS layer.
			return nil, err
		}
		// Restart resolution of the (now rewritten) path from the directory
		// that contained the symlink.
		start = parent
		goto afterTrailingSymlink
	}
	if linuxerr.Equals(linuxerr.ENOENT, err) {
		// Already checked for searchability above; now check for writability.
		if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil {
			return nil, err
		}
		if err := rp.Mount().CheckBeginWrite(); err != nil {
			return nil, err
		}
		// This defer sits inside the if block but still runs at function
		// return, i.e. after the Open call below.
		defer rp.Mount().EndWrite()
		// Create and open the child.
		childI, err := parent.inode.NewFile(ctx, pc, opts)
		if err != nil {
			return nil, err
		}
		var child Dentry
		child.Init(fs, childI)
		// parent.dirMu is not held on this path, so the non-"Locked" insert
		// variant is used here, unlike in MknodAt.
		parent.insertChild(pc, &child)
		// Open may block so we need to unlock fs.mu. IncRef child to prevent
		// its destruction while fs.mu is unlocked.
		child.IncRef()
		unlock()
		parent.inode.Watches().Notify(ctx, pc, linux.IN_CREATE, 0, vfs.PathEvent, false /* unlinked */)
		fd, err := child.inode.Open(ctx, rp, &child, opts)
		child.DecRef(ctx)
		return fd, err
	}
	if err != nil {
		return nil, err
	}
	// Open existing file or follow symlink.
	if mustCreate {
		return nil, linuxerr.EEXIST
	}
	if rp.MustBeDir() && !child.isDir() {
		return nil, linuxerr.ENOTDIR
	}
	if err := child.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
		return nil, err
	}
	if child.isDir() {
		// Can't open directories with O_CREAT.
		// NOTE(review): this part of the function is only reached with
		// O_CREAT set (the O_CREAT == 0 case returned near the top), so this
		// check looks like it always fires for directories and the two
		// checks after it look unreachable — confirm before relying on them.
		if opts.Flags&linux.O_CREAT != 0 {
			return nil, linuxerr.EISDIR
		}
		// Can't open directories writably.
		if ats&vfs.MayWrite != 0 {
			return nil, linuxerr.EISDIR
		}
		if opts.Flags&linux.O_DIRECT != 0 {
			return nil, linuxerr.EINVAL
		}
	}
	// Open may block so we need to unlock fs.mu. IncRef child to prevent
	// its destruction while fs.mu is unlocked.
	child.IncRef()
	unlock()
	fd, err := child.inode.Open(ctx, rp, child, opts)
	child.DecRef(ctx)
	return fd, err
}

// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
//
// It returns EINVAL when rp does not resolve to a symlink.
func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
	defer fs.processDeferredDecRefs(ctx)

	fs.mu.RLock()
	d, err := fs.walkExistingLocked(ctx, rp)
	if err != nil {
		fs.mu.RUnlock()
		return "", err
	}
	if !d.isSymlink() {
		fs.mu.RUnlock()
		return "", linuxerr.EINVAL
	}

	// Inode.Readlink() cannot be called holding fs locks.
	// The extra reference on d is held across the unlocked Readlink call and
	// dropped by the deferred DecRef.
	d.IncRef()
	defer d.DecRef(ctx)
	fs.mu.RUnlock()

	return d.inode.Readlink(ctx, rp.Mount())
}

// RenameAt implements vfs.FilesystemImpl.RenameAt.
//
// It moves the child named oldName of oldParentVD to the name given by the
// final component of rp, replacing an existing destination entry unless
// RENAME_NOREPLACE is set. Any other rename flag yields EINVAL. The whole
// operation runs with fs.mu held for writing.
func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
	fs.mu.Lock()
	defer fs.processDeferredDecRefs(ctx)
	defer fs.mu.Unlock()

	// Resolve the destination directory first to verify that it's on this
	// Mount.
	dstDir, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*Dentry))
	if err != nil {
		return err
	}

	// Only RENAME_NOREPLACE is supported.
	if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
		return linuxerr.EINVAL
	}
	noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0

	mnt := rp.Mount()
	if mnt != oldParentVD.Mount() {
		return linuxerr.EXDEV
	}
	if err := mnt.CheckBeginWrite(); err != nil {
		return err
	}
	defer mnt.EndWrite()
	// Both the source and the destination directory must be writable and
	// searchable by the caller.
	oldParentDir := oldParentVD.Dentry().Impl().(*Dentry).Inode()
	if err := oldParentDir.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}
	if err := dstDir.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}

	// Look up (and revalidate) the source child under srcDir.dirMu; the lock
	// is dropped again before the remaining checks.
	srcDirVFSD := oldParentVD.Dentry()
	srcDir := srcDirVFSD.Impl().(*Dentry)
	srcDir.dirMu.Lock()
	src, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), srcDir, oldName, srcDir.children[oldName])
	srcDir.dirMu.Unlock()
	if err != nil {
		return err
	}

	// Can we remove the src dentry?
	if err := checkDeleteLocked(ctx, rp, src); err != nil {
		return err
	}

	// Can we create the dst dentry?
	var dst *Dentry
	newName := rp.Component()
	if newName == "." || newName == ".." {
		if noReplace {
			return linuxerr.EEXIST
		}
		return linuxerr.EBUSY
	}
	if len(newName) > linux.NAME_MAX {
		return linuxerr.ENAMETOOLONG
	}

	// NOTE(review): checkCreateLocked reads dstDir.children while
	// dstDir.dirMu is not held here; only fs.mu (for writing) is held —
	// confirm that is sufficient.
	err = checkCreateLocked(ctx, rp.Credentials(), newName, dstDir)
	switch {
	case err == nil:
		// Ok, continue with rename as replacement.
	case linuxerr.Equals(linuxerr.EEXIST, err):
		if noReplace {
			// Won't overwrite existing node since RENAME_NOREPLACE was requested.
			return linuxerr.EEXIST
		}
		dst = dstDir.children[newName]
		if dst == nil {
			panic(fmt.Sprintf("Child %q for parent Dentry %+v disappeared inside atomic section?", newName, dstDir))
		}
	default:
		return err
	}

	// Renaming an entry onto itself is a successful no-op.
	if srcDir == dstDir && oldName == newName {
		return nil
	}

	// dstVFSD stays nil when there is no existing destination entry.
	var dstVFSD *vfs.Dentry
	if dst != nil {
		dstVFSD = dst.VFSDentry()
	}

	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)
	virtfs := rp.VirtualFilesystem()

	// We can't deadlock here due to lock ordering because we're protected from
	// concurrent renames by fs.mu held for writing.
	srcDir.dirMu.Lock()
	defer srcDir.dirMu.Unlock()
	if srcDir != dstDir {
		dstDir.dirMu.Lock()
		defer dstDir.dirMu.Unlock()
	}

	srcVFSD := src.VFSDentry()
	if err := virtfs.PrepareRenameDentry(mntns, srcVFSD, dstVFSD); err != nil {
		return err
	}
	err = srcDir.inode.Rename(ctx, src.name, newName, src.inode, dstDir.inode)
	if err != nil {
		// Undo the prepare step when the inode-level rename fails.
		virtfs.AbortRenameDentry(srcVFSD, dstVFSD)
		return err
	}
	// From here on the dentry tree is updated to match the completed rename.
	delete(srcDir.children, src.name)
	if srcDir != dstDir {
		fs.deferDecRef(srcDir) // child (src) drops ref on old parent.
		dstDir.IncRef()        // child (src) takes a ref on the new parent.
	}
	src.parent.Store(dstDir)
	src.name = newName
	if dstDir.children == nil {
		dstDir.children = make(map[string]*Dentry)
	}
	replaced := dstDir.children[newName]
	dstDir.children[newName] = src
	var replaceVFSD *vfs.Dentry
	if replaced != nil {
		// deferDecRef so that fs.mu and dstDir.dirMu are unlocked by then.
		fs.deferDecRef(replaced)
		replaceVFSD = replaced.VFSDentry()
		replaced.setDeleted()
	}
	vfs.InotifyRename(ctx, src.inode.Watches(), srcDir.inode.Watches(), dstDir.inode.Watches(), oldName, newName, src.isDir())
	for _, rc := range virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) { // +checklocksforce: to may be nil, that's okay.
		fs.deferDecRef(rc)
	}
	return nil
}

// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
//
// It removes the empty directory named by the final component of rp from its
// parent directory.
func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
	fs.mu.Lock()
	defer fs.processDeferredDecRefs(ctx)
	defer fs.mu.Unlock()
	parent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*Dentry))
	if err != nil {
		return err
	}
	if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil {
		return err
	}
	if err := rp.Mount().CheckBeginWrite(); err != nil {
		return err
	}
	defer rp.Mount().EndWrite()
	name := rp.Component()
	if name == "." {
		return linuxerr.EINVAL
	}
	if name == ".." {
		return linuxerr.ENOTEMPTY
	}
	// Only an already-cached child is considered; no Lookup/revalidation is
	// performed for the final component here.
	// NOTE(review): parent.children is read before parent.dirMu is taken
	// below; only fs.mu (for writing) is held at this point — confirm that
	// is sufficient.
	child, ok := parent.children[name]
	if !ok {
		return linuxerr.ENOENT
	}
	if err := checkDeleteLocked(ctx, rp, child); err != nil {
		return err
	}
	if err := vfs.CheckDeleteSticky(
		rp.Credentials(),
		linux.FileMode(parent.inode.Mode()),
		auth.KUID(parent.inode.UID()),
		auth.KUID(child.inode.UID()),
		auth.KGID(child.inode.GID()),
	); err != nil {
		return err
	}
	if !child.isDir() {
		return linuxerr.ENOTDIR
	}
	if child.inode.HasChildren() {
		return linuxerr.ENOTEMPTY
	}
	virtfs := rp.VirtualFilesystem()
	parent.dirMu.Lock()
	defer parent.dirMu.Unlock()

	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)
	vfsd := child.VFSDentry()
	if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil {
		return err // +checklocksforce: vfsd is not locked.
	}

	if err := parent.inode.RmDir(ctx, child.name, child.inode); err != nil {
		// Undo the prepare step when the inode-level removal fails.
		virtfs.AbortDeleteDentry(vfsd)
		return err
	}
	delete(parent.children, child.name)
	parent.inode.Watches().Notify(ctx, child.name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent, true /* unlinked */)
	// Defer decref so that fs.mu and parent.dirMu are unlocked by then.
	fs.deferDecRef(child)
	rcs := virtfs.CommitDeleteDentry(ctx, vfsd)
	for _, rc := range rcs {
		fs.deferDecRef(rc)
	}
	child.setDeleted()
	return nil
}

// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
869 func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { 870 fs.mu.RLock() 871 defer fs.processDeferredDecRefs(ctx) 872 d, err := fs.walkExistingLocked(ctx, rp) 873 if err != nil { 874 fs.mu.RUnlock() 875 return err 876 } 877 if opts.Stat.Mask == 0 { 878 fs.mu.RUnlock() 879 return nil 880 } 881 err = d.inode.SetStat(ctx, fs.VFSFilesystem(), rp.Credentials(), opts) 882 fs.mu.RUnlock() 883 if err != nil { 884 return err 885 } 886 if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { 887 d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent) 888 } 889 return nil 890 } 891 892 // StatAt implements vfs.FilesystemImpl.StatAt. 893 func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { 894 fs.mu.RLock() 895 defer fs.processDeferredDecRefs(ctx) 896 defer fs.mu.RUnlock() 897 d, err := fs.walkExistingLocked(ctx, rp) 898 if err != nil { 899 return linux.Statx{}, err 900 } 901 return d.inode.Stat(ctx, fs.VFSFilesystem(), opts) 902 } 903 904 // StatFSAt implements vfs.FilesystemImpl.StatFSAt. 905 func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { 906 fs.mu.RLock() 907 defer fs.processDeferredDecRefs(ctx) 908 defer fs.mu.RUnlock() 909 d, err := fs.walkExistingLocked(ctx, rp) 910 if err != nil { 911 return linux.Statfs{}, err 912 } 913 return d.inode.StatFS(ctx, fs.VFSFilesystem()) 914 } 915 916 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. 
917 func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { 918 if rp.Done() { 919 return linuxerr.EEXIST 920 } 921 fs.mu.Lock() 922 defer fs.processDeferredDecRefs(ctx) 923 defer fs.mu.Unlock() 924 parent, err := fs.walkParentDirLocked(ctx, rp, rp.Start().Impl().(*Dentry)) 925 if err != nil { 926 return err 927 } 928 parent.dirMu.Lock() 929 defer parent.dirMu.Unlock() 930 931 pc := rp.Component() 932 if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil { 933 return err 934 } 935 if rp.MustBeDir() { 936 return linuxerr.ENOENT 937 } 938 if err := rp.Mount().CheckBeginWrite(); err != nil { 939 return err 940 } 941 defer rp.Mount().EndWrite() 942 childI, err := parent.inode.NewSymlink(ctx, pc, target) 943 if err != nil { 944 return err 945 } 946 parent.inode.Watches().Notify(ctx, pc, linux.IN_CREATE, 0, vfs.InodeEvent, false /* unlinked */) 947 var child Dentry 948 child.Init(fs, childI) 949 parent.insertChildLocked(pc, &child) 950 return nil 951 } 952 953 // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. 
954 func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { 955 fs.mu.Lock() 956 defer fs.processDeferredDecRefs(ctx) 957 defer fs.mu.Unlock() 958 959 d, err := fs.walkExistingLocked(ctx, rp) 960 if err != nil { 961 return err 962 } 963 if err := rp.Mount().CheckBeginWrite(); err != nil { 964 return err 965 } 966 defer rp.Mount().EndWrite() 967 if err := checkDeleteLocked(ctx, rp, d); err != nil { 968 return err 969 } 970 if d.isDir() { 971 return linuxerr.EISDIR 972 } 973 virtfs := rp.VirtualFilesystem() 974 parentDentry := d.parent.Load() 975 parentDentry.dirMu.Lock() 976 defer parentDentry.dirMu.Unlock() 977 mntns := vfs.MountNamespaceFromContext(ctx) 978 defer mntns.DecRef(ctx) 979 vfsd := d.VFSDentry() 980 if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { 981 return err 982 } 983 if err := parentDentry.inode.Unlink(ctx, d.name, d.inode); err != nil { 984 virtfs.AbortDeleteDentry(vfsd) 985 return err 986 } 987 delete(parentDentry.children, d.name) 988 vfs.InotifyRemoveChild(ctx, d.inode.Watches(), parentDentry.inode.Watches(), d.name) 989 // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then. 990 fs.deferDecRef(d) 991 rcs := virtfs.CommitDeleteDentry(ctx, vfsd) 992 for _, rc := range rcs { 993 fs.deferDecRef(rc) 994 } 995 d.setDeleted() 996 return nil 997 } 998 999 // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. 1000 func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { 1001 fs.mu.RLock() 1002 defer fs.processDeferredDecRefs(ctx) 1003 defer fs.mu.RUnlock() 1004 d, err := fs.walkExistingLocked(ctx, rp) 1005 if err != nil { 1006 return nil, err 1007 } 1008 if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil { 1009 return nil, err 1010 } 1011 return nil, linuxerr.ECONNREFUSED 1012 } 1013 1014 // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. 
1015 func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { 1016 fs.mu.RLock() 1017 defer fs.processDeferredDecRefs(ctx) 1018 defer fs.mu.RUnlock() 1019 _, err := fs.walkExistingLocked(ctx, rp) 1020 if err != nil { 1021 return nil, err 1022 } 1023 // kernfs currently does not support extended attributes. 1024 return nil, linuxerr.ENOTSUP 1025 } 1026 1027 // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. 1028 func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { 1029 fs.mu.RLock() 1030 defer fs.processDeferredDecRefs(ctx) 1031 defer fs.mu.RUnlock() 1032 _, err := fs.walkExistingLocked(ctx, rp) 1033 if err != nil { 1034 return "", err 1035 } 1036 // kernfs currently does not support extended attributes. 1037 return "", linuxerr.ENOTSUP 1038 } 1039 1040 // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. 1041 func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { 1042 fs.mu.RLock() 1043 defer fs.processDeferredDecRefs(ctx) 1044 defer fs.mu.RUnlock() 1045 _, err := fs.walkExistingLocked(ctx, rp) 1046 if err != nil { 1047 return err 1048 } 1049 // kernfs currently does not support extended attributes. 1050 return linuxerr.ENOTSUP 1051 } 1052 1053 // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. 1054 func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { 1055 fs.mu.RLock() 1056 defer fs.processDeferredDecRefs(ctx) 1057 defer fs.mu.RUnlock() 1058 _, err := fs.walkExistingLocked(ctx, rp) 1059 if err != nil { 1060 return err 1061 } 1062 // kernfs currently does not support extended attributes. 1063 return linuxerr.ENOTSUP 1064 } 1065 1066 // PrependPath implements vfs.FilesystemImpl.PrependPath. 
1067 func (fs *Filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { 1068 fs.mu.RLock() 1069 defer fs.mu.RUnlock() 1070 return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b) 1071 } 1072 1073 func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) { 1074 if d, ok := vd.Dentry().Impl().(*Dentry); ok && d.fs == fs { 1075 // The following is equivalent to vd.DecRef(ctx). This is needed 1076 // because if d belongs to this filesystem, we can not DecRef it right 1077 // away as we may be holding fs.mu. d.DecRef may acquire fs.mu. So we 1078 // defer the DecRef to when locks are dropped. 1079 vd.Mount().DecRef(ctx) 1080 fs.deferDecRef(d) 1081 } else { 1082 vd.DecRef(ctx) 1083 } 1084 } 1085 1086 // IsDescendant implements vfs.FilesystemImpl.IsDescendant. 1087 func (fs *Filesystem) IsDescendant(vfsroot, vd vfs.VirtualDentry) bool { 1088 return genericIsDescendant(vfsroot.Dentry(), vd.Dentry().Impl().(*Dentry)) 1089 }