// github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/kernfs/filesystem.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kernfs

// This file implements vfs.FilesystemImpl for kernfs.

import (
	"fmt"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/fspath"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
	"github.com/SagerNet/gvisor/pkg/syserror"
)

// stepExistingLocked resolves rp.Component() in parent directory d and, on
// success, advances rp past that component and returns the resulting dentry.
//
// If mayFollowSymlinks is true and rp.ShouldFollowSymlink() reports true, a
// symlink found at the current component is resolved before returning.
//
// stepExistingLocked is loosely analogous to fs/namei.c:walk_component().
//
// Preconditions:
// * Filesystem.mu must be locked for at least reading.
// * !rp.Done().
//
// Postcondition: Caller must call fs.processDeferredDecRefs*.
func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, mayFollowSymlinks bool) (*Dentry, error) {
	if !d.isDir() {
		return nil, syserror.ENOTDIR
	}
	// Directory searchable?
	if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
		return nil, err
	}
afterSymlink:
	name := rp.Component()
	// Revalidation must be skipped if name is "." or ".."; d or its parent
	// respectively can't be expected to transition from invalidated back to
	// valid, so detecting invalidation and retrying would loop forever. This
	// is consistent with Linux: fs/namei.c:walk_component() => lookup_fast()
	// calls d_revalidate(), but walk_component() => handle_dots() does not.
	if name == "." {
		rp.Advance()
		return d, nil
	}
	if name == ".." {
		// At the resolution root, or at a dentry with no parent, ".." resolves
		// to d itself.
		if isRoot, err := rp.CheckRoot(ctx, d.VFSDentry()); err != nil {
			return nil, err
		} else if isRoot || d.parent == nil {
			rp.Advance()
			return d, nil
		}
		if err := rp.CheckMount(ctx, d.parent.VFSDentry()); err != nil {
			return nil, err
		}
		rp.Advance()
		return d.parent, nil
	}
	if len(name) > linux.NAME_MAX {
		return nil, linuxerr.ENAMETOOLONG
	}
	// d.dirMu guards d.children and is only held for the duration of the
	// cache lookup and revalidation.
	d.dirMu.Lock()
	next, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, d.children[name])
	d.dirMu.Unlock()
	if err != nil {
		return nil, err
	}
	if err := rp.CheckMount(ctx, next.VFSDentry()); err != nil {
		return nil, err
	}
	// Resolve any symlink at current path component.
	if mayFollowSymlinks && rp.ShouldFollowSymlink() && next.isSymlink() {
		targetVD, targetPathname, err := next.inode.Getlink(ctx, rp.Mount())
		if err != nil {
			return nil, err
		}
		if targetVD.Ok() {
			// Getlink produced a direct target. Its reference is released via
			// deferDecRefVD regardless of whether HandleJump succeeded.
			err := rp.HandleJump(targetVD)
			fs.deferDecRefVD(ctx, targetVD)
			if err != nil {
				return nil, err
			}
		} else {
			if err := rp.HandleSymlink(targetPathname); err != nil {
				return nil, err
			}
		}
		// rp now describes the symlink target; resolve its next component
		// relative to the same directory d.
		goto afterSymlink
	}
	rp.Advance()
	return next, nil
}

// revalidateChildLocked verifies that child, the dentry cached in
// parent.children under name (which may be nil), is still correct, and
// returns the dentry that should be used for name.
//
// A cached child whose inode is no longer valid is removed from
// parent.children and invalidated in vfsObj. If there is no usable cached
// child, one is created from parent.inode.Lookup and inserted into parent.
//
// Preconditions:
// * Filesystem.mu must be locked for at least reading.
// * parent.dirMu must be locked.
// * parent.isDir().
// * name is not "." or "..".
//
// Postconditions: Caller must call fs.processDeferredDecRefs*.
func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *Dentry, name string, child *Dentry) (*Dentry, error) {
	if child != nil {
		// Cached dentry exists, revalidate.
		if !child.inode.Valid(ctx) {
			delete(parent.children, name)
			if child.inode.Keep() {
				// Drop the ref owned by kernfs.
				fs.deferDecRef(child)
			}
			vfsObj.InvalidateDentry(ctx, child.VFSDentry())
			child = nil
		}
	}
	if child == nil {
		// Dentry isn't cached; it either doesn't exist or failed revalidation.
		// Attempt to resolve it via Lookup.
		childInode, err := parent.inode.Lookup(ctx, name)
		if err != nil {
			return nil, err
		}
		var newChild Dentry
		newChild.Init(fs, childInode) // childInode's ref is transferred to newChild.
		parent.insertChildLocked(name, &newChild)
		child = &newChild

		// Drop the ref on newChild. This will cause the dentry to get pruned
		// from the dentry tree by the end of current filesystem operation
		// (before returning to the VFS layer) if another ref is not picked on
		// this dentry.
		if !childInode.Keep() {
			fs.deferDecRef(&newChild)
		}
	}
	return child, nil
}

// walkExistingLocked resolves rp to an existing file.
//
// walkExistingLocked is loosely analogous to Linux's
// fs/namei.c:path_lookupat().
//
// Preconditions: Filesystem.mu must be locked for at least reading.
//
// Postconditions: Caller must call fs.processDeferredDecRefs*.
func (fs *Filesystem) walkExistingLocked(ctx context.Context, rp *vfs.ResolvingPath) (*Dentry, error) {
	d := rp.Start().Impl().(*Dentry)
	// Consume every remaining component, following symlinks where rp allows.
	for !rp.Done() {
		var err error
		d, err = fs.stepExistingLocked(ctx, rp, d, true /* mayFollowSymlinks */)
		if err != nil {
			return nil, err
		}
	}
	// Enforce rp's directory requirement on the final dentry.
	if rp.MustBeDir() && !d.isDir() {
		return nil, syserror.ENOTDIR
	}
	return d, nil
}

// walkParentDirLocked resolves all but the last path component of rp to an
// existing directory. It does not check that the returned directory is
// searchable by the provider of rp.
//
// walkParentDirLocked is loosely analogous to Linux's
// fs/namei.c:path_parentat().
//
// Preconditions:
// * Filesystem.mu must be locked for at least reading.
// * !rp.Done().
//
// Postconditions: Caller must call fs.processDeferredDecRefs*.
190 func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath) (*Dentry, error) { 191 d := rp.Start().Impl().(*Dentry) 192 for !rp.Final() { 193 var err error 194 d, err = fs.stepExistingLocked(ctx, rp, d, true /* mayFollowSymlinks */) 195 if err != nil { 196 return nil, err 197 } 198 } 199 if !d.isDir() { 200 return nil, syserror.ENOTDIR 201 } 202 return d, nil 203 } 204 205 // checkCreateLocked checks that a file named rp.Component() may be created in 206 // directory parent, then returns rp.Component(). 207 // 208 // Preconditions: 209 // * Filesystem.mu must be locked for at least reading. 210 // * isDir(parentInode) == true. 211 func checkCreateLocked(ctx context.Context, creds *auth.Credentials, name string, parent *Dentry) error { 212 // Order of checks is important. First check if parent directory can be 213 // executed, then check for existence, and lastly check if mount is writable. 214 if err := parent.inode.CheckPermissions(ctx, creds, vfs.MayExec); err != nil { 215 return err 216 } 217 if name == "." || name == ".." { 218 return syserror.EEXIST 219 } 220 if len(name) > linux.NAME_MAX { 221 return linuxerr.ENAMETOOLONG 222 } 223 if _, ok := parent.children[name]; ok { 224 return syserror.EEXIST 225 } 226 if parent.VFSDentry().IsDead() { 227 return syserror.ENOENT 228 } 229 if err := parent.inode.CheckPermissions(ctx, creds, vfs.MayWrite); err != nil { 230 return err 231 } 232 return nil 233 } 234 235 // checkDeleteLocked checks that the file represented by vfsd may be deleted. 236 // 237 // Preconditions: Filesystem.mu must be locked for at least reading. 
238 func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry) error { 239 parent := d.parent 240 if parent == nil { 241 return linuxerr.EBUSY 242 } 243 if parent.vfsd.IsDead() { 244 return syserror.ENOENT 245 } 246 if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { 247 return err 248 } 249 return nil 250 } 251 252 // Release implements vfs.FilesystemImpl.Release. 253 func (fs *Filesystem) Release(ctx context.Context) { 254 root := fs.root 255 if root == nil { 256 return 257 } 258 fs.mu.Lock() 259 root.releaseKeptDentriesLocked(ctx) 260 for fs.cachedDentriesLen != 0 { 261 fs.evictCachedDentryLocked(ctx) 262 } 263 fs.mu.Unlock() 264 // Drop ref acquired in Dentry.InitRoot(). 265 root.DecRef(ctx) 266 } 267 268 // releaseKeptDentriesLocked recursively drops all dentry references created by 269 // Lookup when Dentry.inode.Keep() is true. 270 // 271 // Precondition: Filesystem.mu is held. 272 func (d *Dentry) releaseKeptDentriesLocked(ctx context.Context) { 273 if d.inode.Keep() && d != d.fs.root { 274 d.decRefLocked(ctx) 275 } 276 277 if d.isDir() { 278 var children []*Dentry 279 d.dirMu.Lock() 280 for _, child := range d.children { 281 children = append(children, child) 282 } 283 d.dirMu.Unlock() 284 for _, child := range children { 285 child.releaseKeptDentriesLocked(ctx) 286 } 287 } 288 } 289 290 // Sync implements vfs.FilesystemImpl.Sync. 291 func (fs *Filesystem) Sync(ctx context.Context) error { 292 // All filesystem state is in-memory. 293 return nil 294 } 295 296 // AccessAt implements vfs.Filesystem.Impl.AccessAt. 
297 func (fs *Filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { 298 fs.mu.RLock() 299 defer fs.processDeferredDecRefs(ctx) 300 defer fs.mu.RUnlock() 301 302 d, err := fs.walkExistingLocked(ctx, rp) 303 if err != nil { 304 return err 305 } 306 return d.inode.CheckPermissions(ctx, creds, ats) 307 } 308 309 // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. 310 func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { 311 fs.mu.RLock() 312 defer fs.processDeferredDecRefs(ctx) 313 defer fs.mu.RUnlock() 314 d, err := fs.walkExistingLocked(ctx, rp) 315 if err != nil { 316 return nil, err 317 } 318 319 if opts.CheckSearchable { 320 if !d.isDir() { 321 return nil, syserror.ENOTDIR 322 } 323 if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil { 324 return nil, err 325 } 326 } 327 vfsd := d.VFSDentry() 328 vfsd.IncRef() // Ownership transferred to caller. 329 return vfsd, nil 330 } 331 332 // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. 333 func (fs *Filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { 334 fs.mu.RLock() 335 defer fs.processDeferredDecRefs(ctx) 336 defer fs.mu.RUnlock() 337 d, err := fs.walkParentDirLocked(ctx, rp) 338 if err != nil { 339 return nil, err 340 } 341 d.IncRef() // Ownership transferred to caller. 342 return d.VFSDentry(), nil 343 } 344 345 // LinkAt implements vfs.FilesystemImpl.LinkAt. 
346 func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { 347 if rp.Done() { 348 return syserror.EEXIST 349 } 350 fs.mu.Lock() 351 defer fs.processDeferredDecRefs(ctx) 352 defer fs.mu.Unlock() 353 parent, err := fs.walkParentDirLocked(ctx, rp) 354 if err != nil { 355 return err 356 } 357 358 parent.dirMu.Lock() 359 defer parent.dirMu.Unlock() 360 pc := rp.Component() 361 if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil { 362 return err 363 } 364 if rp.MustBeDir() { 365 return syserror.ENOENT 366 } 367 if rp.Mount() != vd.Mount() { 368 return linuxerr.EXDEV 369 } 370 if err := rp.Mount().CheckBeginWrite(); err != nil { 371 return err 372 } 373 defer rp.Mount().EndWrite() 374 375 d := vd.Dentry().Impl().(*Dentry) 376 if d.isDir() { 377 return linuxerr.EPERM 378 } 379 380 childI, err := parent.inode.NewLink(ctx, pc, d.inode) 381 if err != nil { 382 return err 383 } 384 var child Dentry 385 child.Init(fs, childI) 386 parent.insertChildLocked(pc, &child) 387 return nil 388 } 389 390 // MkdirAt implements vfs.FilesystemImpl.MkdirAt. 
391 func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { 392 if rp.Done() { 393 return syserror.EEXIST 394 } 395 fs.mu.Lock() 396 defer fs.processDeferredDecRefs(ctx) 397 defer fs.mu.Unlock() 398 parent, err := fs.walkParentDirLocked(ctx, rp) 399 if err != nil { 400 return err 401 } 402 403 parent.dirMu.Lock() 404 defer parent.dirMu.Unlock() 405 pc := rp.Component() 406 if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil { 407 return err 408 } 409 if err := rp.Mount().CheckBeginWrite(); err != nil { 410 return err 411 } 412 defer rp.Mount().EndWrite() 413 childI, err := parent.inode.NewDir(ctx, pc, opts) 414 if err != nil { 415 if !opts.ForSyntheticMountpoint || linuxerr.Equals(linuxerr.EEXIST, err) { 416 return err 417 } 418 childI = newSyntheticDirectory(ctx, rp.Credentials(), opts.Mode) 419 } 420 var child Dentry 421 child.Init(fs, childI) 422 parent.insertChildLocked(pc, &child) 423 return nil 424 } 425 426 // MknodAt implements vfs.FilesystemImpl.MknodAt. 427 func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { 428 if rp.Done() { 429 return syserror.EEXIST 430 } 431 fs.mu.Lock() 432 defer fs.processDeferredDecRefs(ctx) 433 defer fs.mu.Unlock() 434 parent, err := fs.walkParentDirLocked(ctx, rp) 435 if err != nil { 436 return err 437 } 438 439 parent.dirMu.Lock() 440 defer parent.dirMu.Unlock() 441 pc := rp.Component() 442 if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil { 443 return err 444 } 445 if rp.MustBeDir() { 446 return syserror.ENOENT 447 } 448 if err := rp.Mount().CheckBeginWrite(); err != nil { 449 return err 450 } 451 defer rp.Mount().EndWrite() 452 newI, err := parent.inode.NewNode(ctx, pc, opts) 453 if err != nil { 454 return err 455 } 456 var newD Dentry 457 newD.Init(fs, newI) 458 parent.insertChildLocked(pc, &newD) 459 return nil 460 } 461 462 // OpenAt implements vfs.FilesystemImpl.OpenAt. 
func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
	// Filter out flags that are not supported by kernfs. O_DIRECTORY and
	// O_NOFOLLOW have no effect here (they're handled by VFS by setting
	// appropriate bits in rp), but are returned by
	// FileDescriptionImpl.StatusFlags().
	opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC |
		linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK | linux.O_NOCTTY
	ats := vfs.AccessTypesForOpenFlags(&opts)

	// Do not create new file.
	if opts.Flags&linux.O_CREAT == 0 {
		// Without O_CREAT nothing is created, so a read lock on fs.mu is
		// enough. It is released by hand on every path because it must be
		// dropped before the inode's Open is called.
		fs.mu.RLock()
		defer fs.processDeferredDecRefs(ctx)
		d, err := fs.walkExistingLocked(ctx, rp)
		if err != nil {
			fs.mu.RUnlock()
			return nil, err
		}
		if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
			fs.mu.RUnlock()
			return nil, err
		}
		// Open may block so we need to unlock fs.mu. IncRef d to prevent
		// its destruction while fs.mu is unlocked.
		d.IncRef()
		fs.mu.RUnlock()
		fd, err := d.inode.Open(ctx, rp, d, opts)
		d.DecRef(ctx)
		return fd, err
	}

	// May create new file.
	mustCreate := opts.Flags&linux.O_EXCL != 0
	d := rp.Start().Impl().(*Dentry)
	fs.mu.Lock()
	// unlock releases fs.mu at most once, so it can be called explicitly
	// before a blocking Open and again, harmlessly, from the defer below.
	unlocked := false
	unlock := func() {
		if !unlocked {
			fs.mu.Unlock()
			unlocked = true
		}
	}
	// Process all to-be-decref'd dentries at the end at once.
	// Since we defer unlock() AFTER this, fs.mu is guaranteed to be unlocked
	// when this is executed.
	defer fs.processDeferredDecRefs(ctx)
	defer unlock()
	if rp.Done() {
		// The path is already fully resolved: the target is the start dentry
		// itself, so there is nothing to create.
		if rp.MustBeDir() {
			return nil, syserror.EISDIR
		}
		if mustCreate {
			return nil, syserror.EEXIST
		}
		if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
			return nil, err
		}
		// Open may block so we need to unlock fs.mu. IncRef d to prevent
		// its destruction while fs.mu is unlocked.
		d.IncRef()
		unlock()
		fd, err := d.inode.Open(ctx, rp, d, opts)
		d.DecRef(ctx)
		return fd, err
	}
afterTrailingSymlink:
	parent, err := fs.walkParentDirLocked(ctx, rp)
	if err != nil {
		return nil, err
	}
	// Check for search permission in the parent directory.
	if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil {
		return nil, err
	}
	// Reject attempts to open directories with O_CREAT.
	if rp.MustBeDir() {
		return nil, syserror.EISDIR
	}
	pc := rp.Component()
	if pc == "." || pc == ".." {
		return nil, syserror.EISDIR
	}
	if len(pc) > linux.NAME_MAX {
		return nil, linuxerr.ENAMETOOLONG
	}
	// Determine whether or not we need to create a file.
	child, err := fs.stepExistingLocked(ctx, rp, parent, false /* mayFollowSymlinks */)
	if linuxerr.Equals(linuxerr.ENOENT, err) {
		// Already checked for searchability above; now check for writability.
		if err := parent.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil {
			return nil, err
		}
		if err := rp.Mount().CheckBeginWrite(); err != nil {
			return nil, err
		}
		defer rp.Mount().EndWrite()
		// Create and open the child.
		childI, err := parent.inode.NewFile(ctx, pc, opts)
		if err != nil {
			return nil, err
		}
		var child Dentry
		child.Init(fs, childI)
		// parent.dirMu is not held at this point, hence insertChild rather
		// than insertChildLocked.
		parent.insertChild(pc, &child)
		// Open may block so we need to unlock fs.mu. IncRef child to prevent
		// its destruction while fs.mu is unlocked.
		child.IncRef()
		unlock()
		fd, err := child.inode.Open(ctx, rp, &child, opts)
		child.DecRef(ctx)
		return fd, err
	}
	if err != nil {
		return nil, err
	}
	// Open existing file or follow symlink.
	if mustCreate {
		return nil, syserror.EEXIST
	}
	if rp.ShouldFollowSymlink() && child.isSymlink() {
		targetVD, targetPathname, err := child.inode.Getlink(ctx, rp.Mount())
		if err != nil {
			return nil, err
		}
		if targetVD.Ok() {
			// The reference on targetVD is released via deferDecRefVD whether
			// or not HandleJump succeeded.
			err := rp.HandleJump(targetVD)
			fs.deferDecRefVD(ctx, targetVD)
			if err != nil {
				return nil, err
			}
		} else {
			if err := rp.HandleSymlink(targetPathname); err != nil {
				return nil, err
			}
		}
		// rp.Final() may no longer be true since we now need to resolve the
		// symlink target.
		goto afterTrailingSymlink
	}
	if err := child.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
		return nil, err
	}
	// Open may block so we need to unlock fs.mu. IncRef child to prevent
	// its destruction while fs.mu is unlocked.
	child.IncRef()
	unlock()
	fd, err := child.inode.Open(ctx, rp, child, opts)
	child.DecRef(ctx)
	return fd, err
}

// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
	// Deferred before fs.mu is taken; fs.mu is released by hand on every path
	// below, so this always runs with fs.mu unlocked.
	defer fs.processDeferredDecRefs(ctx)

	fs.mu.RLock()
	d, err := fs.walkExistingLocked(ctx, rp)
	if err != nil {
		fs.mu.RUnlock()
		return "", err
	}
	if !d.isSymlink() {
		fs.mu.RUnlock()
		return "", linuxerr.EINVAL
	}

	// Inode.Readlink() cannot be called holding fs locks.
	// Hold a reference on d across the unlocked call.
	d.IncRef()
	defer d.DecRef(ctx)
	fs.mu.RUnlock()

	return d.inode.Readlink(ctx, rp.Mount())
}

// RenameAt implements vfs.FilesystemImpl.RenameAt.
//
// rp names the destination; the source is the entry oldName in the directory
// oldParentVD. The only flag accepted in opts.Flags is RENAME_NOREPLACE.
func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error {
	fs.mu.Lock()
	defer fs.processDeferredDecRefs(ctx)
	defer fs.mu.Unlock()

	// Resolve the destination directory first to verify that it's on this
	// Mount.
	dstDir, err := fs.walkParentDirLocked(ctx, rp)
	if err != nil {
		return err
	}

	// Only RENAME_NOREPLACE is supported.
	if opts.Flags&^linux.RENAME_NOREPLACE != 0 {
		return linuxerr.EINVAL
	}
	noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0

	mnt := rp.Mount()
	if mnt != oldParentVD.Mount() {
		return linuxerr.EXDEV
	}
	if err := mnt.CheckBeginWrite(); err != nil {
		return err
	}
	defer mnt.EndWrite()

	// Look up (and revalidate) the source dentry under its directory's dirMu.
	srcDirVFSD := oldParentVD.Dentry()
	srcDir := srcDirVFSD.Impl().(*Dentry)
	srcDir.dirMu.Lock()
	src, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), srcDir, oldName, srcDir.children[oldName])
	srcDir.dirMu.Unlock()
	if err != nil {
		return err
	}

	// Can we remove the src dentry?
	if err := checkDeleteLocked(ctx, rp, src); err != nil {
		return err
	}

	// Can we create the dst dentry?
	var dst *Dentry
	newName := rp.Component()
	if newName == "." || newName == ".." {
		if noReplace {
			return syserror.EEXIST
		}
		return linuxerr.EBUSY
	}

	err = checkCreateLocked(ctx, rp.Credentials(), newName, dstDir)
	switch {
	case err == nil:
		// Ok, continue with rename as replacement.
	case linuxerr.Equals(linuxerr.EEXIST, err):
		if noReplace {
			// Won't overwrite existing node since RENAME_NOREPLACE was requested.
			return syserror.EEXIST
		}
		// EEXIST from checkCreateLocked for a name other than "." or ".."
		// means newName was found in dstDir.children.
		dst = dstDir.children[newName]
		if dst == nil {
			panic(fmt.Sprintf("Child %q for parent Dentry %+v disappeared inside atomic section?", newName, dstDir))
		}
	default:
		return err
	}

	// Renaming an entry onto itself is a no-op.
	if srcDir == dstDir && oldName == newName {
		return nil
	}

	var dstVFSD *vfs.Dentry
	if dst != nil {
		dstVFSD = dst.VFSDentry()
	}

	mntns := vfs.MountNamespaceFromContext(ctx)
	defer mntns.DecRef(ctx)
	virtfs := rp.VirtualFilesystem()

	// We can't deadlock here due to lock ordering because we're protected from
	// concurrent renames by fs.mu held for writing.
	srcDir.dirMu.Lock()
	defer srcDir.dirMu.Unlock()
	if srcDir != dstDir {
		dstDir.dirMu.Lock()
		defer dstDir.dirMu.Unlock()
	}

	// Prepare the rename in the VFS; from here on every failure path must
	// call AbortRenameDentry.
	srcVFSD := src.VFSDentry()
	if err := virtfs.PrepareRenameDentry(mntns, srcVFSD, dstVFSD); err != nil {
		return err
	}
	err = srcDir.inode.Rename(ctx, src.name, newName, src.inode, dstDir.inode)
	if err != nil {
		virtfs.AbortRenameDentry(srcVFSD, dstVFSD)
		return err
	}
	// The inode-level rename succeeded; update the dentry tree to match.
	delete(srcDir.children, src.name)
	if srcDir != dstDir {
		fs.deferDecRef(srcDir) // child (src) drops ref on old parent.
		dstDir.IncRef()        // child (src) takes a ref on the new parent.
	}
	src.parent = dstDir
	src.name = newName
	if dstDir.children == nil {
		dstDir.children = make(map[string]*Dentry)
	}
	replaced := dstDir.children[newName]
	dstDir.children[newName] = src
	var replaceVFSD *vfs.Dentry
	if replaced != nil {
		// deferDecRef so that fs.mu and dstDir.dirMu are unlocked by then.
		fs.deferDecRef(replaced)
		replaceVFSD = replaced.VFSDentry()
	}
	virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) // +checklocksforce: to may be nil, that's okay.
	return nil
}

// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
760 func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { 761 fs.mu.Lock() 762 defer fs.processDeferredDecRefs(ctx) 763 defer fs.mu.Unlock() 764 765 d, err := fs.walkExistingLocked(ctx, rp) 766 if err != nil { 767 return err 768 } 769 if err := rp.Mount().CheckBeginWrite(); err != nil { 770 return err 771 } 772 defer rp.Mount().EndWrite() 773 if err := checkDeleteLocked(ctx, rp, d); err != nil { 774 return err 775 } 776 if !d.isDir() { 777 return syserror.ENOTDIR 778 } 779 if d.inode.HasChildren() { 780 return linuxerr.ENOTEMPTY 781 } 782 virtfs := rp.VirtualFilesystem() 783 parentDentry := d.parent 784 parentDentry.dirMu.Lock() 785 defer parentDentry.dirMu.Unlock() 786 787 mntns := vfs.MountNamespaceFromContext(ctx) 788 defer mntns.DecRef(ctx) 789 vfsd := d.VFSDentry() 790 if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { 791 return err // +checklocksforce: vfsd is not locked. 792 } 793 794 if err := parentDentry.inode.RmDir(ctx, d.name, d.inode); err != nil { 795 virtfs.AbortDeleteDentry(vfsd) 796 return err 797 } 798 delete(parentDentry.children, d.name) 799 // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then. 800 fs.deferDecRef(d) 801 virtfs.CommitDeleteDentry(ctx, vfsd) 802 return nil 803 } 804 805 // SetStatAt implements vfs.FilesystemImpl.SetStatAt. 806 func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { 807 fs.mu.RLock() 808 defer fs.processDeferredDecRefs(ctx) 809 defer fs.mu.RUnlock() 810 d, err := fs.walkExistingLocked(ctx, rp) 811 if err != nil { 812 return err 813 } 814 if opts.Stat.Mask == 0 { 815 return nil 816 } 817 return d.inode.SetStat(ctx, fs.VFSFilesystem(), rp.Credentials(), opts) 818 } 819 820 // StatAt implements vfs.FilesystemImpl.StatAt. 
821 func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { 822 fs.mu.RLock() 823 defer fs.processDeferredDecRefs(ctx) 824 defer fs.mu.RUnlock() 825 d, err := fs.walkExistingLocked(ctx, rp) 826 if err != nil { 827 return linux.Statx{}, err 828 } 829 return d.inode.Stat(ctx, fs.VFSFilesystem(), opts) 830 } 831 832 // StatFSAt implements vfs.FilesystemImpl.StatFSAt. 833 func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { 834 fs.mu.RLock() 835 defer fs.processDeferredDecRefs(ctx) 836 defer fs.mu.RUnlock() 837 d, err := fs.walkExistingLocked(ctx, rp) 838 if err != nil { 839 return linux.Statfs{}, err 840 } 841 return d.inode.StatFS(ctx, fs.VFSFilesystem()) 842 } 843 844 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. 845 func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { 846 if rp.Done() { 847 return syserror.EEXIST 848 } 849 fs.mu.Lock() 850 defer fs.processDeferredDecRefs(ctx) 851 defer fs.mu.Unlock() 852 parent, err := fs.walkParentDirLocked(ctx, rp) 853 if err != nil { 854 return err 855 } 856 parent.dirMu.Lock() 857 defer parent.dirMu.Unlock() 858 859 pc := rp.Component() 860 if err := checkCreateLocked(ctx, rp.Credentials(), pc, parent); err != nil { 861 return err 862 } 863 if rp.MustBeDir() { 864 return syserror.ENOENT 865 } 866 if err := rp.Mount().CheckBeginWrite(); err != nil { 867 return err 868 } 869 defer rp.Mount().EndWrite() 870 childI, err := parent.inode.NewSymlink(ctx, pc, target) 871 if err != nil { 872 return err 873 } 874 var child Dentry 875 child.Init(fs, childI) 876 parent.insertChildLocked(pc, &child) 877 return nil 878 } 879 880 // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. 
881 func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { 882 fs.mu.Lock() 883 defer fs.processDeferredDecRefs(ctx) 884 defer fs.mu.Unlock() 885 886 d, err := fs.walkExistingLocked(ctx, rp) 887 if err != nil { 888 return err 889 } 890 if err := rp.Mount().CheckBeginWrite(); err != nil { 891 return err 892 } 893 defer rp.Mount().EndWrite() 894 if err := checkDeleteLocked(ctx, rp, d); err != nil { 895 return err 896 } 897 if d.isDir() { 898 return syserror.EISDIR 899 } 900 virtfs := rp.VirtualFilesystem() 901 parentDentry := d.parent 902 parentDentry.dirMu.Lock() 903 defer parentDentry.dirMu.Unlock() 904 mntns := vfs.MountNamespaceFromContext(ctx) 905 defer mntns.DecRef(ctx) 906 vfsd := d.VFSDentry() 907 if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { 908 return err 909 } 910 if err := parentDentry.inode.Unlink(ctx, d.name, d.inode); err != nil { 911 virtfs.AbortDeleteDentry(vfsd) 912 return err 913 } 914 delete(parentDentry.children, d.name) 915 // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then. 916 fs.deferDecRef(d) 917 virtfs.CommitDeleteDentry(ctx, vfsd) 918 return nil 919 } 920 921 // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. 922 func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { 923 fs.mu.RLock() 924 defer fs.processDeferredDecRefs(ctx) 925 defer fs.mu.RUnlock() 926 d, err := fs.walkExistingLocked(ctx, rp) 927 if err != nil { 928 return nil, err 929 } 930 if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil { 931 return nil, err 932 } 933 return nil, linuxerr.ECONNREFUSED 934 } 935 936 // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. 
937 func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { 938 fs.mu.RLock() 939 defer fs.processDeferredDecRefs(ctx) 940 defer fs.mu.RUnlock() 941 _, err := fs.walkExistingLocked(ctx, rp) 942 if err != nil { 943 return nil, err 944 } 945 // kernfs currently does not support extended attributes. 946 return nil, linuxerr.ENOTSUP 947 } 948 949 // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. 950 func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { 951 fs.mu.RLock() 952 defer fs.processDeferredDecRefs(ctx) 953 defer fs.mu.RUnlock() 954 _, err := fs.walkExistingLocked(ctx, rp) 955 if err != nil { 956 return "", err 957 } 958 // kernfs currently does not support extended attributes. 959 return "", linuxerr.ENOTSUP 960 } 961 962 // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. 963 func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { 964 fs.mu.RLock() 965 defer fs.processDeferredDecRefs(ctx) 966 defer fs.mu.RUnlock() 967 _, err := fs.walkExistingLocked(ctx, rp) 968 if err != nil { 969 return err 970 } 971 // kernfs currently does not support extended attributes. 972 return linuxerr.ENOTSUP 973 } 974 975 // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. 976 func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { 977 fs.mu.RLock() 978 defer fs.processDeferredDecRefs(ctx) 979 defer fs.mu.RUnlock() 980 _, err := fs.walkExistingLocked(ctx, rp) 981 if err != nil { 982 return err 983 } 984 // kernfs currently does not support extended attributes. 985 return linuxerr.ENOTSUP 986 } 987 988 // PrependPath implements vfs.FilesystemImpl.PrependPath. 
989 func (fs *Filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { 990 fs.mu.RLock() 991 defer fs.mu.RUnlock() 992 return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b) 993 } 994 995 func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) { 996 if d, ok := vd.Dentry().Impl().(*Dentry); ok && d.fs == fs { 997 // The following is equivalent to vd.DecRef(ctx). This is needed 998 // because if d belongs to this filesystem, we can not DecRef it right 999 // away as we may be holding fs.mu. d.DecRef may acquire fs.mu. So we 1000 // defer the DecRef to when locks are dropped. 1001 vd.Mount().DecRef(ctx) 1002 fs.deferDecRef(d) 1003 } else { 1004 vd.DecRef(ctx) 1005 } 1006 }