github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/vfs/vfs.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package vfs implements a virtual filesystem layer. 16 // 17 // Lock order: 18 // 19 // EpollInstance.interestMu 20 // FileDescription.epollMu 21 // FilesystemImpl/FileDescriptionImpl locks 22 // VirtualFilesystem.mountMu 23 // Dentry.mu 24 // Locks acquired by FilesystemImpls between Prepare{Delete,Rename}Dentry and Commit{Delete,Rename*}Dentry 25 // VirtualFilesystem.filesystemsMu 26 // fdnotifier.notifier.mu 27 // EpollInstance.mu 28 // Locks acquired by FileDescriptionImpl.Readiness 29 // Inotify.mu 30 // Watches.mu 31 // Inotify.evMu 32 // VirtualFilesystem.fsTypesMu 33 // 34 // Locking Dentry.mu in multiple Dentries requires holding 35 // VirtualFilesystem.mountMu. Locking EpollInstance.interestMu in multiple 36 // EpollInstances requires holding epollCycleMu. 
package vfs

import (
	"fmt"
	"path"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/fspath"
	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/syserror"
)

// A VirtualFilesystem (VFS for short) combines Filesystems in trees of Mounts.
//
// There is no analogue to the VirtualFilesystem type in Linux, as the
// equivalent state in Linux is global.
//
// +stateify savable
type VirtualFilesystem struct {
	// mountMu serializes mount mutations.
	//
	// mountMu is analogous to Linux's namespace_sem.
	mountMu sync.Mutex `state:"nosave"`

	// mounts maps (mount parent, mount point) pairs to mounts. (Since mounts
	// are uniquely namespaced, including mount parent in the key correctly
	// handles both bind mounts and mount namespaces; Linux does the same.)
	// Synchronization between mutators and readers is provided by mounts.seq;
	// synchronization between mutators is provided by mountMu.
	//
	// mounts is used to follow mount points during path traversal. We use a
	// single table rather than per-Dentry tables to reduce size (and therefore
	// cache footprint) for the vast majority of Dentries that are not mount
	// points.
	//
	// mounts is analogous to Linux's mount_hashtable.
	mounts mountTable `state:".([]*Mount)"`

	// mountpoints maps mount points to mounts at those points in all
	// namespaces. mountpoints is protected by mountMu.
	//
	// mountpoints is used to find mounts that must be umounted due to
	// removal of a mount point Dentry from another mount namespace. ("A file
	// or directory that is a mount point in one namespace that is not a mount
	// point in another namespace, may be renamed, unlinked, or removed
	// (rmdir(2)) in the mount namespace in which it is not a mount point
	// (subject to the usual permission checks)." - mount_namespaces(7))
	//
	// mountpoints is analogous to Linux's mountpoint_hashtable.
	//
	// Init treats a non-nil mountpoints as the sign that this
	// VirtualFilesystem has already been initialized.
	mountpoints map[*Dentry]map[*Mount]struct{}

	// lastMountID is the last allocated mount ID. lastMountID is accessed
	// using atomic memory operations.
	lastMountID uint64

	// anonMount is a Mount, not included in mounts or mountpoints,
	// representing an anonFilesystem. anonMount is used to back
	// VirtualDentries returned by VirtualFilesystem.NewAnonVirtualDentry().
	// anonMount is immutable.
	//
	// anonMount is analogous to Linux's anon_inode_mnt.
	anonMount *Mount

	// devices contains all registered Devices. devices is protected by
	// devicesMu.
	devicesMu sync.RWMutex `state:"nosave"`
	devices   map[devTuple]*registeredDevice

	// anonBlockDevMinor contains all allocated anonymous block device minor
	// numbers. anonBlockDevMinorNext is a lower bound for the smallest
	// unallocated anonymous block device number. anonBlockDevMinorNext and
	// anonBlockDevMinor are protected by anonBlockDevMinorMu.
	anonBlockDevMinorMu   sync.Mutex `state:"nosave"`
	anonBlockDevMinorNext uint32
	anonBlockDevMinor     map[uint32]struct{}

	// fsTypes contains all registered FilesystemTypes. fsTypes is protected by
	// fsTypesMu.
	fsTypesMu sync.RWMutex `state:"nosave"`
	fsTypes   map[string]*registeredFilesystemType

	// filesystems contains all Filesystems. filesystems is protected by
	// filesystemsMu.
	filesystemsMu sync.Mutex `state:"nosave"`
	filesystems   map[*Filesystem]struct{}
}

// Init initializes a new VirtualFilesystem with no mounts or FilesystemTypes.
130 func (vfs *VirtualFilesystem) Init(ctx context.Context) error { 131 if vfs.mountpoints != nil { 132 panic("VFS already initialized") 133 } 134 vfs.mountpoints = make(map[*Dentry]map[*Mount]struct{}) 135 vfs.devices = make(map[devTuple]*registeredDevice) 136 vfs.anonBlockDevMinorNext = 1 137 vfs.anonBlockDevMinor = make(map[uint32]struct{}) 138 vfs.fsTypes = make(map[string]*registeredFilesystemType) 139 vfs.filesystems = make(map[*Filesystem]struct{}) 140 vfs.mounts.Init() 141 142 // Construct vfs.anonMount. 143 anonfsDevMinor, err := vfs.GetAnonBlockDevMinor() 144 if err != nil { 145 // This shouldn't be possible since anonBlockDevMinorNext was 146 // initialized to 1 above (no device numbers have been allocated yet). 147 panic(fmt.Sprintf("VirtualFilesystem.Init: device number allocation for anonfs failed: %v", err)) 148 } 149 anonfs := anonFilesystem{ 150 devMinor: anonfsDevMinor, 151 } 152 anonfs.vfsfs.Init(vfs, &anonFilesystemType{}, &anonfs) 153 defer anonfs.vfsfs.DecRef(ctx) 154 anonMount, err := vfs.NewDisconnectedMount(&anonfs.vfsfs, nil, &MountOptions{}) 155 if err != nil { 156 // We should not be passing any MountOptions that would cause 157 // construction of this mount to fail. 158 panic(fmt.Sprintf("VirtualFilesystem.Init: anonfs mount failed: %v", err)) 159 } 160 vfs.anonMount = anonMount 161 162 return nil 163 } 164 165 // Release drops references on filesystem objects held by vfs. 166 // 167 // Precondition: This must be called after VFS.Init() has succeeded. 168 func (vfs *VirtualFilesystem) Release(ctx context.Context) { 169 vfs.anonMount.DecRef(ctx) 170 for _, fst := range vfs.fsTypes { 171 fst.fsType.Release(ctx) 172 } 173 } 174 175 // PathOperation specifies the path operated on by a VFS method. 176 // 177 // PathOperation is passed to VFS methods by pointer to reduce memory copying: 178 // it's somewhat large and should never escape. (Options structs are passed by 179 // pointer to VFS and FileDescription methods for the same reason.) 
180 // 181 // +stateify savable 182 type PathOperation struct { 183 // Root is the VFS root. References on Root are borrowed from the provider 184 // of the PathOperation. 185 // 186 // Invariants: Root.Ok(). 187 Root VirtualDentry 188 189 // Start is the starting point for the path traversal. References on Start 190 // are borrowed from the provider of the PathOperation (i.e. the caller of 191 // the VFS method to which the PathOperation was passed). 192 // 193 // Invariants: Start.Ok(). If Path.Absolute, then Start == Root. 194 Start VirtualDentry 195 196 // Path is the pathname traversed by this operation. 197 Path fspath.Path 198 199 // If FollowFinalSymlink is true, and the Dentry traversed by the final 200 // path component represents a symbolic link, the symbolic link should be 201 // followed. 202 FollowFinalSymlink bool 203 } 204 205 // AccessAt checks whether a user with creds has access to the file at 206 // the given path. 207 func (vfs *VirtualFilesystem) AccessAt(ctx context.Context, creds *auth.Credentials, ats AccessTypes, pop *PathOperation) error { 208 rp := vfs.getResolvingPath(creds, pop) 209 for { 210 err := rp.mount.fs.impl.AccessAt(ctx, rp, creds, ats) 211 if err == nil { 212 rp.Release(ctx) 213 return nil 214 } 215 if !rp.handleError(ctx, err) { 216 rp.Release(ctx) 217 return err 218 } 219 } 220 } 221 222 // GetDentryAt returns a VirtualDentry representing the given path, at which a 223 // file must exist. A reference is taken on the returned VirtualDentry. 
224 func (vfs *VirtualFilesystem) GetDentryAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetDentryOptions) (VirtualDentry, error) { 225 rp := vfs.getResolvingPath(creds, pop) 226 for { 227 d, err := rp.mount.fs.impl.GetDentryAt(ctx, rp, *opts) 228 if err == nil { 229 vd := VirtualDentry{ 230 mount: rp.mount, 231 dentry: d, 232 } 233 rp.mount.IncRef() 234 rp.Release(ctx) 235 return vd, nil 236 } 237 if !rp.handleError(ctx, err) { 238 rp.Release(ctx) 239 return VirtualDentry{}, err 240 } 241 } 242 } 243 244 // Preconditions: pop.Path.Begin.Ok(). 245 func (vfs *VirtualFilesystem) getParentDirAndName(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (VirtualDentry, string, error) { 246 rp := vfs.getResolvingPath(creds, pop) 247 for { 248 parent, err := rp.mount.fs.impl.GetParentDentryAt(ctx, rp) 249 if err == nil { 250 parentVD := VirtualDentry{ 251 mount: rp.mount, 252 dentry: parent, 253 } 254 rp.mount.IncRef() 255 name := rp.Component() 256 rp.Release(ctx) 257 return parentVD, name, nil 258 } 259 if checkInvariants { 260 if rp.canHandleError(err) && rp.Done() { 261 panic(fmt.Sprintf("%T.GetParentDentryAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) 262 } 263 } 264 if !rp.handleError(ctx, err) { 265 rp.Release(ctx) 266 return VirtualDentry{}, "", err 267 } 268 } 269 } 270 271 // LinkAt creates a hard link at newpop representing the existing file at 272 // oldpop. 
273 func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credentials, oldpop, newpop *PathOperation) error { 274 oldVD, err := vfs.GetDentryAt(ctx, creds, oldpop, &GetDentryOptions{}) 275 if err != nil { 276 return err 277 } 278 279 if !newpop.Path.Begin.Ok() { 280 oldVD.DecRef(ctx) 281 if newpop.Path.Absolute { 282 return syserror.EEXIST 283 } 284 return syserror.ENOENT 285 } 286 if newpop.FollowFinalSymlink { 287 oldVD.DecRef(ctx) 288 ctx.Warningf("VirtualFilesystem.LinkAt: file creation paths can't follow final symlink") 289 return linuxerr.EINVAL 290 } 291 292 rp := vfs.getResolvingPath(creds, newpop) 293 for { 294 err := rp.mount.fs.impl.LinkAt(ctx, rp, oldVD) 295 if err == nil { 296 rp.Release(ctx) 297 oldVD.DecRef(ctx) 298 return nil 299 } 300 if checkInvariants { 301 if rp.canHandleError(err) && rp.Done() { 302 panic(fmt.Sprintf("%T.LinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) 303 } 304 } 305 if !rp.handleError(ctx, err) { 306 rp.Release(ctx) 307 oldVD.DecRef(ctx) 308 return err 309 } 310 } 311 } 312 313 // MkdirAt creates a directory at the given path. 314 func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MkdirOptions) error { 315 if !pop.Path.Begin.Ok() { 316 // pop.Path should not be empty in operations that create/delete files. 317 // This is consistent with mkdirat(dirfd, "", mode). 318 if pop.Path.Absolute { 319 return syserror.EEXIST 320 } 321 return syserror.ENOENT 322 } 323 if pop.FollowFinalSymlink { 324 ctx.Warningf("VirtualFilesystem.MkdirAt: file creation paths can't follow final symlink") 325 return linuxerr.EINVAL 326 } 327 // "Under Linux, apart from the permission bits, the S_ISVTX mode bit is 328 // also honored." 
- mkdir(2) 329 opts.Mode &= 0777 | linux.S_ISVTX 330 331 rp := vfs.getResolvingPath(creds, pop) 332 for { 333 err := rp.mount.fs.impl.MkdirAt(ctx, rp, *opts) 334 if err == nil { 335 rp.Release(ctx) 336 return nil 337 } 338 if checkInvariants { 339 if rp.canHandleError(err) && rp.Done() { 340 panic(fmt.Sprintf("%T.MkdirAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) 341 } 342 } 343 if !rp.handleError(ctx, err) { 344 rp.Release(ctx) 345 return err 346 } 347 } 348 } 349 350 // MknodAt creates a file of the given mode at the given path. It returns an 351 // error from the syserror package. 352 func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MknodOptions) error { 353 if !pop.Path.Begin.Ok() { 354 // pop.Path should not be empty in operations that create/delete files. 355 // This is consistent with mknodat(dirfd, "", mode, dev). 356 if pop.Path.Absolute { 357 return syserror.EEXIST 358 } 359 return syserror.ENOENT 360 } 361 if pop.FollowFinalSymlink { 362 ctx.Warningf("VirtualFilesystem.MknodAt: file creation paths can't follow final symlink") 363 return linuxerr.EINVAL 364 } 365 366 rp := vfs.getResolvingPath(creds, pop) 367 for { 368 err := rp.mount.fs.impl.MknodAt(ctx, rp, *opts) 369 if err == nil { 370 rp.Release(ctx) 371 return nil 372 } 373 if checkInvariants { 374 if rp.canHandleError(err) && rp.Done() { 375 panic(fmt.Sprintf("%T.MknodAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) 376 } 377 } 378 if !rp.handleError(ctx, err) { 379 rp.Release(ctx) 380 return err 381 } 382 } 383 } 384 385 // OpenAt returns a FileDescription providing access to the file at the given 386 // path. A reference is taken on the returned FileDescription. 
387 func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *OpenOptions) (*FileDescription, error) { 388 fsmetric.Opens.Increment() 389 390 // Remove: 391 // 392 // - O_CLOEXEC, which affects file descriptors and therefore must be 393 // handled outside of VFS. 394 // 395 // - Unknown flags. 396 opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_NOCTTY | linux.O_TRUNC | linux.O_APPEND | linux.O_NONBLOCK | linux.O_DSYNC | linux.O_ASYNC | linux.O_DIRECT | linux.O_LARGEFILE | linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NOATIME | linux.O_SYNC | linux.O_PATH | linux.O_TMPFILE 397 // Linux's __O_SYNC (which we call linux.O_SYNC) implies O_DSYNC. 398 if opts.Flags&linux.O_SYNC != 0 { 399 opts.Flags |= linux.O_DSYNC 400 } 401 // Linux's __O_TMPFILE (which we call linux.O_TMPFILE) must be specified 402 // with O_DIRECTORY and a writable access mode (to ensure that it fails on 403 // filesystem implementations that do not support it). 404 if opts.Flags&linux.O_TMPFILE != 0 { 405 if opts.Flags&linux.O_DIRECTORY == 0 { 406 return nil, linuxerr.EINVAL 407 } 408 if opts.Flags&linux.O_CREAT != 0 { 409 return nil, linuxerr.EINVAL 410 } 411 if opts.Flags&linux.O_ACCMODE == linux.O_RDONLY { 412 return nil, linuxerr.EINVAL 413 } 414 } 415 // O_PATH causes most other flags to be ignored. 416 if opts.Flags&linux.O_PATH != 0 { 417 opts.Flags &= linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_PATH 418 } 419 // "On Linux, the following bits are also honored in mode: [S_ISUID, 420 // S_ISGID, S_ISVTX]" - open(2) 421 opts.Mode &= 0777 | linux.S_ISUID | linux.S_ISGID | linux.S_ISVTX 422 423 if opts.Flags&linux.O_NOFOLLOW != 0 { 424 pop.FollowFinalSymlink = false 425 } 426 rp := vfs.getResolvingPath(creds, pop) 427 if opts.Flags&linux.O_DIRECTORY != 0 { 428 rp.mustBeDir = true 429 } 430 // Ignore O_PATH for verity, as verity performs extra operations on the fd for verification. 
431 // The underlying filesystem that verity wraps opens the fd with O_PATH. 432 if opts.Flags&linux.O_PATH != 0 && rp.mount.fs.FilesystemType().Name() != "verity" { 433 vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{}) 434 if err != nil { 435 return nil, err 436 } 437 fd := &opathFD{} 438 if err := fd.vfsfd.Init(fd, opts.Flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{}); err != nil { 439 return nil, err 440 } 441 vd.DecRef(ctx) 442 return &fd.vfsfd, err 443 } 444 for { 445 fd, err := rp.mount.fs.impl.OpenAt(ctx, rp, *opts) 446 if err == nil { 447 rp.Release(ctx) 448 449 if opts.FileExec { 450 if fd.Mount().Flags.NoExec { 451 fd.DecRef(ctx) 452 return nil, linuxerr.EACCES 453 } 454 455 // Only a regular file can be executed. 456 stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_TYPE}) 457 if err != nil { 458 fd.DecRef(ctx) 459 return nil, err 460 } 461 if stat.Mask&linux.STATX_TYPE == 0 || stat.Mode&linux.S_IFMT != linux.S_IFREG { 462 fd.DecRef(ctx) 463 return nil, linuxerr.EACCES 464 } 465 } 466 467 fd.Dentry().InotifyWithParent(ctx, linux.IN_OPEN, 0, PathEvent) 468 return fd, nil 469 } 470 if !rp.handleError(ctx, err) { 471 rp.Release(ctx) 472 return nil, err 473 } 474 } 475 } 476 477 // ReadlinkAt returns the target of the symbolic link at the given path. 478 func (vfs *VirtualFilesystem) ReadlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (string, error) { 479 rp := vfs.getResolvingPath(creds, pop) 480 for { 481 target, err := rp.mount.fs.impl.ReadlinkAt(ctx, rp) 482 if err == nil { 483 rp.Release(ctx) 484 return target, nil 485 } 486 if !rp.handleError(ctx, err) { 487 rp.Release(ctx) 488 return "", err 489 } 490 } 491 } 492 493 // RenameAt renames the file at oldpop to newpop. 
494 func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credentials, oldpop, newpop *PathOperation, opts *RenameOptions) error { 495 if !oldpop.Path.Begin.Ok() { 496 if oldpop.Path.Absolute { 497 return linuxerr.EBUSY 498 } 499 return syserror.ENOENT 500 } 501 if oldpop.FollowFinalSymlink { 502 ctx.Warningf("VirtualFilesystem.RenameAt: source path can't follow final symlink") 503 return linuxerr.EINVAL 504 } 505 506 oldParentVD, oldName, err := vfs.getParentDirAndName(ctx, creds, oldpop) 507 if err != nil { 508 return err 509 } 510 if oldName == "." || oldName == ".." { 511 oldParentVD.DecRef(ctx) 512 return linuxerr.EBUSY 513 } 514 515 if !newpop.Path.Begin.Ok() { 516 oldParentVD.DecRef(ctx) 517 if newpop.Path.Absolute { 518 return linuxerr.EBUSY 519 } 520 return syserror.ENOENT 521 } 522 if newpop.FollowFinalSymlink { 523 oldParentVD.DecRef(ctx) 524 ctx.Warningf("VirtualFilesystem.RenameAt: destination path can't follow final symlink") 525 return linuxerr.EINVAL 526 } 527 528 rp := vfs.getResolvingPath(creds, newpop) 529 renameOpts := *opts 530 if oldpop.Path.Dir { 531 renameOpts.MustBeDir = true 532 } 533 for { 534 err := rp.mount.fs.impl.RenameAt(ctx, rp, oldParentVD, oldName, renameOpts) 535 if err == nil { 536 rp.Release(ctx) 537 oldParentVD.DecRef(ctx) 538 return nil 539 } 540 if checkInvariants { 541 if rp.canHandleError(err) && rp.Done() { 542 panic(fmt.Sprintf("%T.RenameAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) 543 } 544 } 545 if !rp.handleError(ctx, err) { 546 rp.Release(ctx) 547 oldParentVD.DecRef(ctx) 548 return err 549 } 550 } 551 } 552 553 // RmdirAt removes the directory at the given path. 554 func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) error { 555 if !pop.Path.Begin.Ok() { 556 // pop.Path should not be empty in operations that create/delete files. 557 // This is consistent with unlinkat(dirfd, "", AT_REMOVEDIR). 
558 if pop.Path.Absolute { 559 return linuxerr.EBUSY 560 } 561 return syserror.ENOENT 562 } 563 if pop.FollowFinalSymlink { 564 ctx.Warningf("VirtualFilesystem.RmdirAt: file deletion paths can't follow final symlink") 565 return linuxerr.EINVAL 566 } 567 568 rp := vfs.getResolvingPath(creds, pop) 569 for { 570 err := rp.mount.fs.impl.RmdirAt(ctx, rp) 571 if err == nil { 572 rp.Release(ctx) 573 return nil 574 } 575 if checkInvariants { 576 if rp.canHandleError(err) && rp.Done() { 577 panic(fmt.Sprintf("%T.RmdirAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) 578 } 579 } 580 if !rp.handleError(ctx, err) { 581 rp.Release(ctx) 582 return err 583 } 584 } 585 } 586 587 // SetStatAt changes metadata for the file at the given path. 588 func (vfs *VirtualFilesystem) SetStatAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *SetStatOptions) error { 589 rp := vfs.getResolvingPath(creds, pop) 590 for { 591 err := rp.mount.fs.impl.SetStatAt(ctx, rp, *opts) 592 if err == nil { 593 rp.Release(ctx) 594 return nil 595 } 596 if !rp.handleError(ctx, err) { 597 rp.Release(ctx) 598 return err 599 } 600 } 601 } 602 603 // StatAt returns metadata for the file at the given path. 604 func (vfs *VirtualFilesystem) StatAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *StatOptions) (linux.Statx, error) { 605 rp := vfs.getResolvingPath(creds, pop) 606 for { 607 stat, err := rp.mount.fs.impl.StatAt(ctx, rp, *opts) 608 if err == nil { 609 rp.Release(ctx) 610 return stat, nil 611 } 612 if !rp.handleError(ctx, err) { 613 rp.Release(ctx) 614 return linux.Statx{}, err 615 } 616 } 617 } 618 619 // StatFSAt returns metadata for the filesystem containing the file at the 620 // given path. 
621 func (vfs *VirtualFilesystem) StatFSAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (linux.Statfs, error) { 622 rp := vfs.getResolvingPath(creds, pop) 623 for { 624 statfs, err := rp.mount.fs.impl.StatFSAt(ctx, rp) 625 if err == nil { 626 rp.Release(ctx) 627 return statfs, nil 628 } 629 if !rp.handleError(ctx, err) { 630 rp.Release(ctx) 631 return linux.Statfs{}, err 632 } 633 } 634 } 635 636 // SymlinkAt creates a symbolic link at the given path with the given target. 637 func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, target string) error { 638 if !pop.Path.Begin.Ok() { 639 // pop.Path should not be empty in operations that create/delete files. 640 // This is consistent with symlinkat(oldpath, newdirfd, ""). 641 if pop.Path.Absolute { 642 return syserror.EEXIST 643 } 644 return syserror.ENOENT 645 } 646 if pop.FollowFinalSymlink { 647 ctx.Warningf("VirtualFilesystem.SymlinkAt: file creation paths can't follow final symlink") 648 return linuxerr.EINVAL 649 } 650 651 rp := vfs.getResolvingPath(creds, pop) 652 for { 653 err := rp.mount.fs.impl.SymlinkAt(ctx, rp, target) 654 if err == nil { 655 rp.Release(ctx) 656 return nil 657 } 658 if checkInvariants { 659 if rp.canHandleError(err) && rp.Done() { 660 panic(fmt.Sprintf("%T.SymlinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) 661 } 662 } 663 if !rp.handleError(ctx, err) { 664 rp.Release(ctx) 665 return err 666 } 667 } 668 } 669 670 // UnlinkAt deletes the non-directory file at the given path. 671 func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) error { 672 if !pop.Path.Begin.Ok() { 673 // pop.Path should not be empty in operations that create/delete files. 674 // This is consistent with unlinkat(dirfd, "", 0). 
675 if pop.Path.Absolute { 676 return linuxerr.EBUSY 677 } 678 return syserror.ENOENT 679 } 680 if pop.FollowFinalSymlink { 681 ctx.Warningf("VirtualFilesystem.UnlinkAt: file deletion paths can't follow final symlink") 682 return linuxerr.EINVAL 683 } 684 685 rp := vfs.getResolvingPath(creds, pop) 686 for { 687 err := rp.mount.fs.impl.UnlinkAt(ctx, rp) 688 if err == nil { 689 rp.Release(ctx) 690 return nil 691 } 692 if checkInvariants { 693 if rp.canHandleError(err) && rp.Done() { 694 panic(fmt.Sprintf("%T.UnlinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) 695 } 696 } 697 if !rp.handleError(ctx, err) { 698 rp.Release(ctx) 699 return err 700 } 701 } 702 } 703 704 // BoundEndpointAt gets the bound endpoint at the given path, if one exists. 705 func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *BoundEndpointOptions) (transport.BoundEndpoint, error) { 706 rp := vfs.getResolvingPath(creds, pop) 707 for { 708 bep, err := rp.mount.fs.impl.BoundEndpointAt(ctx, rp, *opts) 709 if err == nil { 710 rp.Release(ctx) 711 return bep, nil 712 } 713 if checkInvariants { 714 if rp.canHandleError(err) && rp.Done() { 715 panic(fmt.Sprintf("%T.BoundEndpointAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) 716 } 717 } 718 if !rp.handleError(ctx, err) { 719 rp.Release(ctx) 720 return nil, err 721 } 722 } 723 } 724 725 // ListXattrAt returns all extended attribute names for the file at the given 726 // path. 
727 func (vfs *VirtualFilesystem) ListXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, size uint64) ([]string, error) { 728 rp := vfs.getResolvingPath(creds, pop) 729 for { 730 names, err := rp.mount.fs.impl.ListXattrAt(ctx, rp, size) 731 if err == nil { 732 rp.Release(ctx) 733 return names, nil 734 } 735 if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { 736 // Linux doesn't actually return EOPNOTSUPP in this case; instead, 737 // fs/xattr.c:vfs_listxattr() falls back to allowing the security 738 // subsystem to return security extended attributes, which by 739 // default don't exist. 740 rp.Release(ctx) 741 return nil, nil 742 } 743 if !rp.handleError(ctx, err) { 744 rp.Release(ctx) 745 return nil, err 746 } 747 } 748 } 749 750 // GetXattrAt returns the value associated with the given extended attribute 751 // for the file at the given path. 752 func (vfs *VirtualFilesystem) GetXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetXattrOptions) (string, error) { 753 rp := vfs.getResolvingPath(creds, pop) 754 for { 755 val, err := rp.mount.fs.impl.GetXattrAt(ctx, rp, *opts) 756 if err == nil { 757 rp.Release(ctx) 758 return val, nil 759 } 760 if !rp.handleError(ctx, err) { 761 rp.Release(ctx) 762 return "", err 763 } 764 } 765 } 766 767 // SetXattrAt changes the value associated with the given extended attribute 768 // for the file at the given path. 769 func (vfs *VirtualFilesystem) SetXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *SetXattrOptions) error { 770 rp := vfs.getResolvingPath(creds, pop) 771 for { 772 err := rp.mount.fs.impl.SetXattrAt(ctx, rp, *opts) 773 if err == nil { 774 rp.Release(ctx) 775 return nil 776 } 777 if !rp.handleError(ctx, err) { 778 rp.Release(ctx) 779 return err 780 } 781 } 782 } 783 784 // RemoveXattrAt removes the given extended attribute from the file at rp. 
785 func (vfs *VirtualFilesystem) RemoveXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, name string) error { 786 rp := vfs.getResolvingPath(creds, pop) 787 for { 788 err := rp.mount.fs.impl.RemoveXattrAt(ctx, rp, name) 789 if err == nil { 790 rp.Release(ctx) 791 return nil 792 } 793 if !rp.handleError(ctx, err) { 794 rp.Release(ctx) 795 return err 796 } 797 } 798 } 799 800 // SyncAllFilesystems has the semantics of Linux's sync(2). 801 func (vfs *VirtualFilesystem) SyncAllFilesystems(ctx context.Context) error { 802 var retErr error 803 for fs := range vfs.getFilesystems() { 804 if err := fs.impl.Sync(ctx); err != nil && retErr == nil { 805 retErr = err 806 } 807 fs.DecRef(ctx) 808 } 809 return retErr 810 } 811 812 func (vfs *VirtualFilesystem) getFilesystems() map[*Filesystem]struct{} { 813 fss := make(map[*Filesystem]struct{}) 814 vfs.filesystemsMu.Lock() 815 defer vfs.filesystemsMu.Unlock() 816 for fs := range vfs.filesystems { 817 if !fs.TryIncRef() { 818 continue 819 } 820 fss[fs] = struct{}{} 821 } 822 return fss 823 } 824 825 // MkdirAllAt recursively creates non-existent directories on the given path 826 // (including the last component). 827 func (vfs *VirtualFilesystem) MkdirAllAt(ctx context.Context, currentPath string, root VirtualDentry, creds *auth.Credentials, mkdirOpts *MkdirOptions) error { 828 pop := &PathOperation{ 829 Root: root, 830 Start: root, 831 Path: fspath.Parse(currentPath), 832 } 833 stat, err := vfs.StatAt(ctx, creds, pop, &StatOptions{Mask: linux.STATX_TYPE}) 834 switch { 835 case err == nil: 836 if stat.Mask&linux.STATX_TYPE == 0 || stat.Mode&linux.FileTypeMask != linux.ModeDirectory { 837 return syserror.ENOTDIR 838 } 839 // Directory already exists. 840 return nil 841 case linuxerr.Equals(linuxerr.ENOENT, err): 842 // Expected, we will create the dir. 
843 default: 844 return fmt.Errorf("stat failed for %q during directory creation: %w", currentPath, err) 845 } 846 847 // Recurse to ensure parent is created and then create the final directory. 848 if err := vfs.MkdirAllAt(ctx, path.Dir(currentPath), root, creds, mkdirOpts); err != nil { 849 return err 850 } 851 if err := vfs.MkdirAt(ctx, creds, pop, mkdirOpts); err != nil { 852 return fmt.Errorf("failed to create directory %q: %w", currentPath, err) 853 } 854 return nil 855 } 856 857 // MakeSyntheticMountpoint creates parent directories of target if they do not 858 // exist and attempts to create a directory for the mountpoint. If a 859 // non-directory file already exists there then we allow it. 860 func (vfs *VirtualFilesystem) MakeSyntheticMountpoint(ctx context.Context, target string, root VirtualDentry, creds *auth.Credentials) error { 861 mkdirOpts := &MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true} 862 863 // Make sure the parent directory of target exists. 864 if err := vfs.MkdirAllAt(ctx, path.Dir(target), root, creds, mkdirOpts); err != nil { 865 return fmt.Errorf("failed to create parent directory of mountpoint %q: %w", target, err) 866 } 867 868 // Attempt to mkdir the final component. If a file (of any type) exists 869 // then we let allow mounting on top of that because we do not require the 870 // target to be an existing directory, unlike Linux mount(2). 871 if err := vfs.MkdirAt(ctx, creds, &PathOperation{ 872 Root: root, 873 Start: root, 874 Path: fspath.Parse(target), 875 }, mkdirOpts); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) { 876 return fmt.Errorf("failed to create mountpoint %q: %w", target, err) 877 } 878 return nil 879 } 880 881 // A VirtualDentry represents a node in a VFS tree, by combining a Dentry 882 // (which represents a node in a Filesystem's tree) and a Mount (which 883 // represents the Filesystem's position in a VFS mount tree). 
884 // 885 // VirtualDentry's semantics are similar to that of a Go interface object 886 // representing a pointer: it is a copyable value type that represents 887 // references to another entity. The zero value of VirtualDentry is an "empty 888 // VirtualDentry", directly analogous to a nil interface object. 889 // VirtualDentry.Ok() checks that a VirtualDentry is not zero-valued; unless 890 // otherwise specified, all other VirtualDentry methods require 891 // VirtualDentry.Ok() == true. 892 // 893 // Mounts and Dentries are reference-counted, requiring that users call 894 // VirtualDentry.{Inc,Dec}Ref() as appropriate. We often colloquially refer to 895 // references on the Mount and Dentry referred to by a VirtualDentry as 896 // references on the VirtualDentry itself. Unless otherwise specified, all 897 // VirtualDentry methods require that a reference is held on the VirtualDentry. 898 // 899 // VirtualDentry is analogous to Linux's struct path. 900 // 901 // +stateify savable 902 type VirtualDentry struct { 903 mount *Mount 904 dentry *Dentry 905 } 906 907 // MakeVirtualDentry creates a VirtualDentry. 908 func MakeVirtualDentry(mount *Mount, dentry *Dentry) VirtualDentry { 909 return VirtualDentry{ 910 mount: mount, 911 dentry: dentry, 912 } 913 } 914 915 // Ok returns true if vd is not empty. It does not require that a reference is 916 // held. 917 func (vd VirtualDentry) Ok() bool { 918 return vd.mount != nil 919 } 920 921 // IncRef increments the reference counts on the Mount and Dentry represented 922 // by vd. 923 func (vd VirtualDentry) IncRef() { 924 vd.mount.IncRef() 925 vd.dentry.IncRef() 926 } 927 928 // DecRef decrements the reference counts on the Mount and Dentry represented 929 // by vd. 930 func (vd VirtualDentry) DecRef(ctx context.Context) { 931 vd.dentry.DecRef(ctx) 932 vd.mount.DecRef(ctx) 933 } 934 935 // Mount returns the Mount associated with vd. It does not take a reference on 936 // the returned Mount. 
937 func (vd VirtualDentry) Mount() *Mount { 938 return vd.mount 939 } 940 941 // Dentry returns the Dentry associated with vd. It does not take a reference 942 // on the returned Dentry. 943 func (vd VirtualDentry) Dentry() *Dentry { 944 return vd.dentry 945 }