// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ext

import (
	"errors"
	"io"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/fspath"
	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/ext/disklayout"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/syserror"
)

var (
	// errResolveDirent indicates that vfs.ResolvingPath.Component() does not
	// exist in the dentry tree but does exist on disk, so it has to be read
	// in using the in-memory dirent and added to the dentry tree. It usually
	// signals that filesystem.mu must be locked for writing.
	errResolveDirent = errors.New("resolve path component using dirent")
)

// filesystem implements vfs.FilesystemImpl.
//
// +stateify savable
type filesystem struct {
	// vfsfs is the vfs.Filesystem associated with this filesystem. Release
	// reaches the owning VirtualFilesystem through it.
	vfsfs vfs.Filesystem

	// mu serializes changes to the dentry tree.
	mu sync.RWMutex `state:"nosave"`

	// dev represents the underlying fs device. It does not require protection
	// because io.ReaderAt permits concurrent read calls to it. It translates to
	// the pread syscall which passes on the read request directly to the device
	// driver. Device drivers are intelligent in serving multiple concurrent read
	// requests in the optimal order (taking locality into consideration).
	dev io.ReaderAt

	// inodeCache maps absolute inode numbers to the corresponding inode struct.
	// Inodes should be removed from this map once their reference count hits 0.
	//
	// Protected by mu because most additions (see IterDirents) and all
	// removals from it correspond to a change in the dentry tree.
	inodeCache map[uint32]*inode

	// sb represents the filesystem superblock. Immutable after initialization.
	sb disklayout.SuperBlock

	// bgs represents all the block group descriptors for the filesystem.
	// Immutable after initialization.
	bgs []disklayout.BlockGroup

	// devMinor is this filesystem's device minor number. Immutable after
	// initialization. Release hands it back via PutAnonBlockDevMinor.
	devMinor uint32
}

// Compiles only if filesystem implements vfs.FilesystemImpl.
var _ vfs.FilesystemImpl = (*filesystem)(nil)

// stepLocked resolves rp.Component() in parent directory vfsd. The write
// parameter tells whether the caller has acquired filesystem.mu for writing.
// If it is true, an inode that exists on disk can be added to the dentry
// tree if it is not present already; if it is false and such an addition
// would be needed, errResolveDirent is returned instead.
//
// stepLocked is loosely analogous to fs/namei.c:walk_component().
//
// Preconditions:
// * filesystem.mu must be locked (for writing if write param is true).
// * !rp.Done().
// * inode == vfsd.Impl().(*dentry).inode.
90 func stepLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode, write bool) (*vfs.Dentry, *inode, error) { 91 if !inode.isDir() { 92 return nil, nil, syserror.ENOTDIR 93 } 94 if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 95 return nil, nil, err 96 } 97 98 for { 99 name := rp.Component() 100 if name == "." { 101 rp.Advance() 102 return vfsd, inode, nil 103 } 104 d := vfsd.Impl().(*dentry) 105 if name == ".." { 106 isRoot, err := rp.CheckRoot(ctx, vfsd) 107 if err != nil { 108 return nil, nil, err 109 } 110 if isRoot || d.parent == nil { 111 rp.Advance() 112 return vfsd, inode, nil 113 } 114 if err := rp.CheckMount(ctx, &d.parent.vfsd); err != nil { 115 return nil, nil, err 116 } 117 rp.Advance() 118 return &d.parent.vfsd, d.parent.inode, nil 119 } 120 121 dir := inode.impl.(*directory) 122 child, ok := dir.childCache[name] 123 if !ok { 124 // We may need to instantiate a new dentry for this child. 125 childDirent, ok := dir.childMap[name] 126 if !ok { 127 // The underlying inode does not exist on disk. 128 return nil, nil, syserror.ENOENT 129 } 130 131 if !write { 132 // filesystem.mu must be held for writing to add to the dentry tree. 133 return nil, nil, errResolveDirent 134 } 135 136 // Create and add the component's dirent to the dentry tree. 137 fs := rp.Mount().Filesystem().Impl().(*filesystem) 138 childInode, err := fs.getOrCreateInodeLocked(childDirent.diskDirent.Inode()) 139 if err != nil { 140 return nil, nil, err 141 } 142 // incRef because this is being added to the dentry tree. 
143 childInode.incRef() 144 child = newDentry(childInode) 145 child.parent = d 146 child.name = name 147 dir.childCache[name] = child 148 } 149 if err := rp.CheckMount(ctx, &child.vfsd); err != nil { 150 return nil, nil, err 151 } 152 if child.inode.isSymlink() && rp.ShouldFollowSymlink() { 153 if err := rp.HandleSymlink(child.inode.impl.(*symlink).target); err != nil { 154 return nil, nil, err 155 } 156 continue 157 } 158 rp.Advance() 159 return &child.vfsd, child.inode, nil 160 } 161 } 162 163 // walkLocked resolves rp to an existing file. The write parameter 164 // passed tells if the caller has acquired filesystem.mu for writing or not. 165 // If set to true, additions can be made to the dentry tree while walking. 166 // If errResolveDirent is returned, the walk needs to be continued with an 167 // upgraded filesystem.mu. 168 // 169 // walkLocked is loosely analogous to Linux's fs/namei.c:path_lookupat(). 170 // 171 // Preconditions: 172 // * filesystem.mu must be locked (for writing if write param is true). 173 func walkLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { 174 vfsd := rp.Start() 175 inode := vfsd.Impl().(*dentry).inode 176 for !rp.Done() { 177 var err error 178 vfsd, inode, err = stepLocked(ctx, rp, vfsd, inode, write) 179 if err != nil { 180 return nil, nil, err 181 } 182 } 183 if rp.MustBeDir() && !inode.isDir() { 184 return nil, nil, syserror.ENOTDIR 185 } 186 return vfsd, inode, nil 187 } 188 189 // walkParentLocked resolves all but the last path component of rp to an 190 // existing directory. It does not check that the returned directory is 191 // searchable by the provider of rp. The write parameter passed tells if the 192 // caller has acquired filesystem.mu for writing or not. If set to true, 193 // additions can be made to the dentry tree while walking. 194 // If errResolveDirent is returned, the walk needs to be continued with an 195 // upgraded filesystem.mu. 
196 // 197 // walkParentLocked is loosely analogous to Linux's fs/namei.c:path_parentat(). 198 // 199 // Preconditions: 200 // * filesystem.mu must be locked (for writing if write param is true). 201 // * !rp.Done(). 202 func walkParentLocked(ctx context.Context, rp *vfs.ResolvingPath, write bool) (*vfs.Dentry, *inode, error) { 203 vfsd := rp.Start() 204 inode := vfsd.Impl().(*dentry).inode 205 for !rp.Final() { 206 var err error 207 vfsd, inode, err = stepLocked(ctx, rp, vfsd, inode, write) 208 if err != nil { 209 return nil, nil, err 210 } 211 } 212 if !inode.isDir() { 213 return nil, nil, syserror.ENOTDIR 214 } 215 return vfsd, inode, nil 216 } 217 218 // walk resolves rp to an existing file. If parent is set to true, it resolves 219 // the rp till the parent of the last component which should be an existing 220 // directory. If parent is false then resolves rp entirely. Attemps to resolve 221 // the path as far as it can with a read lock and upgrades the lock if needed. 222 func (fs *filesystem) walk(ctx context.Context, rp *vfs.ResolvingPath, parent bool) (*vfs.Dentry, *inode, error) { 223 var ( 224 vfsd *vfs.Dentry 225 inode *inode 226 err error 227 ) 228 229 // Try walking with the hopes that all dentries have already been pulled out 230 // of disk. This reduces congestion (allows concurrent walks). 231 fs.mu.RLock() 232 if parent { 233 vfsd, inode, err = walkParentLocked(ctx, rp, false) 234 } else { 235 vfsd, inode, err = walkLocked(ctx, rp, false) 236 } 237 fs.mu.RUnlock() 238 239 if err == errResolveDirent { 240 // Upgrade lock and continue walking. Lock upgrading in the middle of the 241 // walk is fine as this is a read only filesystem. 242 fs.mu.Lock() 243 if parent { 244 vfsd, inode, err = walkParentLocked(ctx, rp, true) 245 } else { 246 vfsd, inode, err = walkLocked(ctx, rp, true) 247 } 248 fs.mu.Unlock() 249 } 250 251 return vfsd, inode, err 252 } 253 254 // getOrCreateInodeLocked gets the inode corresponding to the inode number passed in. 
255 // It creates a new one with the given inode number if one does not exist. 256 // The caller must increment the ref count if adding this to the dentry tree. 257 // 258 // Precondition: must be holding fs.mu for writing. 259 func (fs *filesystem) getOrCreateInodeLocked(inodeNum uint32) (*inode, error) { 260 if in, ok := fs.inodeCache[inodeNum]; ok { 261 return in, nil 262 } 263 264 in, err := newInode(fs, inodeNum) 265 if err != nil { 266 return nil, err 267 } 268 269 fs.inodeCache[inodeNum] = in 270 return in, nil 271 } 272 273 // statTo writes the statfs fields to the output parameter. 274 func (fs *filesystem) statTo(stat *linux.Statfs) { 275 stat.Type = uint64(fs.sb.Magic()) 276 stat.BlockSize = int64(fs.sb.BlockSize()) 277 stat.Blocks = fs.sb.BlocksCount() 278 stat.BlocksFree = fs.sb.FreeBlocksCount() 279 stat.BlocksAvailable = fs.sb.FreeBlocksCount() 280 stat.Files = uint64(fs.sb.InodesCount()) 281 stat.FilesFree = uint64(fs.sb.FreeInodesCount()) 282 stat.NameLength = disklayout.MaxFileName 283 stat.FragmentSize = int64(fs.sb.BlockSize()) 284 // TODO(b/134676337): Set Statfs.Flags and Statfs.FSID. 285 } 286 287 // AccessAt implements vfs.Filesystem.Impl.AccessAt. 288 func (fs *filesystem) AccessAt(ctx context.Context, rp *vfs.ResolvingPath, creds *auth.Credentials, ats vfs.AccessTypes) error { 289 _, inode, err := fs.walk(ctx, rp, false) 290 if err != nil { 291 return err 292 } 293 return inode.checkPermissions(rp.Credentials(), ats) 294 } 295 296 // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. 
297 func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { 298 vfsd, inode, err := fs.walk(ctx, rp, false) 299 if err != nil { 300 return nil, err 301 } 302 303 if opts.CheckSearchable { 304 if !inode.isDir() { 305 return nil, syserror.ENOTDIR 306 } 307 if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec); err != nil { 308 return nil, err 309 } 310 } 311 312 inode.incRef() 313 return vfsd, nil 314 } 315 316 // GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. 317 func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { 318 vfsd, inode, err := fs.walk(ctx, rp, true) 319 if err != nil { 320 return nil, err 321 } 322 inode.incRef() 323 return vfsd, nil 324 } 325 326 // OpenAt implements vfs.FilesystemImpl.OpenAt. 327 func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 328 vfsd, inode, err := fs.walk(ctx, rp, false) 329 if err != nil { 330 return nil, err 331 } 332 333 // EROFS is returned if write access is needed. 334 if vfs.MayWriteFileWithOpenFlags(opts.Flags) || opts.Flags&(linux.O_CREAT|linux.O_EXCL|linux.O_TMPFILE) != 0 { 335 return nil, linuxerr.EROFS 336 } 337 return inode.open(rp, vfsd, &opts) 338 } 339 340 // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. 341 func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { 342 _, inode, err := fs.walk(ctx, rp, false) 343 if err != nil { 344 return "", err 345 } 346 symlink, ok := inode.impl.(*symlink) 347 if !ok { 348 return "", linuxerr.EINVAL 349 } 350 return symlink.target, nil 351 } 352 353 // StatAt implements vfs.FilesystemImpl.StatAt. 
354 func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { 355 _, inode, err := fs.walk(ctx, rp, false) 356 if err != nil { 357 return linux.Statx{}, err 358 } 359 var stat linux.Statx 360 inode.statTo(&stat) 361 return stat, nil 362 } 363 364 // StatFSAt implements vfs.FilesystemImpl.StatFSAt. 365 func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { 366 if _, _, err := fs.walk(ctx, rp, false); err != nil { 367 return linux.Statfs{}, err 368 } 369 370 var stat linux.Statfs 371 fs.statTo(&stat) 372 return stat, nil 373 } 374 375 // Release implements vfs.FilesystemImpl.Release. 376 func (fs *filesystem) Release(ctx context.Context) { 377 fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) 378 } 379 380 // Sync implements vfs.FilesystemImpl.Sync. 381 func (fs *filesystem) Sync(ctx context.Context) error { 382 // This is a readonly filesystem for now. 383 return nil 384 } 385 386 // The vfs.FilesystemImpl functions below return EROFS because their respective 387 // man pages say that EROFS must be returned if the path resolves to a file on 388 // this read-only filesystem. 389 390 // LinkAt implements vfs.FilesystemImpl.LinkAt. 391 func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { 392 if rp.Done() { 393 return syserror.EEXIST 394 } 395 396 if _, _, err := fs.walk(ctx, rp, true); err != nil { 397 return err 398 } 399 400 return linuxerr.EROFS 401 } 402 403 // MkdirAt implements vfs.FilesystemImpl.MkdirAt. 404 func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { 405 if rp.Done() { 406 return syserror.EEXIST 407 } 408 409 if _, _, err := fs.walk(ctx, rp, true); err != nil { 410 return err 411 } 412 413 return linuxerr.EROFS 414 } 415 416 // MknodAt implements vfs.FilesystemImpl.MknodAt. 
417 func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { 418 if rp.Done() { 419 return syserror.EEXIST 420 } 421 422 _, _, err := fs.walk(ctx, rp, true) 423 if err != nil { 424 return err 425 } 426 427 return linuxerr.EROFS 428 } 429 430 // RenameAt implements vfs.FilesystemImpl.RenameAt. 431 func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { 432 if rp.Done() { 433 return syserror.ENOENT 434 } 435 436 _, _, err := fs.walk(ctx, rp, false) 437 if err != nil { 438 return err 439 } 440 441 return linuxerr.EROFS 442 } 443 444 // RmdirAt implements vfs.FilesystemImpl.RmdirAt. 445 func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { 446 _, inode, err := fs.walk(ctx, rp, false) 447 if err != nil { 448 return err 449 } 450 451 if !inode.isDir() { 452 return syserror.ENOTDIR 453 } 454 455 return linuxerr.EROFS 456 } 457 458 // SetStatAt implements vfs.FilesystemImpl.SetStatAt. 459 func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { 460 _, _, err := fs.walk(ctx, rp, false) 461 if err != nil { 462 return err 463 } 464 465 return linuxerr.EROFS 466 } 467 468 // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. 469 func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { 470 if rp.Done() { 471 return syserror.EEXIST 472 } 473 474 _, _, err := fs.walk(ctx, rp, true) 475 if err != nil { 476 return err 477 } 478 479 return linuxerr.EROFS 480 } 481 482 // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. 
483 func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { 484 _, inode, err := fs.walk(ctx, rp, false) 485 if err != nil { 486 return err 487 } 488 489 if inode.isDir() { 490 return syserror.EISDIR 491 } 492 493 return linuxerr.EROFS 494 } 495 496 // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. 497 func (fs *filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { 498 _, inode, err := fs.walk(ctx, rp, false) 499 if err != nil { 500 return nil, err 501 } 502 if err := inode.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { 503 return nil, err 504 } 505 506 // TODO(b/134676337): Support sockets. 507 return nil, linuxerr.ECONNREFUSED 508 } 509 510 // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. 511 func (fs *filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { 512 _, _, err := fs.walk(ctx, rp, false) 513 if err != nil { 514 return nil, err 515 } 516 return nil, linuxerr.ENOTSUP 517 } 518 519 // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. 520 func (fs *filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { 521 _, _, err := fs.walk(ctx, rp, false) 522 if err != nil { 523 return "", err 524 } 525 return "", linuxerr.ENOTSUP 526 } 527 528 // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. 529 func (fs *filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { 530 _, _, err := fs.walk(ctx, rp, false) 531 if err != nil { 532 return err 533 } 534 return linuxerr.ENOTSUP 535 } 536 537 // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. 
538 func (fs *filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { 539 _, _, err := fs.walk(ctx, rp, false) 540 if err != nil { 541 return err 542 } 543 return linuxerr.ENOTSUP 544 } 545 546 // PrependPath implements vfs.FilesystemImpl.PrependPath. 547 func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { 548 fs.mu.RLock() 549 defer fs.mu.RUnlock() 550 return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) 551 } 552 553 // MountOptions implements vfs.FilesystemImpl.MountOptions. 554 func (fs *filesystem) MountOptions() string { 555 return "" 556 }