// Source: github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/sentry/fsimpl/kernfs/kernfs.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package kernfs provides the tools to implement inode-based filesystems.
// Kernfs has two main features:
//
//  1. The Inode interface, which maps VFS's path-based filesystem operations to
//     specific filesystem nodes. Kernfs uses the Inode interface to provide a
//     blanket implementation for the vfs.FilesystemImpl. Kernfs also serves as
//     the synchronization mechanism for all filesystem operations by holding a
//     filesystem-wide lock across all operations.
//
//  2. Various utility types which provide generic implementations for various
//     parts of the Inode and vfs.FileDescription interfaces. Client filesystems
//     based on kernfs can embed the appropriate set of these to avoid having to
//     reimplement common filesystem operations. See inode_impl_util.go and
//     fd_impl_util.go.
//
// Reference Model:
//
// Kernfs dentries represent named pointers to inodes. Kernfs is solely
// responsible for maintaining and modifying its dentry tree; inode
// implementations can not access the tree. Dentries and inodes have
// independent lifetimes and reference counts. A child dentry unconditionally
// holds a reference on its parent directory's dentry. A dentry also holds a
// reference on the inode it points to (although that might not be the only
// reference on the inode). Due to this inodes can outlive the dentries that
// point to them. Multiple dentries can point to the same inode (for example,
// in the case of hardlinks). File descriptors hold a reference to the dentry
// they're opened on.
//
// Dentries are guaranteed to exist while holding Filesystem.mu for
// reading. Dropping dentries requires holding Filesystem.mu for writing. To
// queue dentries for destruction from a read critical section, see
// Filesystem.deferDecRef.
//
// Lock ordering:
//
//	kernfs.Filesystem.mu
//	kernel.TaskSet.mu
//	kernel.Task.mu
//	kernfs.Dentry.dirMu
//	vfs.VirtualFilesystem.mountMu
//	vfs.Dentry.mu
//	(inode implementation locks, if any)
//
//	kernfs.Filesystem.deferredDecRefsMu
//
// NOTE(review): the indentation that expresses which locks nest under which
// was lost in this copy of the file; confirm the hierarchy against the
// upstream source before relying on the relative order of sibling entries.
package kernfs

import (
	"fmt"

	"github.com/ttpreport/gvisor-ligolo/pkg/abi/linux"
	"github.com/ttpreport/gvisor-ligolo/pkg/atomicbitops"
	"github.com/ttpreport/gvisor-ligolo/pkg/context"
	"github.com/ttpreport/gvisor-ligolo/pkg/errors/linuxerr"
	"github.com/ttpreport/gvisor-ligolo/pkg/fspath"
	"github.com/ttpreport/gvisor-ligolo/pkg/refs"
	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/kernel/auth"
	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/vfs"
	"github.com/ttpreport/gvisor-ligolo/pkg/sync"
)

// Filesystem mostly implements vfs.FilesystemImpl for a generic in-memory
// filesystem. Concrete implementations are expected to embed this in their own
// Filesystem type.
//
// +stateify savable
type Filesystem struct {
	vfsfs vfs.Filesystem

	// deferredDecRefsMu guards deferredDecRefs.
	deferredDecRefsMu deferredDecRefsMutex `state:"nosave"`

	// deferredDecRefs is a list of dentries waiting to be DecRef()ed. This is
	// used to defer dentry destruction until mu can be acquired for
	// writing. Protected by deferredDecRefsMu.
	deferredDecRefs []refs.RefCounter

	// mu synchronizes the lifetime of Dentries on this filesystem. Holding it
	// for reading guarantees continued existence of any resolved dentries, but
	// the dentry tree may be modified.
	//
	// Kernfs dentries can only be DecRef()ed while holding mu for writing. For
	// example:
	//
	//	fs.mu.Lock()
	//	defer fs.mu.Unlock()
	//	...
	//	dentry1.DecRef(ctx)
	//	defer dentry2.DecRef(ctx) // Ok, will run before Unlock.
	//
	// If discarding dentries in a read context, use Filesystem.deferDecRef. For
	// example:
	//
	//	fs.mu.RLock()
	//	defer fs.processDeferredDecRefs(ctx)
	//	defer fs.mu.RUnlock()
	//	...
	//	fs.deferDecRef(dentry)
	mu filesystemRWMutex `state:"nosave"`

	// nextInoMinusOne is used to allocate inode numbers on this
	// filesystem. Must be accessed by atomic operations.
	nextInoMinusOne atomicbitops.Uint64

	// cachedDentries contains all dentries with 0 references. (Due to race
	// conditions, it may also contain dentries with non-zero references.)
	// cachedDentriesLen is the number of dentries in cachedDentries. These
	// fields are protected by mu.
	cachedDentries    dentryList
	cachedDentriesLen uint64

	// MaxCachedDentries is the maximum size of cachedDentries. If not set,
	// defaults to 0 and kernfs does not cache any dentries. This is immutable.
	MaxCachedDentries uint64

	// root is the root dentry of this filesystem. Note that root may be nil for
	// filesystems on a disconnected mount without a root (e.g. pipefs, sockfs,
	// hostfs). Filesystem holds an extra reference on root to prevent it from
	// being destroyed prematurely. This is immutable.
	root *Dentry
}

// deferDecRef defers dropping a dentry ref until the next call to
// processDeferredDecRefs{,Locked}. See comment on Filesystem.mu.
// This may be called while Filesystem.mu or Dentry.dirMu is locked.
138 func (fs *Filesystem) deferDecRef(d refs.RefCounter) { 139 fs.deferredDecRefsMu.Lock() 140 fs.deferredDecRefs = append(fs.deferredDecRefs, d) 141 fs.deferredDecRefsMu.Unlock() 142 } 143 144 // SafeDecRefFD safely DecRef the FileDescription making sure DecRef is deferred 145 // in case Filesystem.mu is held. See comment on Filesystem.mu. 146 func (fs *Filesystem) SafeDecRefFD(ctx context.Context, fd *vfs.FileDescription) { 147 if d, ok := fd.Dentry().Impl().(*Dentry); ok && d.fs == fs { 148 // Only defer if dentry belongs to this filesystem, since locks cannot cross 149 // filesystems. 150 fs.deferDecRef(fd) 151 return 152 } 153 fd.DecRef(ctx) 154 } 155 156 // SafeDecRef safely DecRef the virtual dentry making sure DecRef is deferred 157 // in case Filesystem.mu is held. See comment on Filesystem.mu. 158 func (fs *Filesystem) SafeDecRef(ctx context.Context, vd vfs.VirtualDentry) { 159 if d, ok := vd.Dentry().Impl().(*Dentry); ok && d.fs == fs { 160 // Only defer if dentry belongs to this filesystem, since locks cannot cross 161 // filesystems. 162 fs.deferDecRef(&vd) 163 return 164 } 165 vd.DecRef(ctx) 166 } 167 168 // processDeferredDecRefs calls vfs.Dentry.DecRef on all dentries in the 169 // deferredDecRefs list. See comment on Filesystem.mu. 170 // 171 // Precondition: Filesystem.mu or Dentry.dirMu must NOT be locked. 172 func (fs *Filesystem) processDeferredDecRefs(ctx context.Context) { 173 fs.deferredDecRefsMu.Lock() 174 for _, d := range fs.deferredDecRefs { 175 // Defer the DecRef call so that we are not holding deferredDecRefsMu 176 // when DecRef is called. 177 defer d.DecRef(ctx) 178 } 179 fs.deferredDecRefs = fs.deferredDecRefs[:0] // Keep slice memory for reuse. 180 fs.deferredDecRefsMu.Unlock() 181 } 182 183 // VFSFilesystem returns the generic vfs filesystem object. 184 func (fs *Filesystem) VFSFilesystem() *vfs.Filesystem { 185 return &fs.vfsfs 186 } 187 188 // NextIno allocates a new inode number on this filesystem. 
189 func (fs *Filesystem) NextIno() uint64 { 190 return fs.nextInoMinusOne.Add(1) 191 } 192 193 // These consts are used in the Dentry.flags field. 194 const ( 195 // Dentry points to a directory inode. 196 dflagsIsDir = 1 << iota 197 198 // Dentry points to a symlink inode. 199 dflagsIsSymlink 200 ) 201 202 // Dentry implements vfs.DentryImpl. 203 // 204 // A kernfs dentry is similar to a dentry in a traditional filesystem: it's a 205 // named reference to an inode. A dentry generally lives as long as it's part of 206 // a mounted filesystem tree. Kernfs drops dentries once all references to them 207 // are dropped. Dentries hold a single reference to the inode they point 208 // to, and child dentries hold a reference on their parent. 209 // 210 // Must be initialized by Init prior to first use. 211 // 212 // +stateify savable 213 type Dentry struct { 214 vfsd vfs.Dentry 215 216 // refs is the reference count. When refs reaches 0, the dentry may be 217 // added to the cache or destroyed. If refs == -1, the dentry has already 218 // been destroyed. refs are allowed to go to 0 and increase again. refs is 219 // accessed using atomic memory operations. 220 refs atomicbitops.Int64 221 222 // fs is the owning filesystem. fs is immutable. 223 fs *Filesystem 224 225 // flags caches useful information about the dentry from the inode. See the 226 // dflags* consts above. 227 flags atomicbitops.Uint32 228 229 parent *Dentry 230 name string 231 232 // If cached is true, dentryEntry links dentry into 233 // Filesystem.cachedDentries. cached and dentryEntry are protected by 234 // Filesystem.mu. 235 cached bool 236 dentryEntry 237 238 // dirMu protects children and the names of child Dentries. 239 // 240 // Note that holding fs.mu for writing is not sufficient; 241 // revalidateChildLocked(), which is a very hot path, may modify children with 242 // fs.mu acquired for reading only. 
243 dirMu sync.Mutex `state:"nosave"` 244 children map[string]*Dentry 245 246 inode Inode 247 248 // If deleted is non-zero, the file represented by this dentry has been 249 // deleted. deleted is accessed using atomic memory operations. 250 deleted atomicbitops.Uint32 251 } 252 253 // IncRef implements vfs.DentryImpl.IncRef. 254 func (d *Dentry) IncRef() { 255 // d.refs may be 0 if d.fs.mu is locked, which serializes against 256 // d.cacheLocked(). 257 r := d.refs.Add(1) 258 if d.LogRefs() { 259 refs.LogIncRef(d, r) 260 } 261 } 262 263 // TryIncRef implements vfs.DentryImpl.TryIncRef. 264 func (d *Dentry) TryIncRef() bool { 265 for { 266 r := d.refs.Load() 267 if r <= 0 { 268 return false 269 } 270 if d.refs.CompareAndSwap(r, r+1) { 271 if d.LogRefs() { 272 refs.LogTryIncRef(d, r+1) 273 } 274 return true 275 } 276 } 277 } 278 279 // DecRef implements vfs.DentryImpl.DecRef. 280 func (d *Dentry) DecRef(ctx context.Context) { 281 r := d.refs.Add(-1) 282 if d.LogRefs() { 283 refs.LogDecRef(d, r) 284 } 285 if r == 0 { 286 if d.inode.Anonymous() { 287 // Nothing to cache. Skip right to destroy. This avoids 288 // taking fs.mu in the DecRef() path for anonymous 289 // inodes. 290 d.destroy(ctx) 291 return 292 } 293 294 d.fs.mu.Lock() 295 defer d.fs.mu.Unlock() 296 d.cacheLocked(ctx) 297 } else if r < 0 { 298 panic("kernfs.Dentry.DecRef() called without holding a reference") 299 } 300 } 301 302 func (d *Dentry) decRefLocked(ctx context.Context) { 303 r := d.refs.Add(-1) 304 if d.LogRefs() { 305 refs.LogDecRef(d, r) 306 } 307 if r == 0 { 308 d.cacheLocked(ctx) 309 } else if r < 0 { 310 panic("kernfs.Dentry.DecRef() called without holding a reference") 311 } 312 } 313 314 // cacheLocked should be called after d's reference count becomes 0. The ref 315 // count check may happen before acquiring d.fs.mu so there might be a race 316 // condition where the ref count is increased again by the time the caller 317 // acquires d.fs.mu. This race is handled. 
// Only reachable dentries are added to the cache. However, a dentry might
// become unreachable *while* it is in the cache due to invalidation.
//
// Preconditions: d.fs.mu must be locked for writing.
func (d *Dentry) cacheLocked(ctx context.Context) {
	// Dentries with a non-zero reference count must be retained. (The only way
	// to obtain a reference on a dentry with zero references is via path
	// resolution, which requires d.fs.mu, so if d.refs is zero then it will
	// remain zero while we hold d.fs.mu for writing.)
	//
	// Note: the local below shadows the imported refs package for the rest of
	// this function.
	refs := d.refs.Load()
	if refs == -1 {
		// Dentry has already been destroyed.
		return
	}
	if refs > 0 {
		// Someone re-acquired a reference; make sure d is not left in the
		// zero-ref cache list.
		if d.cached {
			d.fs.cachedDentries.Remove(d)
			d.fs.cachedDentriesLen--
			d.cached = false
		}
		return
	}
	// If the dentry is deleted and invalidated or has no parent, then it is no
	// longer reachable by path resolution and should be dropped immediately
	// because it has zero references.
	// Note that a dentry may not always have a parent; for example magic links
	// as described in Inode.Getlink.
	if isDead := d.VFSDentry().IsDead(); isDead || d.parent == nil {
		if !isDead {
			// Parentless but not yet dead: invalidate it now and queue the
			// references returned by InvalidateDentry for a deferred DecRef.
			rcs := d.fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, d.VFSDentry())
			for _, rc := range rcs {
				d.fs.deferDecRef(rc)
			}
		}
		if d.cached {
			d.fs.cachedDentries.Remove(d)
			d.fs.cachedDentriesLen--
			d.cached = false
		}
		if d.isDeleted() {
			d.inode.Watches().HandleDeletion(ctx)
		}
		d.destroy(ctx)
		// Release the reference this dentry held on its parent, if any.
		if d.parent != nil {
			d.parent.decRefLocked(ctx)
		}
		return
	}
	if d.VFSDentry().IsEvictable() {
		d.evictLocked(ctx)
		return
	}
	// If d is already cached, just move it to the front of the LRU.
	if d.cached {
		d.fs.cachedDentries.Remove(d)
		d.fs.cachedDentries.PushFront(d)
		return
	}
	// Cache the dentry, then evict the least recently used cached dentry if
	// the cache becomes over-full.
	d.fs.cachedDentries.PushFront(d)
	d.fs.cachedDentriesLen++
	d.cached = true
	if d.fs.cachedDentriesLen <= d.fs.MaxCachedDentries {
		return
	}
	d.fs.evictCachedDentryLocked(ctx)
	// Whether or not victim was destroyed, we brought fs.cachedDentriesLen
	// back down to fs.MaxCachedDentries, so we don't loop.
}

// evictCachedDentryLocked evicts the dentry at the back of
// fs.cachedDentries.
//
// Preconditions:
//   - fs.mu must be locked for writing.
func (fs *Filesystem) evictCachedDentryLocked(ctx context.Context) {
	// Evict the least recently used dentry because cache size is greater than
	// max cache size (configured on mount).
	// NOTE(review): presumably Back() returns nil on an empty list, which
	// evictLocked tolerates via its nil check — confirm against dentryList.
	fs.cachedDentries.Back().evictLocked(ctx)
}

// evictLocked removes d from the dentry cache and, if d still has zero
// references, invalidates it, unhashes it from its parent, destroys it and
// drops the reference it held on its parent. It is a no-op when d is nil.
//
// Preconditions:
//   - d.fs.mu must be locked for writing.
func (d *Dentry) evictLocked(ctx context.Context) {
	if d == nil {
		return
	}
	if d.cached {
		d.fs.cachedDentries.Remove(d)
		d.fs.cachedDentriesLen--
		d.cached = false
	}
	// victim.refs may have become non-zero from an earlier path resolution
	// after it was inserted into fs.cachedDentries.
	if d.refs.Load() == 0 {
		if !d.vfsd.IsDead() {
			// The parent's children map is modified below, so hold the
			// parent's dirMu for the duration.
			d.parent.dirMu.Lock()
			// Note that victim can't be a mount point (in any mount
			// namespace), since VFS holds references on mount points.
			rcs := d.fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, d.VFSDentry())
			for _, rc := range rcs {
				d.fs.deferDecRef(rc)
			}
			delete(d.parent.children, d.name)
			d.parent.dirMu.Unlock()
		}
		d.destroy(ctx)
		// Release the reference this dentry held on its parent, if any.
		if d.parent != nil {
			d.parent.decRefLocked(ctx)
		}
	}
}

// destroy destroys the dentry: it marks the reference count as -1, drops the
// dentry's reference on its inode and unregisters d from reference tracking.
// It panics if the reference count is anything other than 0 on entry.
//
// Preconditions:
//   - d.refs == 0.
//   - d should have been removed from d.parent.children, i.e. d is not reachable
//     by path traversal.
//   - d.vfsd.IsDead() is true.
func (d *Dentry) destroy(ctx context.Context) {
	switch refs := d.refs.Load(); refs {
	case 0:
		// Mark the dentry destroyed.
		d.refs.Store(-1)
	case -1:
		panic("dentry.destroy() called on already destroyed dentry")
	default:
		panic("dentry.destroy() called with references on the dentry")
	}

	// Drop the inode reference whose ownership was transferred to this dentry
	// in Init.
	d.inode.DecRef(ctx)

	refs.Unregister(d)
}

// RefType implements refs.CheckedObject.Type.
func (d *Dentry) RefType() string {
	return "kernfs.Dentry"
}

// LeakMessage implements refs.CheckedObject.LeakMessage.
func (d *Dentry) LeakMessage() string {
	return fmt.Sprintf("[kernfs.Dentry %p] reference count of %d instead of -1", d, d.refs.Load())
}

// LogRefs implements refs.CheckedObject.LogRefs.
//
// This should only be set to true for debugging purposes, as it can generate an
// extremely large amount of output and drastically degrade performance.
func (d *Dentry) LogRefs() bool {
	return false
}

// InitRoot initializes this dentry as the root of the filesystem.
//
// Precondition: Caller must hold a reference on inode.
//
// Postcondition: Caller's reference on inode is transferred to the dentry.
func (d *Dentry) InitRoot(fs *Filesystem, inode Inode) {
	d.Init(fs, inode)
	fs.root = d
	// Hold an extra reference on the root dentry. It is held by fs to prevent the
	// root from being "cached" and subsequently evicted.
	d.IncRef()
}

// Init initializes this dentry.
//
// Precondition: Caller must hold a reference on inode.
//
// Postcondition: Caller's reference on inode is transferred to the dentry.
488 func (d *Dentry) Init(fs *Filesystem, inode Inode) { 489 d.vfsd.Init(d) 490 d.fs = fs 491 d.inode = inode 492 d.refs.Store(1) 493 ftype := inode.Mode().FileType() 494 if ftype == linux.ModeDirectory { 495 d.flags = atomicbitops.FromUint32(d.flags.RacyLoad() | dflagsIsDir) 496 } 497 if ftype == linux.ModeSymlink { 498 d.flags = atomicbitops.FromUint32(d.flags.RacyLoad() | dflagsIsSymlink) 499 } 500 refs.Register(d) 501 } 502 503 // VFSDentry returns the generic vfs dentry for this kernfs dentry. 504 func (d *Dentry) VFSDentry() *vfs.Dentry { 505 return &d.vfsd 506 } 507 508 func (d *Dentry) isDeleted() bool { 509 return d.deleted.Load() != 0 510 } 511 512 func (d *Dentry) setDeleted() { 513 d.deleted.Store(1) 514 } 515 516 // isDir checks whether the dentry points to a directory inode. 517 func (d *Dentry) isDir() bool { 518 return d.flags.Load()&dflagsIsDir != 0 519 } 520 521 // isSymlink checks whether the dentry points to a symlink inode. 522 func (d *Dentry) isSymlink() bool { 523 return d.flags.Load()&dflagsIsSymlink != 0 524 } 525 526 // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. 527 func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) { 528 if d.isDir() { 529 events |= linux.IN_ISDIR 530 } 531 532 // Linux always notifies the parent first. 533 534 // Don't bother looking for a parent if the inode is anonymous. It 535 // won't have one. 536 if !d.inode.Anonymous() { 537 d.fs.mu.RLock() 538 if d.parent != nil { 539 d.parent.inode.Watches().Notify(ctx, d.name, events, cookie, et, d.isDeleted()) 540 } 541 d.fs.mu.RUnlock() 542 } 543 544 d.inode.Watches().Notify(ctx, "", events, cookie, et, d.isDeleted()) 545 } 546 547 // Watches implements vfs.DentryImpl.Watches. 548 func (d *Dentry) Watches() *vfs.Watches { 549 return d.inode.Watches() 550 } 551 552 // OnZeroWatches implements vfs.Dentry.OnZeroWatches. 
553 func (d *Dentry) OnZeroWatches(context.Context) {} 554 555 // insertChild inserts child into the vfs dentry cache with the given name under 556 // this dentry. This does not update the directory inode, so calling this on its 557 // own isn't sufficient to insert a child into a directory. 558 // 559 // Preconditions: 560 // - d must represent a directory inode. 561 // - d.fs.mu must be locked for at least reading. 562 func (d *Dentry) insertChild(name string, child *Dentry) { 563 d.dirMu.Lock() 564 d.insertChildLocked(name, child) 565 d.dirMu.Unlock() 566 } 567 568 // insertChildLocked is equivalent to insertChild, with additional 569 // preconditions. 570 // 571 // Preconditions: 572 // - d must represent a directory inode. 573 // - d.dirMu must be locked. 574 // - d.fs.mu must be locked for at least reading. 575 func (d *Dentry) insertChildLocked(name string, child *Dentry) { 576 if !d.isDir() { 577 panic(fmt.Sprintf("insertChildLocked called on non-directory Dentry: %+v.", d)) 578 } 579 d.IncRef() // DecRef in child's Dentry.destroy. 580 child.parent = d 581 child.name = name 582 if d.children == nil { 583 d.children = make(map[string]*Dentry) 584 } 585 d.children[name] = child 586 } 587 588 // Inode returns the dentry's inode. 589 func (d *Dentry) Inode() Inode { 590 return d.inode 591 } 592 593 // FSLocalPath returns an absolute path to d, relative to the root of its 594 // filesystem. 595 func (d *Dentry) FSLocalPath() string { 596 var b fspath.Builder 597 _ = genericPrependPath(vfs.VirtualDentry{}, nil, d, &b) 598 b.PrependByte('/') 599 return b.String() 600 } 601 602 // WalkDentryTree traverses p in the dentry tree for this filesystem. Note that 603 // this only traverses the dentry tree and is not a general path traversal. No 604 // symlinks and dynamic children are resolved, and no permission checks are 605 // performed. The caller is responsible for ensuring the returned Dentry exists 606 // for an appropriate lifetime. 
607 // 608 // p is interpreted starting at d, and may be absolute or relative (absolute vs 609 // relative paths both refer to the same target here, since p is absolute from 610 // d). p may contain "." and "..", but will not allow traversal above d (similar 611 // to ".." at the root dentry). 612 // 613 // This is useful for filesystem internals, where the filesystem may not be 614 // mounted yet. For a mounted filesystem, use GetDentryAt. 615 func (d *Dentry) WalkDentryTree(ctx context.Context, vfsObj *vfs.VirtualFilesystem, p fspath.Path) (*Dentry, error) { 616 d.fs.mu.RLock() 617 defer d.fs.processDeferredDecRefs(ctx) 618 defer d.fs.mu.RUnlock() 619 620 target := d 621 622 for pit := p.Begin; pit.Ok(); pit = pit.Next() { 623 pc := pit.String() 624 625 switch { 626 case target == nil: 627 return nil, linuxerr.ENOENT 628 case pc == ".": 629 // No-op, consume component and continue. 630 case pc == "..": 631 if target == d { 632 // Don't let .. traverse above the start point of the walk. 633 continue 634 } 635 target = target.parent 636 // Parent doesn't need revalidation since we revalidated it on the 637 // way to the child, and we're still holding fs.mu. 638 default: 639 var err error 640 641 d.dirMu.Lock() 642 target, err = d.fs.revalidateChildLocked(ctx, vfsObj, target, pc, target.children[pc]) 643 d.dirMu.Unlock() 644 645 if err != nil { 646 return nil, err 647 } 648 } 649 } 650 651 if target == nil { 652 return nil, linuxerr.ENOENT 653 } 654 655 target.IncRef() 656 return target, nil 657 } 658 659 // Parent returns the parent of this Dentry. This is not safe in general, the 660 // filesystem may concurrently move d elsewhere. The caller is responsible for 661 // ensuring the returned result remains valid while it is used. 662 func (d *Dentry) Parent() *Dentry { 663 return d.parent 664 } 665 666 // The Inode interface maps filesystem-level operations that operate on paths to 667 // equivalent operations on specific filesystem nodes. 
//
// The interface methods are grouped into logical categories as sub interfaces
// below. Generally, an implementation for each sub interface can be provided by
// embedding an appropriate type from inode_impl_util.go. The sub interfaces
// are purely organizational. Methods declared directly in the main interface
// have no generic implementations, and should be explicitly provided by the
// client filesystem.
//
// Generally, implementations are not responsible for tasks that are common to
// all filesystems. These include:
//
//   - Checking that dentries passed to methods are of the appropriate file type.
//   - Checking permissions.
//
// Inode functions may be called holding filesystem wide locks and are not
// allowed to call vfs functions that may reenter, unless otherwise noted.
//
// Specific responsibilities of implementations are documented below.
type Inode interface {
	// Methods related to reference counting. A generic implementation is
	// provided by InodeNoopRefCount. These methods are generally called by the
	// equivalent Dentry methods.
	inodeRefs

	// Methods related to node metadata. A generic implementation is provided by
	// InodeAttrs. Note that a concrete filesystem using kernfs is responsible for
	// managing link counts.
	inodeMetadata

	// Method for inodes that represent symlink. InodeNotSymlink provides a
	// blanket implementation for all non-symlink inodes.
	inodeSymlink

	// Method for inodes that represent directories. InodeNotDirectory provides
	// a blanket implementation for all non-directory inodes.
	inodeDirectory

	// Open creates a file description for the filesystem object represented by
	// this inode. The returned file description should hold a reference on the
	// dentry for its lifetime.
	//
	// Precondition: rp.Done(). vfsd.Impl() must be the kernfs Dentry containing
	// the inode on which Open() is being called.
	//
	// NOTE(review): the signature has no vfsd parameter; the precondition
	// presumably refers to d — confirm.
	Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error)

	// StatFS returns filesystem statistics for the client filesystem. This
	// corresponds to vfs.FilesystemImpl.StatFSAt. If the client filesystem
	// doesn't support statfs(2), this should return ENOSYS.
	StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error)

	// Keep indicates whether the dentry created after Inode.Lookup should be
	// kept in the kernfs dentry tree.
	Keep() bool

	// Valid should return true if this inode is still valid, or needs to
	// be resolved again by a call to Lookup.
	Valid(ctx context.Context) bool

	// Watches returns the set of inotify watches associated with this inode.
	Watches() *vfs.Watches

	// Anonymous indicates that the Inode is anonymous. It will never have
	// a name or parent.
	Anonymous() bool
}

// inodeRefs groups the reference-counting methods of an Inode.
type inodeRefs interface {
	IncRef()
	DecRef(ctx context.Context)
	TryIncRef() bool
}

// inodeMetadata groups the metadata-related methods of an Inode.
type inodeMetadata interface {
	// CheckPermissions checks that creds may access this inode for the
	// requested access type, per the rules of
	// fs/namei.c:generic_permission().
	CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error

	// Mode returns the (struct stat)::st_mode value for this inode. This is
	// separated from Stat for performance.
	Mode() linux.FileMode

	// UID returns the (struct stat)::st_uid value for this inode. This is
	// separated from Stat for performance.
	UID() auth.KUID

	// GID returns the (struct stat)::st_gid value for this inode. This is
	// separated from Stat for performance.
	GID() auth.KGID

	// Stat returns the metadata for this inode. This corresponds to
	// vfs.FilesystemImpl.StatAt.
	Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error)

	// SetStat updates the metadata for this inode. This corresponds to
	// vfs.FilesystemImpl.SetStatAt. Implementations are responsible for checking
	// if the operation can be performed (see vfs.CheckSetStat() for common
	// checks).
	SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error
}

// inodeDirectory groups the directory-related methods of an Inode.
//
// Precondition: All methods in this interface may only be called on directory
// inodes.
type inodeDirectory interface {
	// The New{File,Dir,Node,Link,Symlink} methods below should return a new inode
	// that will be hashed into the dentry tree.
	//
	// These inode constructors are inode-level operations rather than
	// filesystem-level operations to allow client filesystems to mix different
	// implementations based on the new node's location in the
	// filesystem.

	// HasChildren returns true if the directory inode has any children.
	HasChildren() bool

	// NewFile creates a new regular file inode.
	NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error)

	// NewDir creates a new directory inode.
	NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error)

	// NewLink creates a new hardlink to a specified inode in this
	// directory. Implementations should create a new kernfs Dentry pointing to
	// target, and update target's link count.
	NewLink(ctx context.Context, name string, target Inode) (Inode, error)

	// NewSymlink creates a new symbolic link inode.
	NewSymlink(ctx context.Context, name, target string) (Inode, error)

	// NewNode creates a new filesystem node for a mknod syscall.
	NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error)

	// Unlink removes a child dentry from this directory inode.
	Unlink(ctx context.Context, name string, child Inode) error

	// RmDir removes an empty child directory from this directory
	// inode. Implementations must update the parent directory's link count,
	// if required. Implementations are not responsible for checking that child
	// is a directory, or checking for an empty directory.
	RmDir(ctx context.Context, name string, child Inode) error

	// Rename is called on the source directory containing an inode being
	// renamed. child points to the resolved child in the source directory.
	// dstDir is guaranteed to be a directory inode.
	//
	// On a successful call to Rename, the caller updates the dentry tree to
	// reflect the name change.
	//
	// Precondition: Caller must serialize concurrent calls to Rename.
	Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error

	// Lookup should return an appropriate inode if name should resolve to a
	// child of this directory inode. This gives the directory an opportunity
	// on every lookup to resolve additional entries. This is only called when
	// the inode is a directory.
	//
	// The child returned by Lookup will be hashed into the VFS dentry tree,
	// at least for the duration of the current FS operation.
	//
	// Lookup must return the child with an extra reference whose ownership is
	// transferred to the dentry that is created to point to that inode. If
	// Inode.Keep returns false, that new dentry will be dropped at the end of
	// the current filesystem operation (before returning back to the VFS
	// layer) if no other ref is picked on that dentry. If Inode.Keep returns
	// true, then the dentry will be cached into the dentry tree until it is
	// Unlink'd or RmDir'd.
	Lookup(ctx context.Context, name string) (Inode, error)

	// IterDirents is used to iterate over dynamically created entries. It invokes
	// callback on each entry in the directory represented by the Inode.
	// 'offset' is the offset for the entire IterDirents call, which may include
	// results from the caller (e.g. "." and ".."). 'relOffset' is the offset
	// inside the entries returned by this IterDirents invocation. In other words,
	// 'offset' should be used to calculate each vfs.Dirent.NextOff as well as
	// the return value, while 'relOffset' is the place to start iteration.
	IterDirents(ctx context.Context, mnt *vfs.Mount, callback vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error)
}

// inodeSymlink groups the symlink-related methods of an Inode.
type inodeSymlink interface {
	// Readlink returns the target of a symbolic link. If an inode is not a
	// symlink, the implementation should return EINVAL.
	//
	// Readlink is called with no kernfs locks held, so it may reenter if needed
	// to resolve symlink targets.
	Readlink(ctx context.Context, mnt *vfs.Mount) (string, error)

	// Getlink returns the target of a symbolic link, as used by path
	// resolution:
	//
	//	- If the inode is a "magic link" (a link whose target is most accurately
	//		represented as a VirtualDentry), Getlink returns (ok VirtualDentry, "",
	//		nil). A reference is taken on the returned VirtualDentry.
	//
	//	- If the inode is an ordinary symlink, Getlink returns (zero-value
	//		VirtualDentry, symlink target, nil).
	//
	//	- If the inode is not a symlink, Getlink returns (zero-value
	//		VirtualDentry, "", EINVAL).
	Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error)
}