github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/kernfs/kernfs.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package kernfs provides the tools to implement inode-based filesystems. 16 // Kernfs has two main features: 17 // 18 // 1. The Inode interface, which maps VFS2's path-based filesystem operations to 19 // specific filesystem nodes. Kernfs uses the Inode interface to provide a 20 // blanket implementation for the vfs.FilesystemImpl. Kernfs also serves as 21 // the synchronization mechanism for all filesystem operations by holding a 22 // filesystem-wide lock across all operations. 23 // 24 // 2. Various utility types which provide generic implementations for various 25 // parts of the Inode and vfs.FileDescription interfaces. Client filesystems 26 // based on kernfs can embed the appropriate set of these to avoid having to 27 // reimplement common filesystem operations. See inode_impl_util.go and 28 // fd_impl_util.go. 29 // 30 // Reference Model: 31 // 32 // Kernfs dentries represent named pointers to inodes. Kernfs is solely 33 // responsible for maintaining and modifying its dentry tree; inode 34 // implementations cannot access the tree. Dentries and inodes have 35 // independent lifetimes and reference counts. A child dentry unconditionally 36 // holds a reference on its parent directory's dentry.
// A dentry also holds a reference on the inode it points to (although that
// might not be the only reference on the inode). Due to this, inodes can
// outlive the dentries that point to them. Multiple dentries can point to the
// same inode (for example, in the case of hardlinks). File descriptors hold a
// reference to the dentry they're opened on.
//
// Dentries are guaranteed to exist while holding Filesystem.mu for
// reading. Dropping dentries requires holding Filesystem.mu for writing. To
// queue dentries for destruction from a read critical section, see
// Filesystem.deferDecRef.
//
// Lock ordering:
//
// kernfs.Filesystem.mu
// kernfs.Dentry.dirMu
// vfs.VirtualFilesystem.mountMu
// vfs.Dentry.mu
// (inode implementation locks, if any)
// kernfs.Filesystem.droppedDentriesMu
package kernfs

import (
	"fmt"
	"sync/atomic"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/fspath"
	"github.com/SagerNet/gvisor/pkg/refsvfs2"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
	"github.com/SagerNet/gvisor/pkg/sync"
)

// Filesystem mostly implements vfs.FilesystemImpl for a generic in-memory
// filesystem. Concrete implementations are expected to embed this in their own
// Filesystem type.
//
// +stateify savable
type Filesystem struct {
	// vfsfs is the generic vfs filesystem object; VFSFilesystem returns a
	// pointer to it.
	vfsfs vfs.Filesystem

	// droppedDentriesMu protects droppedDentries. It is the last lock in the
	// package lock order.
	droppedDentriesMu sync.Mutex `state:"nosave"`

	// droppedDentries is a list of dentries waiting to be DecRef()ed. This is
	// used to defer dentry destruction until mu can be acquired for
	// writing. Protected by droppedDentriesMu.
	droppedDentries []*Dentry

	// mu synchronizes the lifetime of Dentries on this filesystem. Holding it
	// for reading guarantees continued existence of any resolved dentries, but
	// the dentry tree may be modified.
	//
	// Kernfs dentries can only be DecRef()ed while holding mu for writing. For
	// example:
	//
	//	fs.mu.Lock()
	//	defer fs.mu.Unlock()
	//	...
	//	dentry1.DecRef()
	//	defer dentry2.DecRef() // Ok, will run before Unlock.
	//
	// If discarding dentries in a read context, use Filesystem.deferDecRef. For
	// example:
	//
	//	fs.mu.RLock()
	//	defer fs.processDeferredDecRefs()
	//	defer fs.mu.RUnlock()
	//	...
	//	fs.deferDecRef(dentry)
	mu sync.RWMutex `state:"nosave"`

	// nextInoMinusOne is used to allocate inode numbers on this
	// filesystem. Must be accessed by atomic operations.
	nextInoMinusOne uint64

	// cachedDentries contains all dentries with 0 references. (Due to race
	// conditions, it may also contain dentries with non-zero references.)
	// cachedDentriesLen is the number of dentries in cachedDentries. These
	// fields are protected by mu.
	cachedDentries    dentryList
	cachedDentriesLen uint64

	// MaxCachedDentries is the maximum size of cachedDentries. If not set,
	// defaults to 0 and kernfs does not cache any dentries. This is immutable.
	MaxCachedDentries uint64

	// root is the root dentry of this filesystem. Note that root may be nil for
	// filesystems on a disconnected mount without a root (e.g. pipefs, sockfs,
	// hostfs). Filesystem holds an extra reference on root to prevent it from
	// being destroyed prematurely. This is immutable.
	root *Dentry
}

// deferDecRef defers dropping a dentry ref until the next call to
// processDeferredDecRefs{,Locked}. See comment on Filesystem.mu.
// This may be called while Filesystem.mu or Dentry.dirMu is locked.
134 func (fs *Filesystem) deferDecRef(d *Dentry) { 135 fs.droppedDentriesMu.Lock() 136 fs.droppedDentries = append(fs.droppedDentries, d) 137 fs.droppedDentriesMu.Unlock() 138 } 139 140 // processDeferredDecRefs calls vfs.Dentry.DecRef on all dentries in the 141 // droppedDentries list. See comment on Filesystem.mu. 142 // 143 // Precondition: Filesystem.mu or Dentry.dirMu must NOT be locked. 144 func (fs *Filesystem) processDeferredDecRefs(ctx context.Context) { 145 fs.droppedDentriesMu.Lock() 146 for _, d := range fs.droppedDentries { 147 // Defer the DecRef call so that we are not holding droppedDentriesMu 148 // when DecRef is called. 149 defer d.DecRef(ctx) 150 } 151 fs.droppedDentries = fs.droppedDentries[:0] // Keep slice memory for reuse. 152 fs.droppedDentriesMu.Unlock() 153 } 154 155 // VFSFilesystem returns the generic vfs filesystem object. 156 func (fs *Filesystem) VFSFilesystem() *vfs.Filesystem { 157 return &fs.vfsfs 158 } 159 160 // NextIno allocates a new inode number on this filesystem. 161 func (fs *Filesystem) NextIno() uint64 { 162 return atomic.AddUint64(&fs.nextInoMinusOne, 1) 163 } 164 165 // These consts are used in the Dentry.flags field. 166 const ( 167 // Dentry points to a directory inode. 168 dflagsIsDir = 1 << iota 169 170 // Dentry points to a symlink inode. 171 dflagsIsSymlink 172 ) 173 174 // Dentry implements vfs.DentryImpl. 175 // 176 // A kernfs dentry is similar to a dentry in a traditional filesystem: it's a 177 // named reference to an inode. A dentry generally lives as long as it's part of 178 // a mounted filesystem tree. Kernfs drops dentries once all references to them 179 // are dropped. Dentries hold a single reference to the inode they point 180 // to, and child dentries hold a reference on their parent. 181 // 182 // Must be initialized by Init prior to first use. 183 // 184 // +stateify savable 185 type Dentry struct { 186 vfsd vfs.Dentry 187 188 // refs is the reference count. 
When refs reaches 0, the dentry may be 189 // added to the cache or destroyed. If refs == -1, the dentry has already 190 // been destroyed. refs are allowed to go to 0 and increase again. refs is 191 // accessed using atomic memory operations. 192 refs int64 193 194 // fs is the owning filesystem. fs is immutable. 195 fs *Filesystem 196 197 // flags caches useful information about the dentry from the inode. See the 198 // dflags* consts above. Must be accessed by atomic ops. 199 flags uint32 200 201 parent *Dentry 202 name string 203 204 // If cached is true, dentryEntry links dentry into 205 // Filesystem.cachedDentries. cached and dentryEntry are protected by 206 // Filesystem.mu. 207 cached bool 208 dentryEntry 209 210 // dirMu protects children and the names of child Dentries. 211 // 212 // Note that holding fs.mu for writing is not sufficient; 213 // revalidateChildLocked(), which is a very hot path, may modify children with 214 // fs.mu acquired for reading only. 215 dirMu sync.Mutex `state:"nosave"` 216 children map[string]*Dentry 217 218 inode Inode 219 } 220 221 // IncRef implements vfs.DentryImpl.IncRef. 222 func (d *Dentry) IncRef() { 223 // d.refs may be 0 if d.fs.mu is locked, which serializes against 224 // d.cacheLocked(). 225 r := atomic.AddInt64(&d.refs, 1) 226 if d.LogRefs() { 227 refsvfs2.LogIncRef(d, r) 228 } 229 } 230 231 // TryIncRef implements vfs.DentryImpl.TryIncRef. 232 func (d *Dentry) TryIncRef() bool { 233 for { 234 r := atomic.LoadInt64(&d.refs) 235 if r <= 0 { 236 return false 237 } 238 if atomic.CompareAndSwapInt64(&d.refs, r, r+1) { 239 if d.LogRefs() { 240 refsvfs2.LogTryIncRef(d, r+1) 241 } 242 return true 243 } 244 } 245 } 246 247 // DecRef implements vfs.DentryImpl.DecRef. 
func (d *Dentry) DecRef(ctx context.Context) {
	r := atomic.AddInt64(&d.refs, -1)
	if d.LogRefs() {
		refsvfs2.LogDecRef(d, r)
	}
	if r == 0 {
		// Last reference dropped: cacheLocked decides, under fs.mu, whether
		// the dentry is cached or destroyed.
		d.fs.mu.Lock()
		d.cacheLocked(ctx)
		d.fs.mu.Unlock()
	} else if r < 0 {
		panic("kernfs.Dentry.DecRef() called without holding a reference")
	}
}

// decRefLocked is the variant of DecRef for callers that already hold
// d.fs.mu: it calls cacheLocked directly instead of acquiring the lock.
//
// Preconditions: d.fs.mu must be locked for writing.
func (d *Dentry) decRefLocked(ctx context.Context) {
	r := atomic.AddInt64(&d.refs, -1)
	if d.LogRefs() {
		refsvfs2.LogDecRef(d, r)
	}
	if r == 0 {
		d.cacheLocked(ctx)
	} else if r < 0 {
		panic("kernfs.Dentry.DecRef() called without holding a reference")
	}
}

// cacheLocked should be called after d's reference count becomes 0. The ref
// count check may happen before acquiring d.fs.mu so there might be a race
// condition where the ref count is increased again by the time the caller
// acquires d.fs.mu. This race is handled.
// Only reachable dentries are added to the cache. However, a dentry might
// become unreachable *while* it is in the cache due to invalidation.
//
// Preconditions: d.fs.mu must be locked for writing.
func (d *Dentry) cacheLocked(ctx context.Context) {
	// Dentries with a non-zero reference count must be retained. (The only way
	// to obtain a reference on a dentry with zero references is via path
	// resolution, which requires d.fs.mu, so if d.refs is zero then it will
	// remain zero while we hold d.fs.mu for writing.)
	refs := atomic.LoadInt64(&d.refs)
	if refs == -1 {
		// Dentry has already been destroyed.
		return
	}
	if refs > 0 {
		// The dentry is in use again, so it must not sit in the cache of
		// unreferenced dentries.
		if d.cached {
			d.fs.cachedDentries.Remove(d)
			d.fs.cachedDentriesLen--
			d.cached = false
		}
		return
	}
	// If the dentry is deleted and invalidated or has no parent, then it is no
	// longer reachable by path resolution and should be dropped immediately
	// because it has zero references.
	// Note that a dentry may not always have a parent; for example magic links
	// as described in Inode.Getlink.
	if isDead := d.VFSDentry().IsDead(); isDead || d.parent == nil {
		if !isDead {
			d.fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, d.VFSDentry())
		}
		if d.cached {
			d.fs.cachedDentries.Remove(d)
			d.fs.cachedDentriesLen--
			d.cached = false
		}
		d.destroyLocked(ctx)
		return
	}
	// If d is already cached, just move it to the front of the LRU.
	if d.cached {
		d.fs.cachedDentries.Remove(d)
		d.fs.cachedDentries.PushFront(d)
		return
	}
	// Cache the dentry, then evict the least recently used cached dentry if
	// the cache becomes over-full.
	d.fs.cachedDentries.PushFront(d)
	d.fs.cachedDentriesLen++
	d.cached = true
	if d.fs.cachedDentriesLen <= d.fs.MaxCachedDentries {
		return
	}
	d.fs.evictCachedDentryLocked(ctx)
	// Whether or not victim was destroyed, we brought fs.cachedDentriesLen
	// back down to fs.MaxCachedDentries, so we don't loop.
}

// evictCachedDentryLocked removes the dentry at the back of
// fs.cachedDentries from the cache and, if it still has zero references,
// unhashes it from its parent and destroys it.
//
// Preconditions:
// * fs.mu must be locked for writing.
// * fs.cachedDentriesLen != 0.
func (fs *Filesystem) evictCachedDentryLocked(ctx context.Context) {
	// Evict the least recently used dentry because cache size is greater than
	// max cache size (configured on mount).
	victim := fs.cachedDentries.Back()
	fs.cachedDentries.Remove(victim)
	fs.cachedDentriesLen--
	victim.cached = false
	// victim.refs may have become non-zero from an earlier path resolution
	// after it was inserted into fs.cachedDentries.
	if atomic.LoadInt64(&victim.refs) == 0 {
		if !victim.vfsd.IsDead() {
			victim.parent.dirMu.Lock()
			// Note that victim can't be a mount point (in any mount
			// namespace), since VFS holds references on mount points.
			fs.vfsfs.VirtualFilesystem().InvalidateDentry(ctx, victim.VFSDentry())
			delete(victim.parent.children, victim.name)
			victim.parent.dirMu.Unlock()
		}
		victim.destroyLocked(ctx)
	}
	// Whether or not victim was destroyed, we brought fs.cachedDentriesLen
	// back down to fs.MaxCachedDentries, so we don't loop.
}

// destroyLocked destroys the dentry. It marks the dentry destroyed by setting
// refs to -1, drops the dentry's reference on its inode and, if it has a
// parent, its reference on the parent. It panics if refs is not 0 on entry.
//
// Preconditions:
// * d.fs.mu must be locked for writing.
// * d.refs == 0.
// * d should have been removed from d.parent.children, i.e. d is not reachable
//   by path traversal.
// * d.vfsd.IsDead() is true.
func (d *Dentry) destroyLocked(ctx context.Context) {
	refs := atomic.LoadInt64(&d.refs)
	switch refs {
	case 0:
		// Mark the dentry destroyed.
		atomic.StoreInt64(&d.refs, -1)
	case -1:
		panic("dentry.destroyLocked() called on already destroyed dentry")
	default:
		panic("dentry.destroyLocked() called with references on the dentry")
	}

	d.inode.DecRef(ctx) // IncRef from Init.
	d.inode = nil

	// Drop the reference taken on the parent in insertChildLocked. fs.mu is
	// already held, hence the Locked variant.
	if d.parent != nil {
		d.parent.decRefLocked(ctx)
	}

	refsvfs2.Unregister(d)
}

// RefType implements refsvfs2.CheckedObject.Type.
func (d *Dentry) RefType() string {
	return "kernfs.Dentry"
}

// LeakMessage implements refsvfs2.CheckedObject.LeakMessage.
func (d *Dentry) LeakMessage() string {
	return fmt.Sprintf("[kernfs.Dentry %p] reference count of %d instead of -1", d, atomic.LoadInt64(&d.refs))
}

// LogRefs implements refsvfs2.CheckedObject.LogRefs.
//
// This should only be set to true for debugging purposes, as it can generate an
// extremely large amount of output and drastically degrade performance.
func (d *Dentry) LogRefs() bool {
	return false
}

// InitRoot initializes this dentry as the root of the filesystem.
//
// Precondition: Caller must hold a reference on inode.
414 // 415 // Postcondition: Caller's reference on inode is transferred to the dentry. 416 func (d *Dentry) InitRoot(fs *Filesystem, inode Inode) { 417 d.Init(fs, inode) 418 fs.root = d 419 // Hold an extra reference on the root dentry. It is held by fs to prevent the 420 // root from being "cached" and subsequently evicted. 421 d.IncRef() 422 } 423 424 // Init initializes this dentry. 425 // 426 // Precondition: Caller must hold a reference on inode. 427 // 428 // Postcondition: Caller's reference on inode is transferred to the dentry. 429 func (d *Dentry) Init(fs *Filesystem, inode Inode) { 430 d.vfsd.Init(d) 431 d.fs = fs 432 d.inode = inode 433 atomic.StoreInt64(&d.refs, 1) 434 ftype := inode.Mode().FileType() 435 if ftype == linux.ModeDirectory { 436 d.flags |= dflagsIsDir 437 } 438 if ftype == linux.ModeSymlink { 439 d.flags |= dflagsIsSymlink 440 } 441 refsvfs2.Register(d) 442 } 443 444 // VFSDentry returns the generic vfs dentry for this kernfs dentry. 445 func (d *Dentry) VFSDentry() *vfs.Dentry { 446 return &d.vfsd 447 } 448 449 // isDir checks whether the dentry points to a directory inode. 450 func (d *Dentry) isDir() bool { 451 return atomic.LoadUint32(&d.flags)&dflagsIsDir != 0 452 } 453 454 // isSymlink checks whether the dentry points to a symlink inode. 455 func (d *Dentry) isSymlink() bool { 456 return atomic.LoadUint32(&d.flags)&dflagsIsSymlink != 0 457 } 458 459 // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. 460 // 461 // Although Linux technically supports inotify on pseudo filesystems (inotify 462 // is implemented at the vfs layer), it is not particularly useful. It is left 463 // unimplemented until someone actually needs it. 464 func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et vfs.EventType) {} 465 466 // Watches implements vfs.DentryImpl.Watches. 467 func (d *Dentry) Watches() *vfs.Watches { 468 return nil 469 } 470 471 // OnZeroWatches implements vfs.Dentry.OnZeroWatches. 
472 func (d *Dentry) OnZeroWatches(context.Context) {} 473 474 // insertChild inserts child into the vfs dentry cache with the given name under 475 // this dentry. This does not update the directory inode, so calling this on its 476 // own isn't sufficient to insert a child into a directory. 477 // 478 // Preconditions: 479 // * d must represent a directory inode. 480 // * d.fs.mu must be locked for at least reading. 481 func (d *Dentry) insertChild(name string, child *Dentry) { 482 d.dirMu.Lock() 483 d.insertChildLocked(name, child) 484 d.dirMu.Unlock() 485 } 486 487 // insertChildLocked is equivalent to insertChild, with additional 488 // preconditions. 489 // 490 // Preconditions: 491 // * d must represent a directory inode. 492 // * d.dirMu must be locked. 493 // * d.fs.mu must be locked for at least reading. 494 func (d *Dentry) insertChildLocked(name string, child *Dentry) { 495 if !d.isDir() { 496 panic(fmt.Sprintf("insertChildLocked called on non-directory Dentry: %+v.", d)) 497 } 498 d.IncRef() // DecRef in child's Dentry.destroy. 499 child.parent = d 500 child.name = name 501 if d.children == nil { 502 d.children = make(map[string]*Dentry) 503 } 504 d.children[name] = child 505 } 506 507 // Inode returns the dentry's inode. 508 func (d *Dentry) Inode() Inode { 509 return d.inode 510 } 511 512 // FSLocalPath returns an absolute path to d, relative to the root of its 513 // filesystem. 514 func (d *Dentry) FSLocalPath() string { 515 var b fspath.Builder 516 _ = genericPrependPath(vfs.VirtualDentry{}, nil, d, &b) 517 b.PrependByte('/') 518 return b.String() 519 } 520 521 // The Inode interface maps filesystem-level operations that operate on paths to 522 // equivalent operations on specific filesystem nodes. 523 // 524 // The interface methods are groups into logical categories as sub interfaces 525 // below. Generally, an implementation for each sub interface can be provided by 526 // embedding an appropriate type from inode_impl_utils.go. 
// The sub interfaces are purely organizational. Methods declared directly in
// the main interface have no generic implementations, and should be explicitly
// provided by the client filesystem.
//
// Generally, implementations are not responsible for tasks that are common to
// all filesystems. These include:
//
// - Checking that dentries passed to methods are of the appropriate file type.
// - Checking permissions.
//
// Inode functions may be called holding filesystem wide locks and are not
// allowed to call vfs functions that may reenter, unless otherwise noted.
//
// Specific responsibilities of implementations are documented below.
type Inode interface {
	// Methods related to reference counting. A generic implementation is
	// provided by InodeNoopRefCount. These methods are generally called by the
	// equivalent Dentry methods.
	inodeRefs

	// Methods related to node metadata. A generic implementation is provided by
	// InodeAttrs. Note that a concrete filesystem using kernfs is responsible for
	// managing link counts.
	inodeMetadata

	// Method for inodes that represent symlink. InodeNotSymlink provides a
	// blanket implementation for all non-symlink inodes.
	inodeSymlink

	// Method for inodes that represent directories. InodeNotDirectory provides
	// a blanket implementation for all non-directory inodes.
	inodeDirectory

	// Open creates a file description for the filesystem object represented by
	// this inode. The returned file description should hold a reference on the
	// dentry for its lifetime.
	//
	// Precondition: rp.Done(). d must be the kernfs Dentry containing the
	// inode on which Open() is being called.
	Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error)

	// StatFS returns filesystem statistics for the client filesystem. This
	// corresponds to vfs.FilesystemImpl.StatFSAt. If the client filesystem
	// doesn't support statfs(2), this should return ENOSYS.
	StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error)

	// Keep indicates whether the dentry created after Inode.Lookup should be
	// kept in the kernfs dentry tree.
	Keep() bool

	// Valid should return true if this inode is still valid, or needs to
	// be resolved again by a call to Lookup.
	Valid(ctx context.Context) bool
}

// inodeRefs groups the reference-counting methods of Inode.
type inodeRefs interface {
	IncRef()
	DecRef(ctx context.Context)
	TryIncRef() bool
}

// inodeMetadata groups the Inode methods that deal with node metadata:
// permission checks, mode and stat.
type inodeMetadata interface {
	// CheckPermissions checks that creds may access this inode for the
	// requested access type, per the rules of
	// fs/namei.c:generic_permission().
	CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error

	// Mode returns the (struct stat)::st_mode value for this inode. This is
	// separated from Stat for performance.
	Mode() linux.FileMode

	// Stat returns the metadata for this inode. This corresponds to
	// vfs.FilesystemImpl.StatAt.
	Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error)

	// SetStat updates the metadata for this inode. This corresponds to
	// vfs.FilesystemImpl.SetStatAt. Implementations are responsible for checking
	// if the operation can be performed (see vfs.CheckSetStat() for common
	// checks).
	SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error
}

// Precondition: All methods in this interface may only be called on directory
// inodes.
type inodeDirectory interface {
	// The New{File,Dir,Node,Link,Symlink} methods below should return a new inode
	// that will be hashed into the dentry tree.
	//
	// These inode constructors are inode-level operations rather than
	// filesystem-level operations to allow client filesystems to mix different
	// implementations based on the new node's location in the
	// filesystem.

	// HasChildren returns true if the directory inode has any children.
	HasChildren() bool

	// NewFile creates a new regular file inode.
	NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error)

	// NewDir creates a new directory inode.
	NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error)

	// NewLink creates a new hardlink to a specified inode in this
	// directory. Implementations should create a new kernfs Dentry pointing to
	// target, and update target's link count.
	NewLink(ctx context.Context, name string, target Inode) (Inode, error)

	// NewSymlink creates a new symbolic link inode.
	NewSymlink(ctx context.Context, name, target string) (Inode, error)

	// NewNode creates a new filesystem node for a mknod syscall.
	NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error)

	// Unlink removes a child dentry from this directory inode.
	Unlink(ctx context.Context, name string, child Inode) error

	// RmDir removes an empty child directory from this directory
	// inode. Implementations must update the parent directory's link count,
	// if required. Implementations are not responsible for checking that child
	// is a directory or that it is empty.
	RmDir(ctx context.Context, name string, child Inode) error

	// Rename is called on the source directory containing an inode being
	// renamed. child should point to the resolved child in the source
	// directory.
	//
	// Precondition: Caller must serialize concurrent calls to Rename.
	Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error

	// Lookup should return an appropriate inode if name should resolve to a
	// child of this directory inode. This gives the directory an opportunity
	// on every lookup to resolve additional entries. This is only called when
	// the inode is a directory.
	//
	// The child returned by Lookup will be hashed into the VFS dentry tree,
	// at least for the duration of the current FS operation.
	//
	// Lookup must return the child with an extra reference whose ownership is
	// transferred to the dentry that is created to point to that inode. If
	// Inode.Keep returns false, that new dentry will be dropped at the end of
	// the current filesystem operation (before returning back to the VFS
	// layer) if no other ref is picked on that dentry. If Inode.Keep returns
	// true, then the dentry will be cached into the dentry tree until it is
	// Unlink'd or RmDir'd.
	Lookup(ctx context.Context, name string) (Inode, error)

	// IterDirents is used to iterate over dynamically created entries. It invokes
	// cb on each entry in the directory represented by the Inode.
	// 'offset' is the offset for the entire IterDirents call, which may include
	// results from the caller (e.g. "." and ".."). 'relOffset' is the offset
	// inside the entries returned by this IterDirents invocation. In other words,
	// 'offset' should be used to calculate each vfs.Dirent.NextOff as well as
	// the return value, while 'relOffset' is the place to start iteration.
	IterDirents(ctx context.Context, mnt *vfs.Mount, callback vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error)
}

// inodeSymlink groups the Inode methods that resolve symbolic links.
type inodeSymlink interface {
	// Readlink returns the target of a symbolic link. If an inode is not a
	// symlink, the implementation should return EINVAL.
	//
	// Readlink is called with no kernfs locks held, so it may reenter if needed
	// to resolve symlink targets.
	Readlink(ctx context.Context, mnt *vfs.Mount) (string, error)

	// Getlink returns the target of a symbolic link, as used by path
	// resolution:
	//
	// - If the inode is a "magic link" (a link whose target is most accurately
	// represented as a VirtualDentry), Getlink returns (ok VirtualDentry, "",
	// nil). A reference is taken on the returned VirtualDentry.
	//
	// - If the inode is an ordinary symlink, Getlink returns (zero-value
	// VirtualDentry, symlink target, nil).
	//
	// - If the inode is not a symlink, Getlink returns (zero-value
	// VirtualDentry, "", EINVAL).
	Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDentry, string, error)
}