gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/vfs/dentry.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vfs 16 17 import ( 18 "gvisor.dev/gvisor/pkg/atomicbitops" 19 "gvisor.dev/gvisor/pkg/context" 20 "gvisor.dev/gvisor/pkg/errors/linuxerr" 21 "gvisor.dev/gvisor/pkg/refs" 22 "gvisor.dev/gvisor/pkg/sync" 23 ) 24 25 // Dentry represents a node in a Filesystem tree at which a file exists. 26 // 27 // Dentries are reference-counted. Unless otherwise specified, all Dentry 28 // methods require that a reference is held. 29 // 30 // Dentry is loosely analogous to Linux's struct dentry, but: 31 // 32 // - VFS does not associate Dentries with inodes. gVisor interacts primarily 33 // with filesystems that are accessed through filesystem APIs (as opposed to 34 // raw block devices); many such APIs support only paths and file descriptors, 35 // and not inodes. Furthermore, when parties outside the scope of VFS can 36 // rename inodes on such filesystems, VFS generally cannot "follow" the rename, 37 // both due to synchronization issues and because it may not even be able to 38 // name the destination path; this implies that it would in fact be incorrect 39 // for Dentries to be associated with inodes on such filesystems. Consequently, 40 // operations that are inode operations in Linux are FilesystemImpl methods 41 // and/or FileDescriptionImpl methods in gVisor's VFS. Filesystems that do 42 // support inodes may store appropriate state in implementations of DentryImpl. 43 // 44 // - VFS does not require that Dentries are instantiated for all paths accessed 45 // through VFS, only those that are tracked beyond the scope of a single 46 // Filesystem operation. This includes file descriptions, mount points, mount 47 // roots, process working directories, and chroots. This avoids instantiation 48 // of Dentries for operations on mutable remote filesystems that can't actually 49 // cache any state in the Dentry. 50 // 51 // - VFS does not track filesystem structure (i.e. relationships between 52 // Dentries), since both the relevant state and synchronization are 53 // filesystem-specific. 54 // 55 // - For the reasons above, VFS is not directly responsible for managing Dentry 56 // lifetime. Dentry reference counts only indicate the extent to which VFS 57 // requires Dentries to exist; Filesystems may elect to cache or discard 58 // Dentries with zero references. 59 // 60 // +stateify savable 61 type Dentry struct { 62 // mu synchronizes deletion/invalidation and mounting over this Dentry. 63 mu sync.Mutex `state:"nosave"` 64 65 // dead is true if the file represented by this Dentry has been deleted (by 66 // CommitDeleteDentry or CommitRenameReplaceDentry) or invalidated (by 67 // InvalidateDentry). dead is protected by mu. 68 dead bool 69 70 // evictable is set by the VFS layer or filesystems like overlayfs as a hint 71 // that this dentry will not be accessed hence forth. So filesystems that 72 // cache dentries locally can use this hint to release the dentry when all 73 // references are dropped. evictable is protected by mu. 74 evictable bool 75 76 // mounts is the number of Mounts for which this Dentry is Mount.point. 77 mounts atomicbitops.Uint32 78 79 // impl is the DentryImpl associated with this Dentry. impl is immutable. 80 // This should be the last field in Dentry. 81 impl DentryImpl 82 } 83 84 // Init must be called before first use of d. 85 func (d *Dentry) Init(impl DentryImpl) { 86 d.impl = impl 87 } 88 89 // Impl returns the DentryImpl associated with d. 90 func (d *Dentry) Impl() DentryImpl { 91 return d.impl 92 } 93 94 // DentryImpl contains implementation details for a Dentry. Implementations of 95 // DentryImpl should contain their associated Dentry by value as their first 96 // field. 97 // 98 // +stateify savable 99 type DentryImpl interface { 100 // IncRef increments the Dentry's reference count. A Dentry with a non-zero 101 // reference count must remain coherent with the state of the filesystem. 102 IncRef() 103 104 // TryIncRef increments the Dentry's reference count and returns true. If 105 // the Dentry's reference count is zero, TryIncRef may do nothing and 106 // return false. (It is also permitted to succeed if it can restore the 107 // guarantee that the Dentry is coherent with the state of the filesystem.) 108 // 109 // TryIncRef does not require that a reference is held on the Dentry. 110 TryIncRef() bool 111 112 // DecRef decrements the Dentry's reference count. 113 DecRef(ctx context.Context) 114 115 // InotifyWithParent notifies all watches on the targets represented by this 116 // dentry and its parent. The parent's watches are notified first, followed 117 // by this dentry's. 118 // 119 // InotifyWithParent automatically adds the IN_ISDIR flag for dentries 120 // representing directories. 121 // 122 // Note that the events may not actually propagate up to the user, depending 123 // on the event masks. 124 InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) 125 126 // Watches returns the set of inotify watches for the file corresponding to 127 // the Dentry. Dentries that are hard links to the same underlying file 128 // share the same watches. 129 // 130 // The caller does not need to hold a reference on the dentry. 131 Watches() *Watches 132 133 // OnZeroWatches is called whenever the number of watches on a dentry drops 134 // to zero. This is needed by some FilesystemImpls (e.g. gofer) to manage 135 // dentry lifetime. 136 // 137 // The caller does not need to hold a reference on the dentry. OnZeroWatches 138 // may acquire inotify locks, so to prevent deadlock, no inotify locks should 139 // be held by the caller. 140 OnZeroWatches(ctx context.Context) 141 } 142 143 // IncRef increments d's reference count. 144 func (d *Dentry) IncRef() { 145 d.impl.IncRef() 146 } 147 148 // TryIncRef increments d's reference count and returns true. If d's reference 149 // count is zero, TryIncRef may instead do nothing and return false. 150 func (d *Dentry) TryIncRef() bool { 151 return d.impl.TryIncRef() 152 } 153 154 // DecRef decrements d's reference count. 155 func (d *Dentry) DecRef(ctx context.Context) { 156 d.impl.DecRef(ctx) 157 } 158 159 // IsDead returns true if d has been deleted or invalidated by its owning 160 // filesystem. 161 func (d *Dentry) IsDead() bool { 162 d.mu.Lock() 163 defer d.mu.Unlock() 164 return d.dead 165 } 166 167 // IsEvictable returns true if d is evictable from filesystem dentry cache. 168 func (d *Dentry) IsEvictable() bool { 169 d.mu.Lock() 170 defer d.mu.Unlock() 171 return d.evictable 172 } 173 174 // MarkEvictable marks d as evictable. 175 func (d *Dentry) MarkEvictable() { 176 d.mu.Lock() 177 defer d.mu.Unlock() 178 d.evictable = true 179 } 180 181 func (d *Dentry) isMounted() bool { 182 return d.mounts.Load() != 0 183 } 184 185 // InotifyWithParent notifies all watches on the targets represented by d and 186 // its parent of events. 187 func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) { 188 d.impl.InotifyWithParent(ctx, events, cookie, et) 189 } 190 191 // Watches returns the set of inotify watches associated with d. 192 func (d *Dentry) Watches() *Watches { 193 return d.impl.Watches() 194 } 195 196 // OnZeroWatches performs cleanup tasks whenever the number of watches on a 197 // dentry drops to zero. 198 func (d *Dentry) OnZeroWatches(ctx context.Context) { 199 d.impl.OnZeroWatches(ctx) 200 } 201 202 // The following functions are exported so that filesystem implementations can 203 // use them. The vfs package, and users of VFS, should not call these 204 // functions. 205 206 // PrepareDeleteDentry must be called before attempting to delete the file 207 // represented by d. If PrepareDeleteDentry succeeds, the caller must call 208 // AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome. 209 // +checklocksacquire:d.mu 210 func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error { 211 vfs.lockMounts() 212 defer vfs.unlockMounts(context.Background()) 213 if mntns.mountpoints[d] != 0 { 214 return linuxerr.EBUSY // +checklocksforce: inconsistent return. 215 } 216 d.mu.Lock() 217 // Return with d.mu locked to block attempts to mount over it; it will be 218 // unlocked by AbortDeleteDentry or CommitDeleteDentry. 219 return nil 220 } 221 222 // AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion 223 // fails. 224 // +checklocksrelease:d.mu 225 func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) { 226 d.mu.Unlock() 227 } 228 229 // CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion 230 // succeeds. If d is mounted, the method returns a list of Virtual Dentries 231 // mounted on d that the caller is responsible for DecRefing. 232 // +checklocksrelease:d.mu 233 func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) []refs.RefCounter { 234 d.dead = true 235 d.mu.Unlock() 236 if d.isMounted() { 237 return vfs.forgetDeadMountpoint(ctx, d) 238 } 239 return nil 240 } 241 242 // InvalidateDentry is called when d ceases to represent the file it formerly 243 // did for reasons outside of VFS' control (e.g. d represents the local state 244 // of a file on a remote filesystem on which the file has already been 245 // deleted). If d is mounted, the method returns a list of Virtual Dentries 246 // mounted on d that the caller is responsible for DecRefing. 247 func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) []refs.RefCounter { 248 d.mu.Lock() 249 d.dead = true 250 d.mu.Unlock() 251 if d.isMounted() { 252 return vfs.forgetDeadMountpoint(ctx, d) 253 } 254 return nil 255 } 256 257 // PrepareRenameDentry must be called before attempting to rename the file 258 // represented by from. If to is not nil, it represents the file that will be 259 // replaced or exchanged by the rename. If PrepareRenameDentry succeeds, the 260 // caller must call AbortRenameDentry, CommitRenameReplaceDentry, or 261 // CommitRenameExchangeDentry depending on the rename's outcome. 262 // 263 // Preconditions: 264 // - If to is not nil, it must be a child Dentry from the same Filesystem. 265 // - from != to. 266 // 267 // +checklocksacquire:from.mu 268 // +checklocksacquire:to.mu 269 func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error { 270 vfs.lockMounts() 271 defer vfs.unlockMounts(context.Background()) 272 if mntns.mountpoints[from] != 0 { 273 return linuxerr.EBUSY // +checklocksforce: no locks acquired. 274 } 275 if to != nil { 276 if mntns.mountpoints[to] != 0 { 277 return linuxerr.EBUSY // +checklocksforce: no locks acquired. 278 } 279 to.mu.Lock() 280 } 281 from.mu.Lock() 282 // Return with from.mu and to.mu locked, which will be unlocked by 283 // AbortRenameDentry, CommitRenameReplaceDentry, or 284 // CommitRenameExchangeDentry. 285 return nil // +checklocksforce: to may not be acquired. 286 } 287 288 // AbortRenameDentry must be called after PrepareRenameDentry if the rename 289 // fails. 290 // +checklocksrelease:from.mu 291 // +checklocksrelease:to.mu 292 func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) { 293 from.mu.Unlock() 294 if to != nil { 295 to.mu.Unlock() 296 } 297 } 298 299 // CommitRenameReplaceDentry must be called after the file represented by from 300 // is renamed without RENAME_EXCHANGE. If to is not nil, it represents the file 301 // that was replaced by from. If to is mounted, the method returns a list of 302 // Virtual Dentries mounted on to that the caller is responsible for DecRefing. 303 // 304 // Preconditions: PrepareRenameDentry was previously called on from and to. 305 // +checklocksrelease:from.mu 306 // +checklocksrelease:to.mu 307 func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) []refs.RefCounter { 308 from.mu.Unlock() 309 if to != nil { 310 to.dead = true 311 to.mu.Unlock() 312 if to.isMounted() { 313 return vfs.forgetDeadMountpoint(ctx, to) 314 } 315 } 316 return nil 317 } 318 319 // CommitRenameExchangeDentry must be called after the files represented by 320 // from and to are exchanged by rename(RENAME_EXCHANGE). 321 // 322 // Preconditions: PrepareRenameDentry was previously called on from and to. 323 // +checklocksrelease:from.mu 324 // +checklocksrelease:to.mu 325 func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) { 326 from.mu.Unlock() 327 to.mu.Unlock() 328 } 329 330 // forgetDeadMountpoint is called when a mount point is deleted or invalidated 331 // to umount all mounts using it in all other mount namespaces. If skipDecRef 332 // is true, the method returns a list of reference counted objects with an 333 // an extra reference. 334 // 335 // forgetDeadMountpoint is analogous to Linux's 336 // fs/namespace.c:__detach_mounts(). 337 func (vfs *VirtualFilesystem) forgetDeadMountpoint(ctx context.Context, d *Dentry) []refs.RefCounter { 338 vfs.lockMounts() 339 defer vfs.unlockMounts(ctx) 340 for mnt := range vfs.mountpoints[d] { 341 // If umounted is true, the mount point has already been decrefed by umount 342 // so we don't need to release the reference again here. 343 if mnt.umounted { 344 vfs.mounts.seq.BeginWrite() 345 vfs.disconnectLocked(mnt) 346 vfs.delayDecRef(mnt) 347 vfs.mounts.seq.EndWrite() 348 } else { 349 vfs.umountTreeLocked(mnt, &umountRecursiveOptions{}) 350 } 351 } 352 return vfs.PopDelayedDecRefs() 353 }