github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/vfs/dentry.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vfs 16 17 import ( 18 "github.com/MerlinKodo/gvisor/pkg/atomicbitops" 19 "github.com/MerlinKodo/gvisor/pkg/context" 20 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 21 "github.com/MerlinKodo/gvisor/pkg/refs" 22 "github.com/MerlinKodo/gvisor/pkg/sync" 23 ) 24 25 // Dentry represents a node in a Filesystem tree at which a file exists. 26 // 27 // Dentries are reference-counted. Unless otherwise specified, all Dentry 28 // methods require that a reference is held. 29 // 30 // Dentry is loosely analogous to Linux's struct dentry, but: 31 // 32 // - VFS does not associate Dentries with inodes. gVisor interacts primarily 33 // with filesystems that are accessed through filesystem APIs (as opposed to 34 // raw block devices); many such APIs support only paths and file descriptors, 35 // and not inodes. Furthermore, when parties outside the scope of VFS can 36 // rename inodes on such filesystems, VFS generally cannot "follow" the rename, 37 // both due to synchronization issues and because it may not even be able to 38 // name the destination path; this implies that it would in fact be incorrect 39 // for Dentries to be associated with inodes on such filesystems. Consequently, 40 // operations that are inode operations in Linux are FilesystemImpl methods 41 // and/or FileDescriptionImpl methods in gVisor's VFS. Filesystems that do 42 // support inodes may store appropriate state in implementations of DentryImpl. 43 // 44 // - VFS does not require that Dentries are instantiated for all paths accessed 45 // through VFS, only those that are tracked beyond the scope of a single 46 // Filesystem operation. This includes file descriptions, mount points, mount 47 // roots, process working directories, and chroots. This avoids instantiation 48 // of Dentries for operations on mutable remote filesystems that can't actually 49 // cache any state in the Dentry. 50 // 51 // - VFS does not track filesystem structure (i.e. relationships between 52 // Dentries), since both the relevant state and synchronization are 53 // filesystem-specific. 54 // 55 // - For the reasons above, VFS is not directly responsible for managing Dentry 56 // lifetime. Dentry reference counts only indicate the extent to which VFS 57 // requires Dentries to exist; Filesystems may elect to cache or discard 58 // Dentries with zero references. 59 // 60 // +stateify savable 61 type Dentry struct { 62 // mu synchronizes deletion/invalidation and mounting over this Dentry. 63 mu sync.Mutex `state:"nosave"` 64 65 // dead is true if the file represented by this Dentry has been deleted (by 66 // CommitDeleteDentry or CommitRenameReplaceDentry) or invalidated (by 67 // InvalidateDentry). dead is protected by mu. 68 dead bool 69 70 // evictable is set by the VFS layer or filesystems like overlayfs as a hint 71 // that this dentry will not be accessed hence forth. So filesystems that 72 // cache dentries locally can use this hint to release the dentry when all 73 // references are dropped. evictable is protected by mu. 74 evictable bool 75 76 // mounts is the number of Mounts for which this Dentry is Mount.point. 77 mounts atomicbitops.Uint32 78 79 // impl is the DentryImpl associated with this Dentry. impl is immutable. 80 // This should be the last field in Dentry. 81 impl DentryImpl 82 } 83 84 // Init must be called before first use of d. 85 func (d *Dentry) Init(impl DentryImpl) { 86 d.impl = impl 87 } 88 89 // Impl returns the DentryImpl associated with d. 90 func (d *Dentry) Impl() DentryImpl { 91 return d.impl 92 } 93 94 // DentryImpl contains implementation details for a Dentry. Implementations of 95 // DentryImpl should contain their associated Dentry by value as their first 96 // field. 97 // 98 // +stateify savable 99 type DentryImpl interface { 100 // IncRef increments the Dentry's reference count. A Dentry with a non-zero 101 // reference count must remain coherent with the state of the filesystem. 102 IncRef() 103 104 // TryIncRef increments the Dentry's reference count and returns true. If 105 // the Dentry's reference count is zero, TryIncRef may do nothing and 106 // return false. (It is also permitted to succeed if it can restore the 107 // guarantee that the Dentry is coherent with the state of the filesystem.) 108 // 109 // TryIncRef does not require that a reference is held on the Dentry. 110 TryIncRef() bool 111 112 // DecRef decrements the Dentry's reference count. 113 DecRef(ctx context.Context) 114 115 // InotifyWithParent notifies all watches on the targets represented by this 116 // dentry and its parent. The parent's watches are notified first, followed 117 // by this dentry's. 118 // 119 // InotifyWithParent automatically adds the IN_ISDIR flag for dentries 120 // representing directories. 121 // 122 // Note that the events may not actually propagate up to the user, depending 123 // on the event masks. 124 InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) 125 126 // Watches returns the set of inotify watches for the file corresponding to 127 // the Dentry. Dentries that are hard links to the same underlying file 128 // share the same watches. 129 // 130 // The caller does not need to hold a reference on the dentry. 131 Watches() *Watches 132 133 // OnZeroWatches is called whenever the number of watches on a dentry drops 134 // to zero. This is needed by some FilesystemImpls (e.g. gofer) to manage 135 // dentry lifetime. 136 // 137 // The caller does not need to hold a reference on the dentry. OnZeroWatches 138 // may acquire inotify locks, so to prevent deadlock, no inotify locks should 139 // be held by the caller. 140 OnZeroWatches(ctx context.Context) 141 } 142 143 // IncRef increments d's reference count. 144 func (d *Dentry) IncRef() { 145 d.impl.IncRef() 146 } 147 148 // TryIncRef increments d's reference count and returns true. If d's reference 149 // count is zero, TryIncRef may instead do nothing and return false. 150 func (d *Dentry) TryIncRef() bool { 151 return d.impl.TryIncRef() 152 } 153 154 // DecRef decrements d's reference count. 155 func (d *Dentry) DecRef(ctx context.Context) { 156 d.impl.DecRef(ctx) 157 } 158 159 // IsDead returns true if d has been deleted or invalidated by its owning 160 // filesystem. 161 func (d *Dentry) IsDead() bool { 162 d.mu.Lock() 163 defer d.mu.Unlock() 164 return d.dead 165 } 166 167 // IsEvictable returns true if d is evictable from filesystem dentry cache. 168 func (d *Dentry) IsEvictable() bool { 169 d.mu.Lock() 170 defer d.mu.Unlock() 171 return d.evictable 172 } 173 174 // MarkEvictable marks d as evictable. 175 func (d *Dentry) MarkEvictable() { 176 d.mu.Lock() 177 defer d.mu.Unlock() 178 d.evictable = true 179 } 180 181 func (d *Dentry) isMounted() bool { 182 return d.mounts.Load() != 0 183 } 184 185 // InotifyWithParent notifies all watches on the targets represented by d and 186 // its parent of events. 187 func (d *Dentry) InotifyWithParent(ctx context.Context, events, cookie uint32, et EventType) { 188 d.impl.InotifyWithParent(ctx, events, cookie, et) 189 } 190 191 // Watches returns the set of inotify watches associated with d. 192 func (d *Dentry) Watches() *Watches { 193 return d.impl.Watches() 194 } 195 196 // OnZeroWatches performs cleanup tasks whenever the number of watches on a 197 // dentry drops to zero. 198 func (d *Dentry) OnZeroWatches(ctx context.Context) { 199 d.impl.OnZeroWatches(ctx) 200 } 201 202 // The following functions are exported so that filesystem implementations can 203 // use them. The vfs package, and users of VFS, should not call these 204 // functions. 205 206 // PrepareDeleteDentry must be called before attempting to delete the file 207 // represented by d. If PrepareDeleteDentry succeeds, the caller must call 208 // AbortDeleteDentry or CommitDeleteDentry depending on the deletion's outcome. 209 // +checklocksacquire:d.mu 210 func (vfs *VirtualFilesystem) PrepareDeleteDentry(mntns *MountNamespace, d *Dentry) error { 211 vfs.lockMounts() 212 defer vfs.unlockMounts(context.Background()) 213 if mntns.mountpoints[d] != 0 { 214 return linuxerr.EBUSY // +checklocksforce: inconsistent return. 215 } 216 d.mu.Lock() 217 // Return with d.mu locked to block attempts to mount over it; it will be 218 // unlocked by AbortDeleteDentry or CommitDeleteDentry. 219 return nil 220 } 221 222 // AbortDeleteDentry must be called after PrepareDeleteDentry if the deletion 223 // fails. 224 // +checklocksrelease:d.mu 225 func (vfs *VirtualFilesystem) AbortDeleteDentry(d *Dentry) { 226 d.mu.Unlock() 227 } 228 229 // CommitDeleteDentry must be called after PrepareDeleteDentry if the deletion 230 // succeeds. 231 // +checklocksrelease:d.mu 232 func (vfs *VirtualFilesystem) CommitDeleteDentry(ctx context.Context, d *Dentry) { 233 d.dead = true 234 d.mu.Unlock() 235 if d.isMounted() { 236 vfs.forgetDeadMountpoint(ctx, d, false /*skipDecRef*/) 237 } 238 } 239 240 // InvalidateDentry is called when d ceases to represent the file it formerly 241 // did for reasons outside of VFS' control (e.g. d represents the local state 242 // of a file on a remote filesystem on which the file has already been 243 // deleted). If d is mounted, the method returns a list of Virtual Dentries 244 // mounted on d that the caller is responsible for DecRefing. 245 func (vfs *VirtualFilesystem) InvalidateDentry(ctx context.Context, d *Dentry) []refs.RefCounter { 246 d.mu.Lock() 247 d.dead = true 248 d.mu.Unlock() 249 if d.isMounted() { 250 return vfs.forgetDeadMountpoint(ctx, d, true /*skipDecRef*/) 251 } 252 return nil 253 } 254 255 // PrepareRenameDentry must be called before attempting to rename the file 256 // represented by from. If to is not nil, it represents the file that will be 257 // replaced or exchanged by the rename. If PrepareRenameDentry succeeds, the 258 // caller must call AbortRenameDentry, CommitRenameReplaceDentry, or 259 // CommitRenameExchangeDentry depending on the rename's outcome. 260 // 261 // Preconditions: 262 // - If to is not nil, it must be a child Dentry from the same Filesystem. 263 // - from != to. 264 // 265 // +checklocksacquire:from.mu 266 // +checklocksacquire:to.mu 267 func (vfs *VirtualFilesystem) PrepareRenameDentry(mntns *MountNamespace, from, to *Dentry) error { 268 vfs.lockMounts() 269 defer vfs.unlockMounts(context.Background()) 270 if mntns.mountpoints[from] != 0 { 271 return linuxerr.EBUSY // +checklocksforce: no locks acquired. 272 } 273 if to != nil { 274 if mntns.mountpoints[to] != 0 { 275 return linuxerr.EBUSY // +checklocksforce: no locks acquired. 276 } 277 to.mu.Lock() 278 } 279 from.mu.Lock() 280 // Return with from.mu and to.mu locked, which will be unlocked by 281 // AbortRenameDentry, CommitRenameReplaceDentry, or 282 // CommitRenameExchangeDentry. 283 return nil // +checklocksforce: to may not be acquired. 284 } 285 286 // AbortRenameDentry must be called after PrepareRenameDentry if the rename 287 // fails. 288 // +checklocksrelease:from.mu 289 // +checklocksrelease:to.mu 290 func (vfs *VirtualFilesystem) AbortRenameDentry(from, to *Dentry) { 291 from.mu.Unlock() 292 if to != nil { 293 to.mu.Unlock() 294 } 295 } 296 297 // CommitRenameReplaceDentry must be called after the file represented by from 298 // is renamed without RENAME_EXCHANGE. If to is not nil, it represents the file 299 // that was replaced by from. 300 // 301 // Preconditions: PrepareRenameDentry was previously called on from and to. 302 // +checklocksrelease:from.mu 303 // +checklocksrelease:to.mu 304 func (vfs *VirtualFilesystem) CommitRenameReplaceDentry(ctx context.Context, from, to *Dentry) { 305 from.mu.Unlock() 306 if to != nil { 307 to.dead = true 308 to.mu.Unlock() 309 if to.isMounted() { 310 vfs.forgetDeadMountpoint(ctx, to, false /*skipDecRef*/) 311 } 312 } 313 } 314 315 // CommitRenameExchangeDentry must be called after the files represented by 316 // from and to are exchanged by rename(RENAME_EXCHANGE). 317 // 318 // Preconditions: PrepareRenameDentry was previously called on from and to. 319 // +checklocksrelease:from.mu 320 // +checklocksrelease:to.mu 321 func (vfs *VirtualFilesystem) CommitRenameExchangeDentry(from, to *Dentry) { 322 from.mu.Unlock() 323 to.mu.Unlock() 324 } 325 326 // forgetDeadMountpoint is called when a mount point is deleted or invalidated 327 // to umount all mounts using it in all other mount namespaces. If skipDecRef 328 // is true, the method returns a list of reference counted objects with an 329 // an extra reference. 330 // 331 // forgetDeadMountpoint is analogous to Linux's 332 // fs/namespace.c:__detach_mounts(). 333 func (vfs *VirtualFilesystem) forgetDeadMountpoint(ctx context.Context, d *Dentry, skipDecRef bool) []refs.RefCounter { 334 vfs.lockMounts() 335 defer vfs.unlockMounts(ctx) 336 vfs.mounts.seq.BeginWrite() 337 for mnt := range vfs.mountpoints[d] { 338 vfs.umountRecursiveLocked(mnt, &umountRecursiveOptions{}) 339 } 340 vfs.mounts.seq.EndWrite() 341 var rcs []refs.RefCounter 342 if skipDecRef { 343 rcs = vfs.PopDelayedDecRefs() 344 } 345 return rcs 346 }