github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/inode.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fs 16 17 import ( 18 "github.com/SagerNet/gvisor/pkg/abi/linux" 19 "github.com/SagerNet/gvisor/pkg/context" 20 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 21 "github.com/SagerNet/gvisor/pkg/log" 22 "github.com/SagerNet/gvisor/pkg/refs" 23 "github.com/SagerNet/gvisor/pkg/sentry/fs/lock" 24 "github.com/SagerNet/gvisor/pkg/sentry/fsmetric" 25 "github.com/SagerNet/gvisor/pkg/sentry/kernel/auth" 26 "github.com/SagerNet/gvisor/pkg/sentry/memmap" 27 "github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport" 28 "github.com/SagerNet/gvisor/pkg/sync" 29 "github.com/SagerNet/gvisor/pkg/syserror" 30 ) 31 32 // Inode is a file system object that can be simultaneously referenced by different 33 // components of the VFS (Dirent, fs.File, etc). 34 // 35 // +stateify savable 36 type Inode struct { 37 // AtomicRefCount is our reference count. 38 refs.AtomicRefCount 39 40 // InodeOperations is the file system specific behavior of the Inode. 41 InodeOperations InodeOperations 42 43 // StableAttr are stable cached attributes of the Inode. 44 StableAttr StableAttr 45 46 // LockCtx is the file lock context. It manages its own sychronization and tracks 47 // regions of the Inode that have locks held. 48 LockCtx LockCtx 49 50 // Watches is the set of inotify watches for this inode. 51 Watches *Watches 52 53 // MountSource is the mount source this Inode is a part of. 54 MountSource *MountSource 55 56 // overlay is the overlay entry for this Inode. 57 overlay *overlayEntry 58 59 // appendMu is used to synchronize write operations into files which 60 // have been opened with O_APPEND. Operations which change a file size 61 // have to take this lock for read. Write operations to files with 62 // O_APPEND have to take this lock for write. 63 appendMu sync.RWMutex `state:"nosave"` 64 } 65 66 // LockCtx is an Inode's lock context and contains different personalities of locks; both 67 // Posix and BSD style locks are supported. 68 // 69 // Note that in Linux fcntl(2) and flock(2) locks are _not_ cooperative, because race and 70 // deadlock conditions make merging them prohibitive. We do the same and keep them oblivious 71 // to each other but provide a "context" as a convenient container. 72 // 73 // +stateify savable 74 type LockCtx struct { 75 // Posix is a set of POSIX-style regional advisory locks, see fcntl(2). 76 Posix lock.Locks 77 78 // BSD is a set of BSD-style advisory file wide locks, see flock(2). 79 BSD lock.Locks 80 } 81 82 // NewInode constructs an Inode from InodeOperations, a MountSource, and stable attributes. 83 // 84 // NewInode takes a reference on msrc. 85 func NewInode(ctx context.Context, iops InodeOperations, msrc *MountSource, sattr StableAttr) *Inode { 86 msrc.IncRef() 87 i := Inode{ 88 InodeOperations: iops, 89 StableAttr: sattr, 90 Watches: newWatches(), 91 MountSource: msrc, 92 } 93 i.EnableLeakCheck("fs.Inode") 94 return &i 95 } 96 97 // DecRef drops a reference on the Inode. 98 func (i *Inode) DecRef(ctx context.Context) { 99 i.DecRefWithDestructor(ctx, i.destroy) 100 } 101 102 // destroy releases the Inode and releases the msrc reference taken. 103 func (i *Inode) destroy(ctx context.Context) { 104 if err := i.WriteOut(ctx); err != nil { 105 // FIXME(b/65209558): Mark as warning again once noatime is 106 // properly supported. 107 log.Debugf("Inode %+v, failed to sync all metadata: %v", i.StableAttr, err) 108 } 109 110 // If this inode is being destroyed because it was unlinked, queue a 111 // deletion event. This may not be the case for inodes being revalidated. 112 if i.Watches.unlinked { 113 i.Watches.Notify("", linux.IN_DELETE_SELF, 0) 114 } 115 116 // Remove references from the watch owners to the watches on this inode, 117 // since the watches are about to be GCed. Note that we don't need to worry 118 // about the watch pins since if there were any active pins, this inode 119 // wouldn't be in the destructor. 120 i.Watches.targetDestroyed() 121 122 if i.overlay != nil { 123 i.overlay.release(ctx) 124 } else { 125 i.InodeOperations.Release(ctx) 126 } 127 128 i.MountSource.DecRef(ctx) 129 } 130 131 // Mappable calls i.InodeOperations.Mappable. 132 func (i *Inode) Mappable() memmap.Mappable { 133 if i.overlay != nil { 134 // In an overlay, Mappable is always implemented by 135 // the overlayEntry metadata to synchronize memory 136 // access of files with copy up. But first check if 137 // the Inodes involved would be mappable in the first 138 // place. 139 i.overlay.copyMu.RLock() 140 ok := i.overlay.isMappableLocked() 141 i.overlay.copyMu.RUnlock() 142 if !ok { 143 return nil 144 } 145 return i.overlay 146 } 147 return i.InodeOperations.Mappable(i) 148 } 149 150 // WriteOut calls i.InodeOperations.WriteOut with i as the Inode. 151 func (i *Inode) WriteOut(ctx context.Context) error { 152 if i.overlay != nil { 153 return overlayWriteOut(ctx, i.overlay) 154 } 155 return i.InodeOperations.WriteOut(ctx, i) 156 } 157 158 // Lookup calls i.InodeOperations.Lookup with i as the directory. 159 func (i *Inode) Lookup(ctx context.Context, name string) (*Dirent, error) { 160 if i.overlay != nil { 161 d, _, err := overlayLookup(ctx, i.overlay, i, name) 162 return d, err 163 } 164 return i.InodeOperations.Lookup(ctx, i, name) 165 } 166 167 // Create calls i.InodeOperations.Create with i as the directory. 168 func (i *Inode) Create(ctx context.Context, d *Dirent, name string, flags FileFlags, perm FilePermissions) (*File, error) { 169 if i.overlay != nil { 170 return overlayCreate(ctx, i.overlay, d, name, flags, perm) 171 } 172 return i.InodeOperations.Create(ctx, i, name, flags, perm) 173 } 174 175 // CreateDirectory calls i.InodeOperations.CreateDirectory with i as the directory. 176 func (i *Inode) CreateDirectory(ctx context.Context, d *Dirent, name string, perm FilePermissions) error { 177 if i.overlay != nil { 178 return overlayCreateDirectory(ctx, i.overlay, d, name, perm) 179 } 180 return i.InodeOperations.CreateDirectory(ctx, i, name, perm) 181 } 182 183 // CreateLink calls i.InodeOperations.CreateLink with i as the directory. 184 func (i *Inode) CreateLink(ctx context.Context, d *Dirent, oldname string, newname string) error { 185 if i.overlay != nil { 186 return overlayCreateLink(ctx, i.overlay, d, oldname, newname) 187 } 188 return i.InodeOperations.CreateLink(ctx, i, oldname, newname) 189 } 190 191 // CreateHardLink calls i.InodeOperations.CreateHardLink with i as the directory. 192 func (i *Inode) CreateHardLink(ctx context.Context, d *Dirent, target *Dirent, name string) error { 193 if i.overlay != nil { 194 return overlayCreateHardLink(ctx, i.overlay, d, target, name) 195 } 196 return i.InodeOperations.CreateHardLink(ctx, i, target.Inode, name) 197 } 198 199 // CreateFifo calls i.InodeOperations.CreateFifo with i as the directory. 200 func (i *Inode) CreateFifo(ctx context.Context, d *Dirent, name string, perm FilePermissions) error { 201 if i.overlay != nil { 202 return overlayCreateFifo(ctx, i.overlay, d, name, perm) 203 } 204 return i.InodeOperations.CreateFifo(ctx, i, name, perm) 205 } 206 207 // Remove calls i.InodeOperations.Remove/RemoveDirectory with i as the directory. 208 func (i *Inode) Remove(ctx context.Context, d *Dirent, remove *Dirent) error { 209 if i.overlay != nil { 210 return overlayRemove(ctx, i.overlay, d, remove) 211 } 212 switch remove.Inode.StableAttr.Type { 213 case Directory, SpecialDirectory: 214 return i.InodeOperations.RemoveDirectory(ctx, i, remove.name) 215 default: 216 return i.InodeOperations.Remove(ctx, i, remove.name) 217 } 218 } 219 220 // Rename calls i.InodeOperations.Rename with the given arguments. 221 func (i *Inode) Rename(ctx context.Context, oldParent *Dirent, renamed *Dirent, newParent *Dirent, newName string, replacement bool) error { 222 if i.overlay != nil { 223 return overlayRename(ctx, i.overlay, oldParent, renamed, newParent, newName, replacement) 224 } 225 return i.InodeOperations.Rename(ctx, renamed.Inode, oldParent.Inode, renamed.name, newParent.Inode, newName, replacement) 226 } 227 228 // Bind calls i.InodeOperations.Bind with i as the directory. 229 func (i *Inode) Bind(ctx context.Context, parent *Dirent, name string, data transport.BoundEndpoint, perm FilePermissions) (*Dirent, error) { 230 if i.overlay != nil { 231 return overlayBind(ctx, i.overlay, parent, name, data, perm) 232 } 233 return i.InodeOperations.Bind(ctx, i, name, data, perm) 234 } 235 236 // BoundEndpoint calls i.InodeOperations.BoundEndpoint with i as the Inode. 237 func (i *Inode) BoundEndpoint(path string) transport.BoundEndpoint { 238 if i.overlay != nil { 239 return overlayBoundEndpoint(i.overlay, path) 240 } 241 return i.InodeOperations.BoundEndpoint(i, path) 242 } 243 244 // GetFile calls i.InodeOperations.GetFile with the given arguments. 245 func (i *Inode) GetFile(ctx context.Context, d *Dirent, flags FileFlags) (*File, error) { 246 if i.overlay != nil { 247 return overlayGetFile(ctx, i.overlay, d, flags) 248 } 249 fsmetric.Opens.Increment() 250 return i.InodeOperations.GetFile(ctx, d, flags) 251 } 252 253 // UnstableAttr calls i.InodeOperations.UnstableAttr with i as the Inode. 254 func (i *Inode) UnstableAttr(ctx context.Context) (UnstableAttr, error) { 255 if i.overlay != nil { 256 return overlayUnstableAttr(ctx, i.overlay) 257 } 258 return i.InodeOperations.UnstableAttr(ctx, i) 259 } 260 261 // GetXattr calls i.InodeOperations.GetXattr with i as the Inode. 262 func (i *Inode) GetXattr(ctx context.Context, name string, size uint64) (string, error) { 263 if i.overlay != nil { 264 return overlayGetXattr(ctx, i.overlay, name, size) 265 } 266 return i.InodeOperations.GetXattr(ctx, i, name, size) 267 } 268 269 // SetXattr calls i.InodeOperations.SetXattr with i as the Inode. 270 func (i *Inode) SetXattr(ctx context.Context, d *Dirent, name, value string, flags uint32) error { 271 if i.overlay != nil { 272 return overlaySetXattr(ctx, i.overlay, d, name, value, flags) 273 } 274 return i.InodeOperations.SetXattr(ctx, i, name, value, flags) 275 } 276 277 // ListXattr calls i.InodeOperations.ListXattr with i as the Inode. 278 func (i *Inode) ListXattr(ctx context.Context, size uint64) (map[string]struct{}, error) { 279 if i.overlay != nil { 280 return overlayListXattr(ctx, i.overlay, size) 281 } 282 return i.InodeOperations.ListXattr(ctx, i, size) 283 } 284 285 // RemoveXattr calls i.InodeOperations.RemoveXattr with i as the Inode. 286 func (i *Inode) RemoveXattr(ctx context.Context, d *Dirent, name string) error { 287 if i.overlay != nil { 288 return overlayRemoveXattr(ctx, i.overlay, d, name) 289 } 290 return i.InodeOperations.RemoveXattr(ctx, i, name) 291 } 292 293 // CheckPermission will check if the caller may access this file in the 294 // requested way for reading, writing, or executing. 295 // 296 // CheckPermission is like Linux's fs/namei.c:inode_permission. It 297 // - checks file system mount flags, 298 // - and utilizes InodeOperations.Check to check capabilities and modes. 299 func (i *Inode) CheckPermission(ctx context.Context, p PermMask) error { 300 // First check the outer-most mounted filesystem. 301 if p.Write && i.MountSource.Flags.ReadOnly { 302 return linuxerr.EROFS 303 } 304 305 if i.overlay != nil { 306 // CheckPermission requires some special handling for 307 // an overlay. 308 // 309 // Writes will always be redirected to an upper filesystem, 310 // so ignore all lower layers being read-only. 311 // 312 // But still honor the upper-most filesystem's mount flags; 313 // we should not attempt to modify the writable layer if it 314 // is mounted read-only. 315 if p.Write && overlayUpperMountSource(i.MountSource).Flags.ReadOnly { 316 return linuxerr.EROFS 317 } 318 } 319 320 return i.check(ctx, p) 321 } 322 323 func (i *Inode) check(ctx context.Context, p PermMask) error { 324 if i.overlay != nil { 325 return overlayCheck(ctx, i.overlay, p) 326 } 327 if !i.InodeOperations.Check(ctx, i, p) { 328 return linuxerr.EACCES 329 } 330 return nil 331 } 332 333 // SetPermissions calls i.InodeOperations.SetPermissions with i as the Inode. 334 func (i *Inode) SetPermissions(ctx context.Context, d *Dirent, f FilePermissions) bool { 335 if i.overlay != nil { 336 return overlaySetPermissions(ctx, i.overlay, d, f) 337 } 338 return i.InodeOperations.SetPermissions(ctx, i, f) 339 } 340 341 // SetOwner calls i.InodeOperations.SetOwner with i as the Inode. 342 func (i *Inode) SetOwner(ctx context.Context, d *Dirent, o FileOwner) error { 343 if i.overlay != nil { 344 return overlaySetOwner(ctx, i.overlay, d, o) 345 } 346 return i.InodeOperations.SetOwner(ctx, i, o) 347 } 348 349 // SetTimestamps calls i.InodeOperations.SetTimestamps with i as the Inode. 350 func (i *Inode) SetTimestamps(ctx context.Context, d *Dirent, ts TimeSpec) error { 351 if i.overlay != nil { 352 return overlaySetTimestamps(ctx, i.overlay, d, ts) 353 } 354 return i.InodeOperations.SetTimestamps(ctx, i, ts) 355 } 356 357 // Truncate calls i.InodeOperations.Truncate with i as the Inode. 358 func (i *Inode) Truncate(ctx context.Context, d *Dirent, size int64) error { 359 if IsDir(i.StableAttr) { 360 return syserror.EISDIR 361 } 362 363 if i.overlay != nil { 364 return overlayTruncate(ctx, i.overlay, d, size) 365 } 366 i.appendMu.RLock() 367 defer i.appendMu.RUnlock() 368 return i.InodeOperations.Truncate(ctx, i, size) 369 } 370 371 // Allocate calls i.InodeOperations.Allocate with i as the Inode. 372 func (i *Inode) Allocate(ctx context.Context, d *Dirent, offset int64, length int64) error { 373 if i.overlay != nil { 374 return overlayAllocate(ctx, i.overlay, d, offset, length) 375 } 376 return i.InodeOperations.Allocate(ctx, i, offset, length) 377 } 378 379 // Readlink calls i.InodeOperations.Readlnk with i as the Inode. 380 func (i *Inode) Readlink(ctx context.Context) (string, error) { 381 if i.overlay != nil { 382 return overlayReadlink(ctx, i.overlay) 383 } 384 return i.InodeOperations.Readlink(ctx, i) 385 } 386 387 // Getlink calls i.InodeOperations.Getlink. 388 func (i *Inode) Getlink(ctx context.Context) (*Dirent, error) { 389 if i.overlay != nil { 390 return overlayGetlink(ctx, i.overlay) 391 } 392 return i.InodeOperations.Getlink(ctx, i) 393 } 394 395 // AddLink calls i.InodeOperations.AddLink. 396 func (i *Inode) AddLink() { 397 if i.overlay != nil { 398 // This interface is only used by ramfs to update metadata of 399 // children. These filesystems should _never_ have overlay 400 // Inodes cached as children. So explicitly disallow this 401 // scenario and avoid plumbing Dirents through to do copy up. 402 panic("overlay Inodes cached in ramfs directories are not supported") 403 } 404 i.InodeOperations.AddLink() 405 } 406 407 // DropLink calls i.InodeOperations.DropLink. 408 func (i *Inode) DropLink() { 409 if i.overlay != nil { 410 // Same as AddLink. 411 panic("overlay Inodes cached in ramfs directories are not supported") 412 } 413 i.InodeOperations.DropLink() 414 } 415 416 // IsVirtual calls i.InodeOperations.IsVirtual. 417 func (i *Inode) IsVirtual() bool { 418 if i.overlay != nil { 419 // An overlay configuration does not support virtual files. 420 return false 421 } 422 return i.InodeOperations.IsVirtual() 423 } 424 425 // StatFS calls i.InodeOperations.StatFS. 426 func (i *Inode) StatFS(ctx context.Context) (Info, error) { 427 if i.overlay != nil { 428 return overlayStatFS(ctx, i.overlay) 429 } 430 return i.InodeOperations.StatFS(ctx) 431 } 432 433 // CheckOwnership checks whether `ctx` owns this Inode or may act as its owner. 434 // Compare Linux's fs/inode.c:inode_owner_or_capable(). 435 func (i *Inode) CheckOwnership(ctx context.Context) bool { 436 uattr, err := i.UnstableAttr(ctx) 437 if err != nil { 438 return false 439 } 440 creds := auth.CredentialsFromContext(ctx) 441 if uattr.Owner.UID == creds.EffectiveKUID { 442 return true 443 } 444 if creds.HasCapability(linux.CAP_FOWNER) && creds.UserNamespace.MapFromKUID(uattr.Owner.UID).Ok() { 445 return true 446 } 447 return false 448 } 449 450 // CheckCapability checks whether `ctx` has capability `cp` with respect to 451 // operations on this Inode. 452 // 453 // Compare Linux's kernel/capability.c:capable_wrt_inode_uidgid(). 454 func (i *Inode) CheckCapability(ctx context.Context, cp linux.Capability) bool { 455 uattr, err := i.UnstableAttr(ctx) 456 if err != nil { 457 return false 458 } 459 creds := auth.CredentialsFromContext(ctx) 460 if !creds.UserNamespace.MapFromKUID(uattr.Owner.UID).Ok() { 461 return false 462 } 463 if !creds.UserNamespace.MapFromKGID(uattr.Owner.GID).Ok() { 464 return false 465 } 466 return creds.HasCapability(cp) 467 } 468 469 func (i *Inode) lockAppendMu(appendMode bool) func() { 470 if appendMode { 471 i.appendMu.Lock() 472 return i.appendMu.Unlock 473 } 474 i.appendMu.RLock() 475 return i.appendMu.RUnlock 476 }