// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package vfs

import (
	"io"

	"github.com/nicocha30/gvisor-ligolo/pkg/abi/linux"
	"github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops"
	"github.com/nicocha30/gvisor-ligolo/pkg/context"
	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/arch"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/lock"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsmetric"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth"
	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/memmap"
	"github.com/nicocha30/gvisor-ligolo/pkg/sync"
	"github.com/nicocha30/gvisor-ligolo/pkg/usermem"
	"github.com/nicocha30/gvisor-ligolo/pkg/waiter"
)

// A FileDescription represents an open file description, which is the entity
// referred to by a file descriptor (POSIX.1-2017 3.258 "Open File
// Description").
//
// FileDescriptions are reference-counted. Unless otherwise specified, all
// FileDescription methods require that a reference is held.
//
// FileDescription is analogous to Linux's struct file.
//
// +stateify savable
type FileDescription struct {
	FileDescriptionRefs

	// flagsMu protects `statusFlags` and `asyncHandler` below.
	flagsMu sync.Mutex `state:"nosave"`

	// statusFlags contains status flags, "initialized by open(2) and possibly
	// modified by fcntl()" - fcntl(2). statusFlags can be read using atomic
	// memory operations when it does not need to be synchronized with an
	// access to asyncHandler.
	statusFlags atomicbitops.Uint32

	// asyncHandler handles O_ASYNC signal generation. It is set with the
	// F_SETOWN or F_SETOWN_EX fcntls. For asyncHandler to be used, O_ASYNC must
	// also be set by fcntl(2).
	asyncHandler FileAsync

	// epolls is the set of epollInterests registered for this FileDescription.
	// epolls is protected by epollMu.
	epollMu epollMutex `state:"nosave"`
	epolls  map[*epollInterest]struct{}

	// vd is the filesystem location at which this FileDescription was opened.
	// A reference is held on vd. vd is immutable.
	vd VirtualDentry

	// opts contains options passed to FileDescription.Init(). opts is
	// immutable.
	opts FileDescriptionOptions

	// readable is MayReadFileWithOpenFlags(statusFlags). readable is
	// immutable.
	//
	// readable is analogous to Linux's FMODE_READ.
	readable bool

	// writable is MayWriteFileWithOpenFlags(statusFlags). If writable is true,
	// the FileDescription holds a write count on vd.mount. writable is
	// immutable.
	//
	// writable is analogous to Linux's FMODE_WRITE.
	writable bool

	// usedLockBSD is set to a non-zero value by LockBSD the first time a BSD
	// lock is requested through this FileDescription. DecRef consults it to
	// decide whether UnlockBSD must be called when the last reference is
	// dropped.
	usedLockBSD atomicbitops.Uint32

	// impl is the FileDescriptionImpl associated with this FileDescription.
	// impl is immutable. This should be the last field in FileDescription.
	impl FileDescriptionImpl
}

// FileDescriptionOptions contains options to FileDescription.Init().
//
// +stateify savable
type FileDescriptionOptions struct {
	// If AllowDirectIO is true, allow O_DIRECT to be set on the file.
	AllowDirectIO bool

	// If DenyPRead is true, calls to FileDescription.PRead() return ESPIPE.
	DenyPRead bool

	// If DenyPWrite is true, calls to FileDescription.PWrite() return
	// ESPIPE.
	DenyPWrite bool

	// If UseDentryMetadata is true, calls to FileDescription methods that
	// interact with file and filesystem metadata (Stat, SetStat, StatFS,
	// ListXattr, GetXattr, SetXattr, RemoveXattr) are implemented by calling
	// the corresponding FilesystemImpl methods instead of the corresponding
	// FileDescriptionImpl methods.
	//
	// UseDentryMetadata is intended for file descriptions that are implemented
	// outside of individual filesystems, such as pipes, sockets, and device
	// special files. FileDescriptions for which UseDentryMetadata is true may
	// embed DentryMetadataFileDescriptionImpl to obtain appropriate
	// implementations of FileDescriptionImpl methods that should not be
	// called.
	UseDentryMetadata bool

	// If DenySpliceIn is true, splicing into the descriptor is not allowed.
	DenySpliceIn bool
}

// FileCreationFlags are the set of flags passed to FileDescription.Init() but
// omitted from FileDescription.StatusFlags().
const FileCreationFlags = linux.O_CREAT | linux.O_EXCL | linux.O_NOCTTY | linux.O_TRUNC

// Init must be called before first use of fd. If it succeeds, it takes
// references on mnt and d. flags is the initial file description flags, which
// is usually the full set of flags passed to open(2).
//
// If flags request write access, Init first calls mnt.CheckBeginWrite() and
// returns its error, if any, without modifying fd. The matching
// mnt.EndWrite() is issued by DecRef when the last reference is dropped.
func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mount, d *Dentry, opts *FileDescriptionOptions) error {
	writable := MayWriteFileWithOpenFlags(flags)
	if writable {
		if err := mnt.CheckBeginWrite(); err != nil {
			return err
		}
	}

	fd.InitRefs()

	// Remove "file creation flags" to mirror the behavior from file.f_flags in
	// fs/open.c:do_dentry_open.
	fd.statusFlags = atomicbitops.FromUint32(flags &^ FileCreationFlags)
	fd.vd = VirtualDentry{
		mount:  mnt,
		dentry: d,
	}
	mnt.IncRef()
	d.IncRef()
	// opts is copied by value, so later changes by the caller do not affect fd.
	fd.opts = *opts
	fd.readable = MayReadFileWithOpenFlags(flags)
	fd.writable = writable
	fd.impl = impl
	return nil
}

// DecRef decrements fd's reference count.
//
// When the last reference is dropped, the destructor below runs, in order:
// close-time inotify event, epoll unregistration, BSD and POSIX lock release,
// impl.Release, write-count and vd release, and finally async handler
// unregistration.
func (fd *FileDescription) DecRef(ctx context.Context) {
	fd.FileDescriptionRefs.DecRef(func() {
		// Generate inotify events.
		ev := uint32(linux.IN_CLOSE_NOWRITE)
		if fd.IsWritable() {
			ev = linux.IN_CLOSE_WRITE
		}
		fd.Dentry().InotifyWithParent(ctx, ev, 0, PathEvent)

		// Unregister fd from all epoll instances. The set is detached under
		// epollMu and then walked without holding it.
		fd.epollMu.Lock()
		epolls := fd.epolls
		fd.epolls = nil
		fd.epollMu.Unlock()
		for epi := range epolls {
			ep := epi.epoll
			ep.interestMu.Lock()
			// Check that epi has not been concurrently unregistered by
			// EpollInstance.DeleteInterest() or EpollInstance.Release().
			if _, ok := ep.interest[epi.key]; ok {
				fd.EventUnregister(&epi.waiter)
				ep.removeLocked(epi)
			}
			ep.interestMu.Unlock()
		}

		// If BSD locks were used, release any lock that it may have acquired.
		// NOTE(review): this passes context.Background() rather than ctx;
		// confirm that is intentional.
		if fd.usedLockBSD.Load() != 0 {
			fd.impl.UnlockBSD(context.Background(), fd)
		}

		// Unlock any OFD locks.
		if fd.impl.SupportsLocks() {
			fd.impl.UnlockPOSIX(ctx, fd, lock.LockRange{0, lock.LockEOF})
		}

		// Release implementation resources.
		fd.impl.Release(ctx)
		if fd.writable {
			// Balances the CheckBeginWrite performed by Init.
			fd.vd.mount.EndWrite()
		}
		fd.vd.DecRef(ctx)
		fd.flagsMu.Lock()
		if fd.statusFlags.RacyLoad()&linux.O_ASYNC != 0 && fd.asyncHandler != nil {
			fd.impl.UnregisterFileAsyncHandler(fd)
		}
		fd.asyncHandler = nil
		fd.flagsMu.Unlock()
	})
}

// Mount returns the mount on which fd was opened. It does not take a reference
// on the returned Mount.
213 func (fd *FileDescription) Mount() *Mount { 214 return fd.vd.mount 215 } 216 217 // Dentry returns the dentry at which fd was opened. It does not take a 218 // reference on the returned Dentry. 219 func (fd *FileDescription) Dentry() *Dentry { 220 return fd.vd.dentry 221 } 222 223 // VirtualDentry returns the location at which fd was opened. It does not take 224 // a reference on the returned VirtualDentry. 225 func (fd *FileDescription) VirtualDentry() VirtualDentry { 226 return fd.vd 227 } 228 229 // Options returns the options passed to fd.Init(). 230 func (fd *FileDescription) Options() FileDescriptionOptions { 231 return fd.opts 232 } 233 234 // StatusFlags returns file description status flags, as for fcntl(F_GETFL). 235 func (fd *FileDescription) StatusFlags() uint32 { 236 return fd.statusFlags.Load() 237 } 238 239 // SetStatusFlags sets file description status flags, as for fcntl(F_SETFL). 240 func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Credentials, flags uint32) error { 241 // Compare Linux's fs/fcntl.c:setfl(). 242 oldFlags := fd.StatusFlags() 243 // Linux documents this check as "O_APPEND cannot be cleared if the file is 244 // marked as append-only and the file is open for write", which would make 245 // sense. However, the check as actually implemented seems to be "O_APPEND 246 // cannot be changed if the file is marked as append-only". 247 if (flags^oldFlags)&linux.O_APPEND != 0 { 248 stat, err := fd.Stat(ctx, StatOptions{ 249 // There is no mask bit for stx_attributes. 250 Mask: 0, 251 // Linux just reads inode::i_flags directly. 
252 Sync: linux.AT_STATX_DONT_SYNC, 253 }) 254 if err != nil { 255 return err 256 } 257 if (stat.AttributesMask&linux.STATX_ATTR_APPEND != 0) && (stat.Attributes&linux.STATX_ATTR_APPEND != 0) { 258 return linuxerr.EPERM 259 } 260 } 261 if (flags&linux.O_NOATIME != 0) && (oldFlags&linux.O_NOATIME == 0) { 262 stat, err := fd.Stat(ctx, StatOptions{ 263 Mask: linux.STATX_UID, 264 // Linux's inode_owner_or_capable() just reads inode::i_uid 265 // directly. 266 Sync: linux.AT_STATX_DONT_SYNC, 267 }) 268 if err != nil { 269 return err 270 } 271 if stat.Mask&linux.STATX_UID == 0 { 272 return linuxerr.EPERM 273 } 274 if !CanActAsOwner(creds, auth.KUID(stat.UID)) { 275 return linuxerr.EPERM 276 } 277 } 278 if flags&linux.O_DIRECT != 0 && !fd.opts.AllowDirectIO { 279 return linuxerr.EINVAL 280 } 281 // TODO(gvisor.dev/issue/1035): FileDescriptionImpl.SetOAsync()? 282 const settableFlags = linux.O_APPEND | linux.O_ASYNC | linux.O_DIRECT | linux.O_NOATIME | linux.O_NONBLOCK 283 fd.flagsMu.Lock() 284 defer fd.flagsMu.Unlock() 285 if fd.asyncHandler != nil { 286 // Use fd.statusFlags instead of oldFlags, which may have become outdated, 287 // to avoid double registering/unregistering. 288 if fd.statusFlags.RacyLoad()&linux.O_ASYNC == 0 && flags&linux.O_ASYNC != 0 { 289 if err := fd.impl.RegisterFileAsyncHandler(fd); err != nil { 290 return err 291 } 292 } else if fd.statusFlags.RacyLoad()&linux.O_ASYNC != 0 && flags&linux.O_ASYNC == 0 { 293 fd.impl.UnregisterFileAsyncHandler(fd) 294 } 295 } 296 fd.statusFlags.Store((oldFlags &^ settableFlags) | (flags & settableFlags)) 297 return nil 298 } 299 300 // IsReadable returns true if fd was opened for reading. 301 func (fd *FileDescription) IsReadable() bool { 302 return fd.readable 303 } 304 305 // IsWritable returns true if fd was opened for writing. 306 func (fd *FileDescription) IsWritable() bool { 307 return fd.writable 308 } 309 310 // Impl returns the FileDescriptionImpl associated with fd. 
311 func (fd *FileDescription) Impl() FileDescriptionImpl { 312 return fd.impl 313 } 314 315 // FileDescriptionImpl contains implementation details for an FileDescription. 316 // Implementations of FileDescriptionImpl should contain their associated 317 // FileDescription by value as their first field. 318 // 319 // For all functions that return linux.Statx, Statx.Uid and Statx.Gid will 320 // be interpreted as IDs in the root UserNamespace (i.e. as auth.KUID and 321 // auth.KGID respectively). 322 // 323 // All methods may return errors not specified. 324 // 325 // FileDescriptionImpl is analogous to Linux's struct file_operations. 326 type FileDescriptionImpl interface { 327 // Release is called when the associated FileDescription reaches zero 328 // references. 329 Release(ctx context.Context) 330 331 // OnClose is called when a file descriptor representing the 332 // FileDescription is closed. Note that returning a non-nil error does not 333 // prevent the file descriptor from being closed. 334 OnClose(ctx context.Context) error 335 336 // Stat returns metadata for the file represented by the FileDescription. 337 Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) 338 339 // SetStat updates metadata for the file represented by the 340 // FileDescription. Implementations are responsible for checking if the 341 // operation can be performed (see vfs.CheckSetStat() for common checks). 342 SetStat(ctx context.Context, opts SetStatOptions) error 343 344 // StatFS returns metadata for the filesystem containing the file 345 // represented by the FileDescription. 346 StatFS(ctx context.Context) (linux.Statfs, error) 347 348 // Allocate grows the file to offset + length bytes. 349 // Only mode == 0 is supported currently. 350 // 351 // Allocate should return EISDIR on directories, ESPIPE on pipes, and ENODEV on 352 // other files where it is not supported. 353 // 354 // Preconditions: The FileDescription was opened for writing. 
355 Allocate(ctx context.Context, mode, offset, length uint64) error 356 357 // waiter.Waitable methods may be used to poll for I/O events. 358 waiter.Waitable 359 360 // Epollable indicates whether this file can be used with epoll_ctl(2). 361 Epollable() bool 362 363 // PRead reads from the file into dst, starting at the given offset, and 364 // returns the number of bytes read. PRead is permitted to return partial 365 // reads with a nil error. 366 // 367 // Errors: 368 // 369 // - If opts.Flags specifies unsupported options, PRead returns EOPNOTSUPP. 370 // 371 // Preconditions: 372 // * The FileDescription was opened for reading. 373 // * FileDescriptionOptions.DenyPRead == false. 374 PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) 375 376 // Read is similar to PRead, but does not specify an offset. 377 // 378 // For files with an implicit FileDescription offset (e.g. regular files), 379 // Read begins at the FileDescription offset, and advances the offset by 380 // the number of bytes read; note that POSIX 2.9.7 "Thread Interactions 381 // with Regular File Operations" requires that all operations that may 382 // mutate the FileDescription offset are serialized. 383 // 384 // Errors: 385 // 386 // - If opts.Flags specifies unsupported options, Read returns EOPNOTSUPP. 387 // 388 // Preconditions: The FileDescription was opened for reading. 389 Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) 390 391 // PWrite writes src to the file, starting at the given offset, and returns 392 // the number of bytes written. PWrite is permitted to return partial 393 // writes with a nil error. 394 // 395 // As in Linux (but not POSIX), if O_APPEND is in effect for the 396 // FileDescription, PWrite should ignore the offset and append data to the 397 // end of the file. 398 // 399 // Errors: 400 // 401 // - If opts.Flags specifies unsupported options, PWrite returns 402 // EOPNOTSUPP. 
403 // 404 // Preconditions: 405 // * The FileDescription was opened for writing. 406 // * FileDescriptionOptions.DenyPWrite == false. 407 PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) 408 409 // Write is similar to PWrite, but does not specify an offset, which is 410 // implied as for Read. 411 // 412 // Write is a FileDescriptionImpl method, instead of a wrapper around 413 // PWrite that uses a FileDescription offset, to make it possible for 414 // remote filesystems to implement O_APPEND correctly (i.e. atomically with 415 // respect to writers outside the scope of VFS). 416 // 417 // Errors: 418 // 419 // - If opts.Flags specifies unsupported options, Write returns EOPNOTSUPP. 420 // 421 // Preconditions: The FileDescription was opened for writing. 422 Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) 423 424 // IterDirents invokes cb on each entry in the directory represented by the 425 // FileDescription. If IterDirents has been called since the last call to 426 // Seek, it continues iteration from the end of the last call. 427 IterDirents(ctx context.Context, cb IterDirentsCallback) error 428 429 // Seek changes the FileDescription offset (assuming one exists) and 430 // returns its new value. 431 // 432 // For directories, if whence == SEEK_SET and offset == 0, the caller is 433 // rewinddir(), such that Seek "shall also cause the directory stream to 434 // refer to the current state of the corresponding directory" - 435 // POSIX.1-2017. 436 Seek(ctx context.Context, offset int64, whence int32) (int64, error) 437 438 // Sync requests that cached state associated with the file represented by 439 // the FileDescription is synchronized with persistent storage, and blocks 440 // until this is complete. 441 Sync(ctx context.Context) error 442 443 // ConfigureMMap mutates opts to implement mmap(2) for the file. 
Most 444 // implementations that support memory mapping can call 445 // GenericConfigureMMap with the appropriate memmap.Mappable. 446 ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error 447 448 // Ioctl implements the ioctl(2) syscall. 449 Ioctl(ctx context.Context, uio usermem.IO, sysno uintptr, args arch.SyscallArguments) (uintptr, error) 450 451 // ListXattr returns all extended attribute names for the file. 452 ListXattr(ctx context.Context, size uint64) ([]string, error) 453 454 // GetXattr returns the value associated with the given extended attribute 455 // for the file. 456 GetXattr(ctx context.Context, opts GetXattrOptions) (string, error) 457 458 // SetXattr changes the value associated with the given extended attribute 459 // for the file. 460 SetXattr(ctx context.Context, opts SetXattrOptions) error 461 462 // RemoveXattr removes the given extended attribute from the file. 463 RemoveXattr(ctx context.Context, name string) error 464 465 // SupportsLocks indicates whether file locks are supported. 466 SupportsLocks() bool 467 468 // LockBSD tries to acquire a BSD-style advisory file lock. 469 LockBSD(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, block bool) error 470 471 // UnlockBSD releases a BSD-style advisory file lock. 472 UnlockBSD(ctx context.Context, uid lock.UniqueID) error 473 474 // LockPOSIX tries to acquire a POSIX-style advisory file lock. 475 LockPOSIX(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, r lock.LockRange, block bool) error 476 477 // UnlockPOSIX releases a POSIX-style advisory file lock. 478 UnlockPOSIX(ctx context.Context, uid lock.UniqueID, ComputeLockRange lock.LockRange) error 479 480 // TestPOSIX returns information about whether the specified lock can be held, in the style of the F_GETLK fcntl. 
481 TestPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange) (linux.Flock, error) 482 483 RegisterFileAsyncHandler(fd *FileDescription) error 484 UnregisterFileAsyncHandler(fd *FileDescription) 485 } 486 487 // Dirent holds the information contained in struct linux_dirent64. 488 // 489 // +stateify savable 490 type Dirent struct { 491 // Name is the filename. 492 Name string 493 494 // Type is the file type, a linux.DT_* constant. 495 Type uint8 496 497 // Ino is the inode number. 498 Ino uint64 499 500 // NextOff is the offset of the *next* Dirent in the directory; that is, 501 // FileDescription.Seek(NextOff, SEEK_SET) (as called by seekdir(3)) will 502 // cause the next call to FileDescription.IterDirents() to yield the next 503 // Dirent. (The offset of the first Dirent in a directory is always 0.) 504 NextOff int64 505 } 506 507 // IterDirentsCallback receives Dirents from FileDescriptionImpl.IterDirents. 508 type IterDirentsCallback interface { 509 // Handle handles the given iterated Dirent. If Handle returns a non-nil 510 // error, FileDescriptionImpl.IterDirents must stop iteration and return 511 // the error; the next call to FileDescriptionImpl.IterDirents should 512 // restart with the same Dirent. 513 Handle(dirent Dirent) error 514 } 515 516 // IterDirentsCallbackFunc implements IterDirentsCallback for a function with 517 // the semantics of IterDirentsCallback.Handle. 518 type IterDirentsCallbackFunc func(dirent Dirent) error 519 520 // Handle implements IterDirentsCallback.Handle. 521 func (f IterDirentsCallbackFunc) Handle(dirent Dirent) error { 522 return f(dirent) 523 } 524 525 // OnClose is called when a file descriptor representing the FileDescription is 526 // closed. Returning a non-nil error should not prevent the file descriptor 527 // from being closed. 
528 func (fd *FileDescription) OnClose(ctx context.Context) error { 529 return fd.impl.OnClose(ctx) 530 } 531 532 // Stat returns metadata for the file represented by fd. 533 func (fd *FileDescription) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) { 534 if fd.opts.UseDentryMetadata { 535 vfsObj := fd.vd.mount.vfs 536 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 537 Root: fd.vd, 538 Start: fd.vd, 539 }) 540 stat, err := fd.vd.mount.fs.impl.StatAt(ctx, rp, opts) 541 rp.Release(ctx) 542 return stat, err 543 } 544 return fd.impl.Stat(ctx, opts) 545 } 546 547 // SetStat updates metadata for the file represented by fd. 548 func (fd *FileDescription) SetStat(ctx context.Context, opts SetStatOptions) error { 549 if fd.opts.UseDentryMetadata { 550 vfsObj := fd.vd.mount.vfs 551 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 552 Root: fd.vd, 553 Start: fd.vd, 554 }) 555 err := fd.vd.mount.fs.impl.SetStatAt(ctx, rp, opts) 556 rp.Release(ctx) 557 return err 558 } 559 if err := fd.impl.SetStat(ctx, opts); err != nil { 560 return err 561 } 562 if ev := InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { 563 fd.Dentry().InotifyWithParent(ctx, ev, 0, InodeEvent) 564 } 565 return nil 566 } 567 568 // StatFS returns metadata for the filesystem containing the file represented 569 // by fd. 570 func (fd *FileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { 571 if fd.opts.UseDentryMetadata { 572 vfsObj := fd.vd.mount.vfs 573 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 574 Root: fd.vd, 575 Start: fd.vd, 576 }) 577 statfs, err := fd.vd.mount.fs.impl.StatFSAt(ctx, rp) 578 rp.Release(ctx) 579 return statfs, err 580 } 581 return fd.impl.StatFS(ctx) 582 } 583 584 // Allocate grows file represented by FileDescription to offset + length bytes. 
585 func (fd *FileDescription) Allocate(ctx context.Context, mode, offset, length uint64) error { 586 if !fd.IsWritable() { 587 return linuxerr.EBADF 588 } 589 if err := fd.impl.Allocate(ctx, mode, offset, length); err != nil { 590 return err 591 } 592 fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent) 593 return nil 594 } 595 596 // Readiness implements waiter.Waitable.Readiness. 597 // 598 // It returns fd's I/O readiness. 599 func (fd *FileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { 600 return fd.impl.Readiness(mask) 601 } 602 603 // EventRegister implements waiter.Waitable.EventRegister. 604 // 605 // It registers e for I/O readiness events in mask. 606 func (fd *FileDescription) EventRegister(e *waiter.Entry) error { 607 return fd.impl.EventRegister(e) 608 } 609 610 // EventUnregister implements waiter.Waitable.EventUnregister. 611 // 612 // It unregisters e for I/O readiness events. 613 func (fd *FileDescription) EventUnregister(e *waiter.Entry) { 614 fd.impl.EventUnregister(e) 615 } 616 617 // Epollable returns whether this file can be used with epoll_ctl(2). 618 func (fd *FileDescription) Epollable() bool { 619 return fd.impl.Epollable() 620 } 621 622 // PRead reads from the file represented by fd into dst, starting at the given 623 // offset, and returns the number of bytes read. PRead is permitted to return 624 // partial reads with a nil error. 
625 func (fd *FileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { 626 if fd.opts.DenyPRead { 627 return 0, linuxerr.ESPIPE 628 } 629 if !fd.readable { 630 return 0, linuxerr.EBADF 631 } 632 start := fsmetric.StartReadWait() 633 n, err := fd.impl.PRead(ctx, dst, offset, opts) 634 if n > 0 { 635 fd.Dentry().InotifyWithParent(ctx, linux.IN_ACCESS, 0, PathEvent) 636 } 637 fsmetric.Reads.Increment() 638 fsmetric.FinishReadWait(fsmetric.ReadWait, start) 639 return n, err 640 } 641 642 // Read is similar to PRead, but does not specify an offset. 643 func (fd *FileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { 644 if !fd.readable { 645 return 0, linuxerr.EBADF 646 } 647 start := fsmetric.StartReadWait() 648 n, err := fd.impl.Read(ctx, dst, opts) 649 if n > 0 { 650 fd.Dentry().InotifyWithParent(ctx, linux.IN_ACCESS, 0, PathEvent) 651 } 652 fsmetric.Reads.Increment() 653 fsmetric.FinishReadWait(fsmetric.ReadWait, start) 654 return n, err 655 } 656 657 // PWrite writes src to the file represented by fd, starting at the given 658 // offset, and returns the number of bytes written. PWrite is permitted to 659 // return partial writes with a nil error. 660 func (fd *FileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { 661 if fd.opts.DenyPWrite { 662 return 0, linuxerr.ESPIPE 663 } 664 if !fd.writable { 665 return 0, linuxerr.EBADF 666 } 667 n, err := fd.impl.PWrite(ctx, src, offset, opts) 668 if n > 0 { 669 fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent) 670 } 671 return n, err 672 } 673 674 // Write is similar to PWrite, but does not specify an offset. 
675 func (fd *FileDescription) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { 676 if !fd.writable { 677 return 0, linuxerr.EBADF 678 } 679 n, err := fd.impl.Write(ctx, src, opts) 680 if n > 0 { 681 fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent) 682 } 683 return n, err 684 } 685 686 // IterDirents invokes cb on each entry in the directory represented by fd. If 687 // IterDirents has been called since the last call to Seek, it continues 688 // iteration from the end of the last call. 689 func (fd *FileDescription) IterDirents(ctx context.Context, cb IterDirentsCallback) error { 690 defer fd.Dentry().InotifyWithParent(ctx, linux.IN_ACCESS, 0, PathEvent) 691 return fd.impl.IterDirents(ctx, cb) 692 } 693 694 // Seek changes fd's offset (assuming one exists) and returns its new value. 695 func (fd *FileDescription) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { 696 return fd.impl.Seek(ctx, offset, whence) 697 } 698 699 // Sync has the semantics of fsync(2). 700 func (fd *FileDescription) Sync(ctx context.Context) error { 701 return fd.impl.Sync(ctx) 702 } 703 704 // ConfigureMMap mutates opts to implement mmap(2) for the file represented by 705 // fd. 706 func (fd *FileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { 707 return fd.impl.ConfigureMMap(ctx, opts) 708 } 709 710 // Ioctl implements the ioctl(2) syscall. 711 func (fd *FileDescription) Ioctl(ctx context.Context, uio usermem.IO, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { 712 return fd.impl.Ioctl(ctx, uio, sysno, args) 713 } 714 715 // ListXattr returns all extended attribute names for the file represented by 716 // fd. 717 // 718 // If the size of the list (including a NUL terminating byte after every entry) 719 // would exceed size, ERANGE may be returned. Note that implementations 720 // are free to ignore size entirely and return without error). 
In all cases, 721 // if size is 0, the list should be returned without error, regardless of size. 722 func (fd *FileDescription) ListXattr(ctx context.Context, size uint64) ([]string, error) { 723 if fd.opts.UseDentryMetadata { 724 vfsObj := fd.vd.mount.vfs 725 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 726 Root: fd.vd, 727 Start: fd.vd, 728 }) 729 names, err := fd.vd.mount.fs.impl.ListXattrAt(ctx, rp, size) 730 rp.Release(ctx) 731 return names, err 732 } 733 names, err := fd.impl.ListXattr(ctx, size) 734 if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { 735 // Linux doesn't actually return EOPNOTSUPP in this case; instead, 736 // fs/xattr.c:vfs_listxattr() falls back to allowing the security 737 // subsystem to return security extended attributes, which by default 738 // don't exist. 739 return nil, nil 740 } 741 return names, err 742 } 743 744 // GetXattr returns the value associated with the given extended attribute for 745 // the file represented by fd. 746 // 747 // If the size of the return value exceeds opts.Size, ERANGE may be returned 748 // (note that implementations are free to ignore opts.Size entirely and return 749 // without error). In all cases, if opts.Size is 0, the value should be 750 // returned without error, regardless of size. 751 func (fd *FileDescription) GetXattr(ctx context.Context, opts *GetXattrOptions) (string, error) { 752 if fd.opts.UseDentryMetadata { 753 vfsObj := fd.vd.mount.vfs 754 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 755 Root: fd.vd, 756 Start: fd.vd, 757 }) 758 val, err := fd.vd.mount.fs.impl.GetXattrAt(ctx, rp, *opts) 759 rp.Release(ctx) 760 return val, err 761 } 762 return fd.impl.GetXattr(ctx, *opts) 763 } 764 765 // SetXattr changes the value associated with the given extended attribute for 766 // the file represented by fd. 
767 func (fd *FileDescription) SetXattr(ctx context.Context, opts *SetXattrOptions) error { 768 if fd.opts.UseDentryMetadata { 769 vfsObj := fd.vd.mount.vfs 770 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 771 Root: fd.vd, 772 Start: fd.vd, 773 }) 774 err := fd.vd.mount.fs.impl.SetXattrAt(ctx, rp, *opts) 775 rp.Release(ctx) 776 return err 777 } 778 if err := fd.impl.SetXattr(ctx, *opts); err != nil { 779 return err 780 } 781 fd.Dentry().InotifyWithParent(ctx, linux.IN_ATTRIB, 0, InodeEvent) 782 return nil 783 } 784 785 // RemoveXattr removes the given extended attribute from the file represented 786 // by fd. 787 func (fd *FileDescription) RemoveXattr(ctx context.Context, name string) error { 788 if fd.opts.UseDentryMetadata { 789 vfsObj := fd.vd.mount.vfs 790 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 791 Root: fd.vd, 792 Start: fd.vd, 793 }) 794 err := fd.vd.mount.fs.impl.RemoveXattrAt(ctx, rp, name) 795 rp.Release(ctx) 796 return err 797 } 798 if err := fd.impl.RemoveXattr(ctx, name); err != nil { 799 return err 800 } 801 fd.Dentry().InotifyWithParent(ctx, linux.IN_ATTRIB, 0, InodeEvent) 802 return nil 803 } 804 805 // SyncFS instructs the filesystem containing fd to execute the semantics of 806 // syncfs(2). 807 func (fd *FileDescription) SyncFS(ctx context.Context) error { 808 return fd.vd.mount.fs.impl.Sync(ctx) 809 } 810 811 // MappedName implements memmap.MappingIdentity.MappedName. 812 func (fd *FileDescription) MappedName(ctx context.Context) string { 813 vfsroot := RootFromContext(ctx) 814 s, _ := fd.vd.mount.vfs.PathnameWithDeleted(ctx, vfsroot, fd.vd) 815 if vfsroot.Ok() { 816 vfsroot.DecRef(ctx) 817 } 818 return s 819 } 820 821 // DeviceID implements memmap.MappingIdentity.DeviceID. 
822 func (fd *FileDescription) DeviceID() uint64 { 823 stat, err := fd.Stat(context.Background(), StatOptions{ 824 // There is no STATX_DEV; we assume that Stat will return it if it's 825 // available regardless of mask. 826 Mask: 0, 827 // fs/proc/task_mmu.c:show_map_vma() just reads inode::i_sb->s_dev 828 // directly. 829 Sync: linux.AT_STATX_DONT_SYNC, 830 }) 831 if err != nil { 832 return 0 833 } 834 return uint64(linux.MakeDeviceID(uint16(stat.DevMajor), stat.DevMinor)) 835 } 836 837 // InodeID implements memmap.MappingIdentity.InodeID. 838 func (fd *FileDescription) InodeID() uint64 { 839 stat, err := fd.Stat(context.Background(), StatOptions{ 840 Mask: linux.STATX_INO, 841 // fs/proc/task_mmu.c:show_map_vma() just reads inode::i_ino directly. 842 Sync: linux.AT_STATX_DONT_SYNC, 843 }) 844 if err != nil || stat.Mask&linux.STATX_INO == 0 { 845 return 0 846 } 847 return stat.Ino 848 } 849 850 // Msync implements memmap.MappingIdentity.Msync. 851 func (fd *FileDescription) Msync(ctx context.Context, mr memmap.MappableRange) error { 852 return fd.Sync(ctx) 853 } 854 855 // SupportsLocks indicates whether file locks are supported. 856 func (fd *FileDescription) SupportsLocks() bool { 857 return fd.impl.SupportsLocks() 858 } 859 860 // LockBSD tries to acquire a BSD-style advisory file lock. 861 func (fd *FileDescription) LockBSD(ctx context.Context, ownerPID int32, lockType lock.LockType, block bool) error { 862 fd.usedLockBSD.Store(1) 863 return fd.impl.LockBSD(ctx, fd, ownerPID, lockType, block) 864 } 865 866 // UnlockBSD releases a BSD-style advisory file lock. 867 func (fd *FileDescription) UnlockBSD(ctx context.Context) error { 868 return fd.impl.UnlockBSD(ctx, fd) 869 } 870 871 // LockPOSIX locks a POSIX-style file range lock. 
872 func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, r lock.LockRange, block bool) error { 873 return fd.impl.LockPOSIX(ctx, uid, ownerPID, t, r, block) 874 } 875 876 // UnlockPOSIX unlocks a POSIX-style file range lock. 877 func (fd *FileDescription) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, r lock.LockRange) error { 878 return fd.impl.UnlockPOSIX(ctx, uid, r) 879 } 880 881 // TestPOSIX returns information about whether the specified lock can be held. 882 func (fd *FileDescription) TestPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange) (linux.Flock, error) { 883 return fd.impl.TestPOSIX(ctx, uid, t, r) 884 } 885 886 // ComputeLockRange computes the range of a file lock based on the given values. 887 func (fd *FileDescription) ComputeLockRange(ctx context.Context, start uint64, length uint64, whence int16) (lock.LockRange, error) { 888 var off int64 889 switch whence { 890 case linux.SEEK_SET: 891 off = 0 892 case linux.SEEK_CUR: 893 // Note that Linux does not hold any mutexes while retrieving the file 894 // offset, see fs/locks.c:flock_to_posix_lock and fs/locks.c:fcntl_setlk. 895 curOff, err := fd.Seek(ctx, 0, linux.SEEK_CUR) 896 if err != nil { 897 return lock.LockRange{}, err 898 } 899 off = curOff 900 case linux.SEEK_END: 901 stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_SIZE}) 902 if err != nil { 903 return lock.LockRange{}, err 904 } 905 off = int64(stat.Size) 906 default: 907 return lock.LockRange{}, linuxerr.EINVAL 908 } 909 910 return lock.ComputeRange(int64(start), int64(length), off) 911 } 912 913 // ReadFull read all contents from the file. 
914 func (fd *FileDescription) ReadFull(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) { 915 var total int64 916 for dst.NumBytes() > 0 { 917 n, err := fd.PRead(ctx, dst, offset+total, ReadOptions{}) 918 total += n 919 if err == io.EOF && total != 0 { 920 return total, io.ErrUnexpectedEOF 921 } else if err != nil { 922 return total, err 923 } 924 dst = dst.DropFirst64(n) 925 } 926 return total, nil 927 } 928 929 // A FileAsync sends signals to its owner when w is ready for IO. This is only 930 // implemented by pkg/sentry/fasync:FileAsync, but we unfortunately need this 931 // interface to avoid circular dependencies. 932 type FileAsync interface { 933 Register(w waiter.Waitable) error 934 Unregister(w waiter.Waitable) 935 } 936 937 // AsyncHandler returns the FileAsync for fd. 938 func (fd *FileDescription) AsyncHandler() FileAsync { 939 fd.flagsMu.Lock() 940 defer fd.flagsMu.Unlock() 941 return fd.asyncHandler 942 } 943 944 // SetAsyncHandler sets fd.asyncHandler if it has not been set before and 945 // returns it. 946 func (fd *FileDescription) SetAsyncHandler(newHandler func() FileAsync) (FileAsync, error) { 947 fd.flagsMu.Lock() 948 defer fd.flagsMu.Unlock() 949 if fd.asyncHandler == nil { 950 fd.asyncHandler = newHandler() 951 if fd.statusFlags.RacyLoad()&linux.O_ASYNC != 0 { 952 if err := fd.impl.RegisterFileAsyncHandler(fd); err != nil { 953 return nil, err 954 } 955 } 956 } 957 return fd.asyncHandler, nil 958 } 959 960 // CopyRegularFileData copies data from srcFD to dstFD until reading from srcFD 961 // returns EOF or an error. It returns the number of bytes copied. 
962 func CopyRegularFileData(ctx context.Context, dstFD, srcFD *FileDescription) (int64, error) { 963 done := int64(0) 964 buf := usermem.BytesIOSequence(make([]byte, 32*1024)) // arbitrary buffer size 965 for { 966 readN, readErr := srcFD.Read(ctx, buf, ReadOptions{}) 967 if readErr != nil && readErr != io.EOF { 968 return done, readErr 969 } 970 src := buf.TakeFirst64(readN) 971 for src.NumBytes() != 0 { 972 writeN, writeErr := dstFD.Write(ctx, src, WriteOptions{}) 973 done += writeN 974 src = src.DropFirst64(writeN) 975 if writeErr != nil { 976 return done, writeErr 977 } 978 } 979 if readErr == io.EOF { 980 return done, nil 981 } 982 } 983 }