// github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/vfs/file_description.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package vfs

import (
	"io"
	"sync/atomic"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/sentry/arch"
	"github.com/SagerNet/gvisor/pkg/sentry/fs/lock"
	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/usermem"
	"github.com/SagerNet/gvisor/pkg/waiter"
)

// A FileDescription represents an open file description, which is the entity
// referred to by a file descriptor (POSIX.1-2017 3.258 "Open File
// Description").
//
// FileDescriptions are reference-counted. Unless otherwise specified, all
// FileDescription methods require that a reference is held.
//
// FileDescription is analogous to Linux's struct file.
//
// +stateify savable
type FileDescription struct {
	FileDescriptionRefs

	// flagsMu protects `statusFlags`, `saved`, and `asyncHandler` below.
	flagsMu sync.Mutex `state:"nosave"`

	// statusFlags contains status flags, "initialized by open(2) and possibly
	// modified by fcntl()" - fcntl(2). statusFlags can be read using atomic
	// memory operations when it does not need to be synchronized with an
	// access to asyncHandler.
	statusFlags uint32

	// saved is true after beforeSave is called. This is used to prevent
	// double-unregistration of asyncHandler. This does not work properly for
	// save-resume, which is not currently supported in gVisor (see b/26588733).
	saved bool `state:"nosave"`

	// asyncHandler handles O_ASYNC signal generation. It is set with the
	// F_SETOWN or F_SETOWN_EX fcntls. For asyncHandler to be used, O_ASYNC must
	// also be set by fcntl(2).
	asyncHandler FileAsync

	// epolls is the set of epollInterests registered for this FileDescription.
	// epolls is protected by epollMu.
	epollMu sync.Mutex `state:"nosave"`
	epolls  map[*epollInterest]struct{}

	// vd is the filesystem location at which this FileDescription was opened.
	// A reference is held on vd. vd is immutable.
	vd VirtualDentry

	// opts contains options passed to FileDescription.Init(). opts is
	// immutable.
	opts FileDescriptionOptions

	// readable is MayReadFileWithOpenFlags(statusFlags). readable is
	// immutable.
	//
	// readable is analogous to Linux's FMODE_READ.
	readable bool

	// writable is MayWriteFileWithOpenFlags(statusFlags). If writable is true,
	// the FileDescription holds a write count on vd.mount. writable is
	// immutable.
	//
	// writable is analogous to Linux's FMODE_WRITE.
	writable bool

	// usedLockBSD is set to 1 (atomically) by LockBSD and read atomically by
	// DecRef to decide whether UnlockBSD must be called on final release.
	usedLockBSD uint32

	// impl is the FileDescriptionImpl associated with this FileDescription.
	// impl is immutable. This should be the last field in FileDescription.
	impl FileDescriptionImpl
}

// FileDescriptionOptions contains options to FileDescription.Init().
//
// +stateify savable
type FileDescriptionOptions struct {
	// If AllowDirectIO is true, allow O_DIRECT to be set on the file.
	AllowDirectIO bool

	// If DenyPRead is true, calls to FileDescription.PRead() return ESPIPE.
	DenyPRead bool

	// If DenyPWrite is true, calls to FileDescription.PWrite() return
	// ESPIPE.
	DenyPWrite bool

	// If UseDentryMetadata is true, calls to FileDescription methods that
	// interact with file and filesystem metadata (Stat, SetStat, StatFS,
	// ListXattr, GetXattr, SetXattr, RemoveXattr) are implemented by calling
	// the corresponding FilesystemImpl methods instead of the corresponding
	// FileDescriptionImpl methods.
	//
	// UseDentryMetadata is intended for file descriptions that are implemented
	// outside of individual filesystems, such as pipes, sockets, and device
	// special files. FileDescriptions for which UseDentryMetadata is true may
	// embed DentryMetadataFileDescriptionImpl to obtain appropriate
	// implementations of FileDescriptionImpl methods that should not be
	// called.
	UseDentryMetadata bool
}

// FileCreationFlags are the set of flags passed to FileDescription.Init() but
// omitted from FileDescription.StatusFlags().
const FileCreationFlags = linux.O_CREAT | linux.O_EXCL | linux.O_NOCTTY | linux.O_TRUNC

// Init must be called before first use of fd. If it succeeds, it takes
// references on mnt and d. flags is the initial file description flags, which
// is usually the full set of flags passed to open(2).
135 func (fd *FileDescription) Init(impl FileDescriptionImpl, flags uint32, mnt *Mount, d *Dentry, opts *FileDescriptionOptions) error { 136 writable := MayWriteFileWithOpenFlags(flags) 137 if writable { 138 if err := mnt.CheckBeginWrite(); err != nil { 139 return err 140 } 141 } 142 143 fd.InitRefs() 144 145 // Remove "file creation flags" to mirror the behavior from file.f_flags in 146 // fs/open.c:do_dentry_open. 147 fd.statusFlags = flags &^ FileCreationFlags 148 fd.vd = VirtualDentry{ 149 mount: mnt, 150 dentry: d, 151 } 152 mnt.IncRef() 153 d.IncRef() 154 fd.opts = *opts 155 fd.readable = MayReadFileWithOpenFlags(flags) 156 fd.writable = writable 157 fd.impl = impl 158 return nil 159 } 160 161 // DecRef decrements fd's reference count. 162 func (fd *FileDescription) DecRef(ctx context.Context) { 163 fd.FileDescriptionRefs.DecRef(func() { 164 // Generate inotify events. 165 ev := uint32(linux.IN_CLOSE_NOWRITE) 166 if fd.IsWritable() { 167 ev = linux.IN_CLOSE_WRITE 168 } 169 fd.Dentry().InotifyWithParent(ctx, ev, 0, PathEvent) 170 171 // Unregister fd from all epoll instances. 172 fd.epollMu.Lock() 173 epolls := fd.epolls 174 fd.epolls = nil 175 fd.epollMu.Unlock() 176 for epi := range epolls { 177 ep := epi.epoll 178 ep.interestMu.Lock() 179 // Check that epi has not been concurrently unregistered by 180 // EpollInstance.DeleteInterest() or EpollInstance.Release(). 181 if _, ok := ep.interest[epi.key]; ok { 182 fd.EventUnregister(&epi.waiter) 183 ep.removeLocked(epi) 184 } 185 ep.interestMu.Unlock() 186 } 187 188 // If BSD locks were used, release any lock that it may have acquired. 189 if atomic.LoadUint32(&fd.usedLockBSD) != 0 { 190 fd.impl.UnlockBSD(context.Background(), fd) 191 } 192 193 // Release implementation resources. 
194 fd.impl.Release(ctx) 195 if fd.writable { 196 fd.vd.mount.EndWrite() 197 } 198 fd.vd.DecRef(ctx) 199 fd.flagsMu.Lock() 200 if !fd.saved && fd.statusFlags&linux.O_ASYNC != 0 && fd.asyncHandler != nil { 201 fd.asyncHandler.Unregister(fd) 202 } 203 fd.asyncHandler = nil 204 fd.flagsMu.Unlock() 205 }) 206 } 207 208 // Mount returns the mount on which fd was opened. It does not take a reference 209 // on the returned Mount. 210 func (fd *FileDescription) Mount() *Mount { 211 return fd.vd.mount 212 } 213 214 // Dentry returns the dentry at which fd was opened. It does not take a 215 // reference on the returned Dentry. 216 func (fd *FileDescription) Dentry() *Dentry { 217 return fd.vd.dentry 218 } 219 220 // VirtualDentry returns the location at which fd was opened. It does not take 221 // a reference on the returned VirtualDentry. 222 func (fd *FileDescription) VirtualDentry() VirtualDentry { 223 return fd.vd 224 } 225 226 // Options returns the options passed to fd.Init(). 227 func (fd *FileDescription) Options() FileDescriptionOptions { 228 return fd.opts 229 } 230 231 // StatusFlags returns file description status flags, as for fcntl(F_GETFL). 232 func (fd *FileDescription) StatusFlags() uint32 { 233 return atomic.LoadUint32(&fd.statusFlags) 234 } 235 236 // SetStatusFlags sets file description status flags, as for fcntl(F_SETFL). 237 func (fd *FileDescription) SetStatusFlags(ctx context.Context, creds *auth.Credentials, flags uint32) error { 238 // Compare Linux's fs/fcntl.c:setfl(). 239 oldFlags := fd.StatusFlags() 240 // Linux documents this check as "O_APPEND cannot be cleared if the file is 241 // marked as append-only and the file is open for write", which would make 242 // sense. However, the check as actually implemented seems to be "O_APPEND 243 // cannot be changed if the file is marked as append-only". 244 if (flags^oldFlags)&linux.O_APPEND != 0 { 245 stat, err := fd.Stat(ctx, StatOptions{ 246 // There is no mask bit for stx_attributes. 
247 Mask: 0, 248 // Linux just reads inode::i_flags directly. 249 Sync: linux.AT_STATX_DONT_SYNC, 250 }) 251 if err != nil { 252 return err 253 } 254 if (stat.AttributesMask&linux.STATX_ATTR_APPEND != 0) && (stat.Attributes&linux.STATX_ATTR_APPEND != 0) { 255 return linuxerr.EPERM 256 } 257 } 258 if (flags&linux.O_NOATIME != 0) && (oldFlags&linux.O_NOATIME == 0) { 259 stat, err := fd.Stat(ctx, StatOptions{ 260 Mask: linux.STATX_UID, 261 // Linux's inode_owner_or_capable() just reads inode::i_uid 262 // directly. 263 Sync: linux.AT_STATX_DONT_SYNC, 264 }) 265 if err != nil { 266 return err 267 } 268 if stat.Mask&linux.STATX_UID == 0 { 269 return linuxerr.EPERM 270 } 271 if !CanActAsOwner(creds, auth.KUID(stat.UID)) { 272 return linuxerr.EPERM 273 } 274 } 275 if flags&linux.O_DIRECT != 0 && !fd.opts.AllowDirectIO { 276 return linuxerr.EINVAL 277 } 278 // TODO(github.com/SagerNet/issue/1035): FileDescriptionImpl.SetOAsync()? 279 const settableFlags = linux.O_APPEND | linux.O_ASYNC | linux.O_DIRECT | linux.O_NOATIME | linux.O_NONBLOCK 280 fd.flagsMu.Lock() 281 if fd.asyncHandler != nil { 282 // Use fd.statusFlags instead of oldFlags, which may have become outdated, 283 // to avoid double registering/unregistering. 284 if fd.statusFlags&linux.O_ASYNC == 0 && flags&linux.O_ASYNC != 0 { 285 fd.asyncHandler.Register(fd) 286 } else if fd.statusFlags&linux.O_ASYNC != 0 && flags&linux.O_ASYNC == 0 { 287 fd.asyncHandler.Unregister(fd) 288 } 289 } 290 atomic.StoreUint32(&fd.statusFlags, (oldFlags&^settableFlags)|(flags&settableFlags)) 291 fd.flagsMu.Unlock() 292 return nil 293 } 294 295 // IsReadable returns true if fd was opened for reading. 296 func (fd *FileDescription) IsReadable() bool { 297 return fd.readable 298 } 299 300 // IsWritable returns true if fd was opened for writing. 301 func (fd *FileDescription) IsWritable() bool { 302 return fd.writable 303 } 304 305 // Impl returns the FileDescriptionImpl associated with fd. 
func (fd *FileDescription) Impl() FileDescriptionImpl {
	return fd.impl
}

// FileDescriptionImpl contains implementation details for a FileDescription.
// Implementations of FileDescriptionImpl should contain their associated
// FileDescription by value as their first field.
//
// For all functions that return linux.Statx, Statx.Uid and Statx.Gid will
// be interpreted as IDs in the root UserNamespace (i.e. as auth.KUID and
// auth.KGID respectively).
//
// All methods may return errors not specified.
//
// FileDescriptionImpl is analogous to Linux's struct file_operations.
type FileDescriptionImpl interface {
	// Release is called when the associated FileDescription reaches zero
	// references.
	Release(ctx context.Context)

	// OnClose is called when a file descriptor representing the
	// FileDescription is closed. Note that returning a non-nil error does not
	// prevent the file descriptor from being closed.
	OnClose(ctx context.Context) error

	// Stat returns metadata for the file represented by the FileDescription.
	Stat(ctx context.Context, opts StatOptions) (linux.Statx, error)

	// SetStat updates metadata for the file represented by the
	// FileDescription. Implementations are responsible for checking if the
	// operation can be performed (see vfs.CheckSetStat() for common checks).
	SetStat(ctx context.Context, opts SetStatOptions) error

	// StatFS returns metadata for the filesystem containing the file
	// represented by the FileDescription.
	StatFS(ctx context.Context) (linux.Statfs, error)

	// Allocate grows the file to offset + length bytes.
	// Only mode == 0 is supported currently.
	//
	// Allocate should return EISDIR on directories, ESPIPE on pipes, and ENODEV on
	// other files where it is not supported.
	//
	// Preconditions: The FileDescription was opened for writing.
	Allocate(ctx context.Context, mode, offset, length uint64) error

	// waiter.Waitable methods may be used to poll for I/O events.
	waiter.Waitable

	// PRead reads from the file into dst, starting at the given offset, and
	// returns the number of bytes read. PRead is permitted to return partial
	// reads with a nil error.
	//
	// Errors:
	//
	// - If opts.Flags specifies unsupported options, PRead returns EOPNOTSUPP.
	//
	// Preconditions:
	// * The FileDescription was opened for reading.
	// * FileDescriptionOptions.DenyPRead == false.
	PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error)

	// Read is similar to PRead, but does not specify an offset.
	//
	// For files with an implicit FileDescription offset (e.g. regular files),
	// Read begins at the FileDescription offset, and advances the offset by
	// the number of bytes read; note that POSIX 2.9.7 "Thread Interactions
	// with Regular File Operations" requires that all operations that may
	// mutate the FileDescription offset are serialized.
	//
	// Errors:
	//
	// - If opts.Flags specifies unsupported options, Read returns EOPNOTSUPP.
	//
	// Preconditions: The FileDescription was opened for reading.
	Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error)

	// PWrite writes src to the file, starting at the given offset, and returns
	// the number of bytes written. PWrite is permitted to return partial
	// writes with a nil error.
	//
	// As in Linux (but not POSIX), if O_APPEND is in effect for the
	// FileDescription, PWrite should ignore the offset and append data to the
	// end of the file.
	//
	// Errors:
	//
	// - If opts.Flags specifies unsupported options, PWrite returns
	// EOPNOTSUPP.
	//
	// Preconditions:
	// * The FileDescription was opened for writing.
	// * FileDescriptionOptions.DenyPWrite == false.
	PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error)

	// Write is similar to PWrite, but does not specify an offset, which is
	// implied as for Read.
	//
	// Write is a FileDescriptionImpl method, instead of a wrapper around
	// PWrite that uses a FileDescription offset, to make it possible for
	// remote filesystems to implement O_APPEND correctly (i.e. atomically with
	// respect to writers outside the scope of VFS).
	//
	// Errors:
	//
	// - If opts.Flags specifies unsupported options, Write returns EOPNOTSUPP.
	//
	// Preconditions: The FileDescription was opened for writing.
	Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error)

	// IterDirents invokes cb on each entry in the directory represented by the
	// FileDescription. If IterDirents has been called since the last call to
	// Seek, it continues iteration from the end of the last call.
	IterDirents(ctx context.Context, cb IterDirentsCallback) error

	// Seek changes the FileDescription offset (assuming one exists) and
	// returns its new value.
	//
	// For directories, if whence == SEEK_SET and offset == 0, the caller is
	// rewinddir(), such that Seek "shall also cause the directory stream to
	// refer to the current state of the corresponding directory" -
	// POSIX.1-2017.
	Seek(ctx context.Context, offset int64, whence int32) (int64, error)

	// Sync requests that cached state associated with the file represented by
	// the FileDescription is synchronized with persistent storage, and blocks
	// until this is complete.
	Sync(ctx context.Context) error

	// ConfigureMMap mutates opts to implement mmap(2) for the file. Most
	// implementations that support memory mapping can call
	// GenericConfigureMMap with the appropriate memmap.Mappable.
	ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error

	// Ioctl implements the ioctl(2) syscall.
	Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error)

	// ListXattr returns all extended attribute names for the file.
	ListXattr(ctx context.Context, size uint64) ([]string, error)

	// GetXattr returns the value associated with the given extended attribute
	// for the file.
	GetXattr(ctx context.Context, opts GetXattrOptions) (string, error)

	// SetXattr changes the value associated with the given extended attribute
	// for the file.
	SetXattr(ctx context.Context, opts SetXattrOptions) error

	// RemoveXattr removes the given extended attribute from the file.
	RemoveXattr(ctx context.Context, name string) error

	// SupportsLocks indicates whether file locks are supported.
	SupportsLocks() bool

	// LockBSD tries to acquire a BSD-style advisory file lock.
	LockBSD(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, block lock.Blocker) error

	// UnlockBSD releases a BSD-style advisory file lock.
	UnlockBSD(ctx context.Context, uid lock.UniqueID) error

	// LockPOSIX tries to acquire a POSIX-style advisory file lock.
	LockPOSIX(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, r lock.LockRange, block lock.Blocker) error

	// UnlockPOSIX releases a POSIX-style advisory file lock. The range to
	// unlock is the lock.LockRange parameter, which is named ComputeLockRange
	// here (the corresponding parameter of LockPOSIX and TestPOSIX is named
	// r).
	UnlockPOSIX(ctx context.Context, uid lock.UniqueID, ComputeLockRange lock.LockRange) error

	// TestPOSIX returns information about whether the specified lock can be
	// held, in the style of the F_GETLK fcntl.
	TestPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange) (linux.Flock, error)
}

// Dirent holds the information contained in struct linux_dirent64.
//
// +stateify savable
type Dirent struct {
	// Name is the filename.
	Name string

	// Type is the file type, a linux.DT_* constant.
	Type uint8

	// Ino is the inode number.
	Ino uint64

	// NextOff is the offset of the *next* Dirent in the directory; that is,
	// FileDescription.Seek(NextOff, SEEK_SET) (as called by seekdir(3)) will
	// cause the next call to FileDescription.IterDirents() to yield the next
	// Dirent. (The offset of the first Dirent in a directory is always 0.)
	NextOff int64
}

// IterDirentsCallback receives Dirents from FileDescriptionImpl.IterDirents.
type IterDirentsCallback interface {
	// Handle handles the given iterated Dirent. If Handle returns a non-nil
	// error, FileDescriptionImpl.IterDirents must stop iteration and return
	// the error; the next call to FileDescriptionImpl.IterDirents should
	// restart with the same Dirent.
	Handle(dirent Dirent) error
}

// IterDirentsCallbackFunc implements IterDirentsCallback for a function with
// the semantics of IterDirentsCallback.Handle.
type IterDirentsCallbackFunc func(dirent Dirent) error

// Handle implements IterDirentsCallback.Handle by calling f itself.
func (f IterDirentsCallbackFunc) Handle(dirent Dirent) error {
	return f(dirent)
}

// OnClose is called when a file descriptor representing the FileDescription is
// closed. Returning a non-nil error should not prevent the file descriptor
// from being closed.
//
// It forwards directly to the implementation's OnClose.
func (fd *FileDescription) OnClose(ctx context.Context) error {
	return fd.impl.OnClose(ctx)
}

// Stat returns metadata for the file represented by fd.
522 func (fd *FileDescription) Stat(ctx context.Context, opts StatOptions) (linux.Statx, error) { 523 if fd.opts.UseDentryMetadata { 524 vfsObj := fd.vd.mount.vfs 525 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 526 Root: fd.vd, 527 Start: fd.vd, 528 }) 529 stat, err := fd.vd.mount.fs.impl.StatAt(ctx, rp, opts) 530 rp.Release(ctx) 531 return stat, err 532 } 533 return fd.impl.Stat(ctx, opts) 534 } 535 536 // SetStat updates metadata for the file represented by fd. 537 func (fd *FileDescription) SetStat(ctx context.Context, opts SetStatOptions) error { 538 if fd.opts.UseDentryMetadata { 539 vfsObj := fd.vd.mount.vfs 540 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 541 Root: fd.vd, 542 Start: fd.vd, 543 }) 544 err := fd.vd.mount.fs.impl.SetStatAt(ctx, rp, opts) 545 rp.Release(ctx) 546 return err 547 } 548 return fd.impl.SetStat(ctx, opts) 549 } 550 551 // StatFS returns metadata for the filesystem containing the file represented 552 // by fd. 553 func (fd *FileDescription) StatFS(ctx context.Context) (linux.Statfs, error) { 554 if fd.opts.UseDentryMetadata { 555 vfsObj := fd.vd.mount.vfs 556 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 557 Root: fd.vd, 558 Start: fd.vd, 559 }) 560 statfs, err := fd.vd.mount.fs.impl.StatFSAt(ctx, rp) 561 rp.Release(ctx) 562 return statfs, err 563 } 564 return fd.impl.StatFS(ctx) 565 } 566 567 // Allocate grows file represented by FileDescription to offset + length bytes. 568 func (fd *FileDescription) Allocate(ctx context.Context, mode, offset, length uint64) error { 569 if !fd.IsWritable() { 570 return linuxerr.EBADF 571 } 572 if err := fd.impl.Allocate(ctx, mode, offset, length); err != nil { 573 return err 574 } 575 fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent) 576 return nil 577 } 578 579 // Readiness implements waiter.Waitable.Readiness. 580 // 581 // It returns fd's I/O readiness. 
582 func (fd *FileDescription) Readiness(mask waiter.EventMask) waiter.EventMask { 583 return fd.impl.Readiness(mask) 584 } 585 586 // EventRegister implements waiter.Waitable.EventRegister. 587 // 588 // It registers e for I/O readiness events in mask. 589 func (fd *FileDescription) EventRegister(e *waiter.Entry, mask waiter.EventMask) { 590 fd.impl.EventRegister(e, mask) 591 } 592 593 // EventUnregister implements waiter.Waitable.EventUnregister. 594 // 595 // It unregisters e for I/O readiness events. 596 func (fd *FileDescription) EventUnregister(e *waiter.Entry) { 597 fd.impl.EventUnregister(e) 598 } 599 600 // PRead reads from the file represented by fd into dst, starting at the given 601 // offset, and returns the number of bytes read. PRead is permitted to return 602 // partial reads with a nil error. 603 func (fd *FileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) { 604 if fd.opts.DenyPRead { 605 return 0, linuxerr.ESPIPE 606 } 607 if !fd.readable { 608 return 0, linuxerr.EBADF 609 } 610 start := fsmetric.StartReadWait() 611 n, err := fd.impl.PRead(ctx, dst, offset, opts) 612 if n > 0 { 613 fd.Dentry().InotifyWithParent(ctx, linux.IN_ACCESS, 0, PathEvent) 614 } 615 fsmetric.Reads.Increment() 616 fsmetric.FinishReadWait(fsmetric.ReadWait, start) 617 return n, err 618 } 619 620 // Read is similar to PRead, but does not specify an offset. 
621 func (fd *FileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) { 622 if !fd.readable { 623 return 0, linuxerr.EBADF 624 } 625 start := fsmetric.StartReadWait() 626 n, err := fd.impl.Read(ctx, dst, opts) 627 if n > 0 { 628 fd.Dentry().InotifyWithParent(ctx, linux.IN_ACCESS, 0, PathEvent) 629 } 630 fsmetric.Reads.Increment() 631 fsmetric.FinishReadWait(fsmetric.ReadWait, start) 632 return n, err 633 } 634 635 // PWrite writes src to the file represented by fd, starting at the given 636 // offset, and returns the number of bytes written. PWrite is permitted to 637 // return partial writes with a nil error. 638 func (fd *FileDescription) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts WriteOptions) (int64, error) { 639 if fd.opts.DenyPWrite { 640 return 0, linuxerr.ESPIPE 641 } 642 if !fd.writable { 643 return 0, linuxerr.EBADF 644 } 645 n, err := fd.impl.PWrite(ctx, src, offset, opts) 646 if n > 0 { 647 fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent) 648 } 649 return n, err 650 } 651 652 // Write is similar to PWrite, but does not specify an offset. 653 func (fd *FileDescription) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) { 654 if !fd.writable { 655 return 0, linuxerr.EBADF 656 } 657 n, err := fd.impl.Write(ctx, src, opts) 658 if n > 0 { 659 fd.Dentry().InotifyWithParent(ctx, linux.IN_MODIFY, 0, PathEvent) 660 } 661 return n, err 662 } 663 664 // IterDirents invokes cb on each entry in the directory represented by fd. If 665 // IterDirents has been called since the last call to Seek, it continues 666 // iteration from the end of the last call. 667 func (fd *FileDescription) IterDirents(ctx context.Context, cb IterDirentsCallback) error { 668 return fd.impl.IterDirents(ctx, cb) 669 } 670 671 // Seek changes fd's offset (assuming one exists) and returns its new value. 
672 func (fd *FileDescription) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { 673 return fd.impl.Seek(ctx, offset, whence) 674 } 675 676 // Sync has the semantics of fsync(2). 677 func (fd *FileDescription) Sync(ctx context.Context) error { 678 return fd.impl.Sync(ctx) 679 } 680 681 // ConfigureMMap mutates opts to implement mmap(2) for the file represented by 682 // fd. 683 func (fd *FileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { 684 return fd.impl.ConfigureMMap(ctx, opts) 685 } 686 687 // Ioctl implements the ioctl(2) syscall. 688 func (fd *FileDescription) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { 689 return fd.impl.Ioctl(ctx, uio, args) 690 } 691 692 // ListXattr returns all extended attribute names for the file represented by 693 // fd. 694 // 695 // If the size of the list (including a NUL terminating byte after every entry) 696 // would exceed size, ERANGE may be returned. Note that implementations 697 // are free to ignore size entirely and return without error). In all cases, 698 // if size is 0, the list should be returned without error, regardless of size. 699 func (fd *FileDescription) ListXattr(ctx context.Context, size uint64) ([]string, error) { 700 if fd.opts.UseDentryMetadata { 701 vfsObj := fd.vd.mount.vfs 702 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 703 Root: fd.vd, 704 Start: fd.vd, 705 }) 706 names, err := fd.vd.mount.fs.impl.ListXattrAt(ctx, rp, size) 707 rp.Release(ctx) 708 return names, err 709 } 710 names, err := fd.impl.ListXattr(ctx, size) 711 if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { 712 // Linux doesn't actually return EOPNOTSUPP in this case; instead, 713 // fs/xattr.c:vfs_listxattr() falls back to allowing the security 714 // subsystem to return security extended attributes, which by default 715 // don't exist. 
716 return nil, nil 717 } 718 return names, err 719 } 720 721 // GetXattr returns the value associated with the given extended attribute for 722 // the file represented by fd. 723 // 724 // If the size of the return value exceeds opts.Size, ERANGE may be returned 725 // (note that implementations are free to ignore opts.Size entirely and return 726 // without error). In all cases, if opts.Size is 0, the value should be 727 // returned without error, regardless of size. 728 func (fd *FileDescription) GetXattr(ctx context.Context, opts *GetXattrOptions) (string, error) { 729 if fd.opts.UseDentryMetadata { 730 vfsObj := fd.vd.mount.vfs 731 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 732 Root: fd.vd, 733 Start: fd.vd, 734 }) 735 val, err := fd.vd.mount.fs.impl.GetXattrAt(ctx, rp, *opts) 736 rp.Release(ctx) 737 return val, err 738 } 739 return fd.impl.GetXattr(ctx, *opts) 740 } 741 742 // SetXattr changes the value associated with the given extended attribute for 743 // the file represented by fd. 744 func (fd *FileDescription) SetXattr(ctx context.Context, opts *SetXattrOptions) error { 745 if fd.opts.UseDentryMetadata { 746 vfsObj := fd.vd.mount.vfs 747 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 748 Root: fd.vd, 749 Start: fd.vd, 750 }) 751 err := fd.vd.mount.fs.impl.SetXattrAt(ctx, rp, *opts) 752 rp.Release(ctx) 753 return err 754 } 755 return fd.impl.SetXattr(ctx, *opts) 756 } 757 758 // RemoveXattr removes the given extended attribute from the file represented 759 // by fd. 
760 func (fd *FileDescription) RemoveXattr(ctx context.Context, name string) error { 761 if fd.opts.UseDentryMetadata { 762 vfsObj := fd.vd.mount.vfs 763 rp := vfsObj.getResolvingPath(auth.CredentialsFromContext(ctx), &PathOperation{ 764 Root: fd.vd, 765 Start: fd.vd, 766 }) 767 err := fd.vd.mount.fs.impl.RemoveXattrAt(ctx, rp, name) 768 rp.Release(ctx) 769 return err 770 } 771 return fd.impl.RemoveXattr(ctx, name) 772 } 773 774 // SyncFS instructs the filesystem containing fd to execute the semantics of 775 // syncfs(2). 776 func (fd *FileDescription) SyncFS(ctx context.Context) error { 777 return fd.vd.mount.fs.impl.Sync(ctx) 778 } 779 780 // MappedName implements memmap.MappingIdentity.MappedName. 781 func (fd *FileDescription) MappedName(ctx context.Context) string { 782 vfsroot := RootFromContext(ctx) 783 s, _ := fd.vd.mount.vfs.PathnameWithDeleted(ctx, vfsroot, fd.vd) 784 if vfsroot.Ok() { 785 vfsroot.DecRef(ctx) 786 } 787 return s 788 } 789 790 // DeviceID implements memmap.MappingIdentity.DeviceID. 791 func (fd *FileDescription) DeviceID() uint64 { 792 stat, err := fd.Stat(context.Background(), StatOptions{ 793 // There is no STATX_DEV; we assume that Stat will return it if it's 794 // available regardless of mask. 795 Mask: 0, 796 // fs/proc/task_mmu.c:show_map_vma() just reads inode::i_sb->s_dev 797 // directly. 798 Sync: linux.AT_STATX_DONT_SYNC, 799 }) 800 if err != nil { 801 return 0 802 } 803 return uint64(linux.MakeDeviceID(uint16(stat.DevMajor), stat.DevMinor)) 804 } 805 806 // InodeID implements memmap.MappingIdentity.InodeID. 807 func (fd *FileDescription) InodeID() uint64 { 808 stat, err := fd.Stat(context.Background(), StatOptions{ 809 Mask: linux.STATX_INO, 810 // fs/proc/task_mmu.c:show_map_vma() just reads inode::i_ino directly. 811 Sync: linux.AT_STATX_DONT_SYNC, 812 }) 813 if err != nil || stat.Mask&linux.STATX_INO == 0 { 814 return 0 815 } 816 return stat.Ino 817 } 818 819 // Msync implements memmap.MappingIdentity.Msync. 
func (fd *FileDescription) Msync(ctx context.Context, mr memmap.MappableRange) error {
	// mr is not consulted; the whole file is synced.
	return fd.Sync(ctx)
}

// SupportsLocks indicates whether file locks are supported.
func (fd *FileDescription) SupportsLocks() bool {
	return fd.impl.SupportsLocks()
}

// LockBSD tries to acquire a BSD-style advisory file lock. fd itself is passed
// to the implementation as the lock's unique ID.
func (fd *FileDescription) LockBSD(ctx context.Context, ownerPID int32, lockType lock.LockType, blocker lock.Blocker) error {
	// Record that a BSD lock was requested before attempting it, so that
	// DecRef calls UnlockBSD on final release. The flag is set even if the
	// lock attempt below fails.
	atomic.StoreUint32(&fd.usedLockBSD, 1)
	return fd.impl.LockBSD(ctx, fd, ownerPID, lockType, blocker)
}

// UnlockBSD releases a BSD-style advisory file lock. fd itself is passed to
// the implementation as the lock's unique ID.
func (fd *FileDescription) UnlockBSD(ctx context.Context) error {
	return fd.impl.UnlockBSD(ctx, fd)
}

// LockPOSIX locks a POSIX-style file range lock.
func (fd *FileDescription) LockPOSIX(ctx context.Context, uid lock.UniqueID, ownerPID int32, t lock.LockType, r lock.LockRange, block lock.Blocker) error {
	return fd.impl.LockPOSIX(ctx, uid, ownerPID, t, r, block)
}

// UnlockPOSIX unlocks a POSIX-style file range lock.
func (fd *FileDescription) UnlockPOSIX(ctx context.Context, uid lock.UniqueID, r lock.LockRange) error {
	return fd.impl.UnlockPOSIX(ctx, uid, r)
}

// TestPOSIX returns information about whether the specified lock can be held.
func (fd *FileDescription) TestPOSIX(ctx context.Context, uid lock.UniqueID, t lock.LockType, r lock.LockRange) (linux.Flock, error) {
	return fd.impl.TestPOSIX(ctx, uid, t, r)
}

// ComputeLockRange computes the range of a file lock based on the given values.
856 func (fd *FileDescription) ComputeLockRange(ctx context.Context, start uint64, length uint64, whence int16) (lock.LockRange, error) { 857 var off int64 858 switch whence { 859 case linux.SEEK_SET: 860 off = 0 861 case linux.SEEK_CUR: 862 // Note that Linux does not hold any mutexes while retrieving the file 863 // offset, see fs/locks.c:flock_to_posix_lock and fs/locks.c:fcntl_setlk. 864 curOff, err := fd.Seek(ctx, 0, linux.SEEK_CUR) 865 if err != nil { 866 return lock.LockRange{}, err 867 } 868 off = curOff 869 case linux.SEEK_END: 870 stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_SIZE}) 871 if err != nil { 872 return lock.LockRange{}, err 873 } 874 off = int64(stat.Size) 875 default: 876 return lock.LockRange{}, linuxerr.EINVAL 877 } 878 879 return lock.ComputeRange(int64(start), int64(length), off) 880 } 881 882 // A FileAsync sends signals to its owner when w is ready for IO. This is only 883 // implemented by pkg/sentry/fasync:FileAsync, but we unfortunately need this 884 // interface to avoid circular dependencies. 885 type FileAsync interface { 886 Register(w waiter.Waitable) 887 Unregister(w waiter.Waitable) 888 } 889 890 // AsyncHandler returns the FileAsync for fd. 891 func (fd *FileDescription) AsyncHandler() FileAsync { 892 fd.flagsMu.Lock() 893 defer fd.flagsMu.Unlock() 894 return fd.asyncHandler 895 } 896 897 // SetAsyncHandler sets fd.asyncHandler if it has not been set before and 898 // returns it. 899 func (fd *FileDescription) SetAsyncHandler(newHandler func() FileAsync) FileAsync { 900 fd.flagsMu.Lock() 901 defer fd.flagsMu.Unlock() 902 if fd.asyncHandler == nil { 903 fd.asyncHandler = newHandler() 904 if fd.statusFlags&linux.O_ASYNC != 0 { 905 fd.asyncHandler.Register(fd) 906 } 907 } 908 return fd.asyncHandler 909 } 910 911 // CopyRegularFileData copies data from srcFD to dstFD until reading from srcFD 912 // returns EOF or an error. It returns the number of bytes copied. 
913 func CopyRegularFileData(ctx context.Context, dstFD, srcFD *FileDescription) (int64, error) { 914 done := int64(0) 915 buf := usermem.BytesIOSequence(make([]byte, 32*1024)) // arbitrary buffer size 916 for { 917 readN, readErr := srcFD.Read(ctx, buf, ReadOptions{}) 918 if readErr != nil && readErr != io.EOF { 919 return done, readErr 920 } 921 src := buf.TakeFirst64(readN) 922 for src.NumBytes() != 0 { 923 writeN, writeErr := dstFD.Write(ctx, src, WriteOptions{}) 924 done += writeN 925 src = src.DropFirst64(writeN) 926 if writeErr != nil { 927 return done, writeErr 928 } 929 } 930 if readErr == io.EOF { 931 return done, nil 932 } 933 } 934 }