github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/file.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fs 16 17 import ( 18 "math" 19 "sync/atomic" 20 21 "github.com/SagerNet/gvisor/pkg/amutex" 22 "github.com/SagerNet/gvisor/pkg/context" 23 "github.com/SagerNet/gvisor/pkg/refs" 24 "github.com/SagerNet/gvisor/pkg/sentry/fs/lock" 25 "github.com/SagerNet/gvisor/pkg/sentry/fsmetric" 26 "github.com/SagerNet/gvisor/pkg/sentry/limits" 27 "github.com/SagerNet/gvisor/pkg/sentry/memmap" 28 "github.com/SagerNet/gvisor/pkg/sentry/uniqueid" 29 "github.com/SagerNet/gvisor/pkg/sync" 30 "github.com/SagerNet/gvisor/pkg/syserror" 31 "github.com/SagerNet/gvisor/pkg/usermem" 32 "github.com/SagerNet/gvisor/pkg/waiter" 33 ) 34 35 // FileMaxOffset is the maximum possible file offset. 36 const FileMaxOffset = math.MaxInt64 37 38 // File is an open file handle. It is thread-safe. 39 // 40 // File provides stronger synchronization guarantees than Linux. Linux 41 // synchronizes lseek(2), read(2), and write(2) with respect to the file 42 // offset for regular files and only for those interfaces. See 43 // fs/read_write.c:fdget_pos, fs.read_write.c:fdput_pos and FMODE_ATOMIC_POS. 44 // 45 // In contrast, File synchronizes any operation that could take a long time 46 // under a single abortable mutex which also synchronizes lseek(2), read(2), 47 // and write(2). 48 // 49 // FIXME(b/38451980): Split synchronization from cancellation. 50 // 51 // +stateify savable 52 type File struct { 53 refs.AtomicRefCount 54 55 // UniqueID is the globally unique identifier of the File. 56 UniqueID uint64 57 58 // Dirent is the Dirent backing this File. This encodes the name 59 // of the File via Dirent.FullName() as well as its identity via the 60 // Dirent's Inode. The Dirent is non-nil. 61 // 62 // A File holds a reference to this Dirent. Using the returned Dirent is 63 // only safe as long as a reference on the File is held. The association 64 // between a File and a Dirent is immutable. 65 // 66 // Files that are not parented in a filesystem return a root Dirent 67 // that holds a reference to their Inode. 68 // 69 // The name of the Dirent may reflect parentage if the Dirent is not a 70 // root Dirent or the identity of the File on a pseudo filesystem (pipefs, 71 // sockfs, etc). 72 // 73 // Multiple Files may hold a reference to the same Dirent. This is the 74 // common case for Files that are parented and maintain consistency with 75 // other files via the Dirent cache. 76 Dirent *Dirent 77 78 // flagsMu protects flags and async below. 79 flagsMu sync.Mutex `state:"nosave"` 80 81 // flags are the File's flags. Setting or getting flags is fully atomic 82 // and is not protected by mu (below). 83 flags FileFlags 84 85 // async handles O_ASYNC notifications. 86 async FileAsync 87 88 // saving indicates that this file is in the process of being saved. 89 saving bool `state:"nosave"` 90 91 // mu is dual-purpose: first, to make read(2) and write(2) thread-safe 92 // in conformity with POSIX, and second, to cancel operations before they 93 // begin in response to interruptions (i.e. signals). 94 mu amutex.AbortableMutex `state:"nosave"` 95 96 // FileOperations implements file system specific behavior for this File. 97 FileOperations FileOperations `state:"wait"` 98 99 // offset is the File's offset. Updating offset is protected by mu but 100 // can be read atomically via File.Offset() outside of mu. 101 offset int64 102 } 103 104 // NewFile returns a File. It takes a reference on the Dirent and owns the 105 // lifetime of the FileOperations. Files that do not support reading and 106 // writing at an arbitrary offset should set flags.Pread and flags.Pwrite 107 // to false respectively. 108 func NewFile(ctx context.Context, dirent *Dirent, flags FileFlags, fops FileOperations) *File { 109 dirent.IncRef() 110 f := File{ 111 UniqueID: uniqueid.GlobalFromContext(ctx), 112 Dirent: dirent, 113 FileOperations: fops, 114 flags: flags, 115 } 116 f.mu.Init() 117 f.EnableLeakCheck("fs.File") 118 return &f 119 } 120 121 // DecRef destroys the File when it is no longer referenced. 122 func (f *File) DecRef(ctx context.Context) { 123 f.DecRefWithDestructor(ctx, func(context.Context) { 124 // Drop BSD style locks. 125 lockRng := lock.LockRange{Start: 0, End: lock.LockEOF} 126 f.Dirent.Inode.LockCtx.BSD.UnlockRegion(f, lockRng) 127 128 // Release resources held by the FileOperations. 129 f.FileOperations.Release(ctx) 130 131 // Release a reference on the Dirent. 132 f.Dirent.DecRef(ctx) 133 134 // Only unregister if we are currently registered. There is nothing 135 // to register if f.async is nil (this happens when async mode is 136 // enabled without setting an owner). Also, we unregister during 137 // save. 138 f.flagsMu.Lock() 139 if !f.saving && f.flags.Async && f.async != nil { 140 f.async.Unregister(f) 141 } 142 f.async = nil 143 f.flagsMu.Unlock() 144 }) 145 } 146 147 // Flags atomically loads the File's flags. 148 func (f *File) Flags() FileFlags { 149 f.flagsMu.Lock() 150 flags := f.flags 151 f.flagsMu.Unlock() 152 return flags 153 } 154 155 // SetFlags atomically changes the File's flags to the values contained 156 // in newFlags. See SettableFileFlags for values that can be set. 157 func (f *File) SetFlags(newFlags SettableFileFlags) { 158 f.flagsMu.Lock() 159 f.flags.Direct = newFlags.Direct 160 f.flags.NonBlocking = newFlags.NonBlocking 161 f.flags.Append = newFlags.Append 162 if f.async != nil { 163 if newFlags.Async && !f.flags.Async { 164 f.async.Register(f) 165 } 166 if !newFlags.Async && f.flags.Async { 167 f.async.Unregister(f) 168 } 169 } 170 f.flags.Async = newFlags.Async 171 f.flagsMu.Unlock() 172 } 173 174 // Offset atomically loads the File's offset. 175 func (f *File) Offset() int64 { 176 return atomic.LoadInt64(&f.offset) 177 } 178 179 // Readiness implements waiter.Waitable.Readiness. 180 func (f *File) Readiness(mask waiter.EventMask) waiter.EventMask { 181 return f.FileOperations.Readiness(mask) 182 } 183 184 // EventRegister implements waiter.Waitable.EventRegister. 185 func (f *File) EventRegister(e *waiter.Entry, mask waiter.EventMask) { 186 f.FileOperations.EventRegister(e, mask) 187 } 188 189 // EventUnregister implements waiter.Waitable.EventUnregister. 190 func (f *File) EventUnregister(e *waiter.Entry) { 191 f.FileOperations.EventUnregister(e) 192 } 193 194 // Seek calls f.FileOperations.Seek with f as the File, updating the file 195 // offset to the value returned by f.FileOperations.Seek if the operation 196 // is successful. 197 // 198 // Returns syserror.ErrInterrupted if seeking was interrupted. 199 func (f *File) Seek(ctx context.Context, whence SeekWhence, offset int64) (int64, error) { 200 if !f.mu.Lock(ctx) { 201 return 0, syserror.ErrInterrupted 202 } 203 defer f.mu.Unlock() 204 205 newOffset, err := f.FileOperations.Seek(ctx, f, whence, offset) 206 if err == nil { 207 atomic.StoreInt64(&f.offset, newOffset) 208 } 209 return newOffset, err 210 } 211 212 // Readdir reads the directory entries of this File and writes them out 213 // to the DentrySerializer until entries can no longer be written. If even 214 // a single directory entry is written then Readdir returns a nil error 215 // and the directory offset is advanced. 216 // 217 // Readdir unconditionally updates the access time on the File's Inode, 218 // see fs/readdir.c:iterate_dir. 219 // 220 // Returns syserror.ErrInterrupted if reading was interrupted. 221 func (f *File) Readdir(ctx context.Context, serializer DentrySerializer) error { 222 if !f.mu.Lock(ctx) { 223 return syserror.ErrInterrupted 224 } 225 defer f.mu.Unlock() 226 227 offset, err := f.FileOperations.Readdir(ctx, f, serializer) 228 atomic.StoreInt64(&f.offset, offset) 229 return err 230 } 231 232 // Readv calls f.FileOperations.Read with f as the File, advancing the file 233 // offset if f.FileOperations.Read returns bytes read > 0. 234 // 235 // Returns syserror.ErrInterrupted if reading was interrupted. 236 func (f *File) Readv(ctx context.Context, dst usermem.IOSequence) (int64, error) { 237 start := fsmetric.StartReadWait() 238 defer fsmetric.FinishReadWait(fsmetric.ReadWait, start) 239 240 if !f.mu.Lock(ctx) { 241 return 0, syserror.ErrInterrupted 242 } 243 244 fsmetric.Reads.Increment() 245 n, err := f.FileOperations.Read(ctx, f, dst, f.offset) 246 if n > 0 && !f.flags.NonSeekable { 247 atomic.AddInt64(&f.offset, n) 248 } 249 f.mu.Unlock() 250 return n, err 251 } 252 253 // Preadv calls f.FileOperations.Read with f as the File. It does not 254 // advance the file offset. If !f.Flags().Pread, Preadv should not be 255 // called. 256 // 257 // Otherwise same as Readv. 258 func (f *File) Preadv(ctx context.Context, dst usermem.IOSequence, offset int64) (int64, error) { 259 start := fsmetric.StartReadWait() 260 defer fsmetric.FinishReadWait(fsmetric.ReadWait, start) 261 262 if !f.mu.Lock(ctx) { 263 return 0, syserror.ErrInterrupted 264 } 265 266 fsmetric.Reads.Increment() 267 n, err := f.FileOperations.Read(ctx, f, dst, offset) 268 f.mu.Unlock() 269 return n, err 270 } 271 272 // Writev calls f.FileOperations.Write with f as the File, advancing the 273 // file offset if f.FileOperations.Write returns bytes written > 0. 274 // 275 // Writev positions the write offset at EOF if f.Flags().Append. This is 276 // unavoidably racy for network file systems. Writev also truncates src 277 // to avoid overrunning the current file size limit if necessary. 278 // 279 // Returns syserror.ErrInterrupted if writing was interrupted. 280 func (f *File) Writev(ctx context.Context, src usermem.IOSequence) (int64, error) { 281 if !f.mu.Lock(ctx) { 282 return 0, syserror.ErrInterrupted 283 } 284 unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) 285 // Handle append mode. 286 if f.Flags().Append { 287 if err := f.offsetForAppend(ctx, &f.offset); err != nil { 288 unlockAppendMu() 289 f.mu.Unlock() 290 return 0, err 291 } 292 } 293 294 // Enforce file limits. 295 limit, ok := f.checkLimit(ctx, f.offset) 296 switch { 297 case ok && limit == 0: 298 unlockAppendMu() 299 f.mu.Unlock() 300 return 0, syserror.ErrExceedsFileSizeLimit 301 case ok: 302 src = src.TakeFirst64(limit) 303 } 304 305 // We must hold the lock during the write. 306 n, err := f.FileOperations.Write(ctx, f, src, f.offset) 307 if n >= 0 && !f.flags.NonSeekable { 308 atomic.StoreInt64(&f.offset, f.offset+n) 309 } 310 unlockAppendMu() 311 f.mu.Unlock() 312 return n, err 313 } 314 315 // Pwritev calls f.FileOperations.Write with f as the File. It does not 316 // advance the file offset. If !f.Flags().Pwritev, Pwritev should not be 317 // called. 318 // 319 // Otherwise same as Writev. 320 func (f *File) Pwritev(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { 321 // "POSIX requires that opening a file with the O_APPEND flag should 322 // have no effect on the location at which pwrite() writes data. 323 // However, on Linux, if a file is opened with O_APPEND, pwrite() 324 // appends data to the end of the file, regardless of the value of 325 // offset." 326 unlockAppendMu := f.Dirent.Inode.lockAppendMu(f.Flags().Append) 327 defer unlockAppendMu() 328 if f.Flags().Append { 329 if err := f.offsetForAppend(ctx, &offset); err != nil { 330 return 0, err 331 } 332 } 333 334 // Enforce file limits. 335 limit, ok := f.checkLimit(ctx, offset) 336 switch { 337 case ok && limit == 0: 338 return 0, syserror.ErrExceedsFileSizeLimit 339 case ok: 340 src = src.TakeFirst64(limit) 341 } 342 343 return f.FileOperations.Write(ctx, f, src, offset) 344 } 345 346 // offsetForAppend atomically sets the given offset to the end of the file. 347 // 348 // Precondition: the file.Dirent.Inode.appendMu mutex should be held for 349 // writing. 350 func (f *File) offsetForAppend(ctx context.Context, offset *int64) error { 351 uattr, err := f.Dirent.Inode.UnstableAttr(ctx) 352 if err != nil { 353 // This is an odd error, we treat it as evidence that 354 // something is terribly wrong with the filesystem. 355 return syserror.EIO 356 } 357 358 // Update the offset. 359 atomic.StoreInt64(offset, uattr.Size) 360 361 return nil 362 } 363 364 // checkLimit checks the offset that the write will be performed at. The 365 // returned boolean indicates that the write must be limited. The returned 366 // integer indicates the new maximum write length. 367 func (f *File) checkLimit(ctx context.Context, offset int64) (int64, bool) { 368 if IsRegular(f.Dirent.Inode.StableAttr) { 369 // Enforce size limits. 370 fileSizeLimit := limits.FromContext(ctx).Get(limits.FileSize).Cur 371 if fileSizeLimit <= math.MaxInt64 { 372 if offset >= int64(fileSizeLimit) { 373 return 0, true 374 } 375 return int64(fileSizeLimit) - offset, true 376 } 377 } 378 379 return 0, false 380 } 381 382 // Fsync calls f.FileOperations.Fsync with f as the File. 383 // 384 // Returns syserror.ErrInterrupted if syncing was interrupted. 385 func (f *File) Fsync(ctx context.Context, start int64, end int64, syncType SyncType) error { 386 if !f.mu.Lock(ctx) { 387 return syserror.ErrInterrupted 388 } 389 defer f.mu.Unlock() 390 391 return f.FileOperations.Fsync(ctx, f, start, end, syncType) 392 } 393 394 // Flush calls f.FileOperations.Flush with f as the File. 395 // 396 // Returns syserror.ErrInterrupted if syncing was interrupted. 397 func (f *File) Flush(ctx context.Context) error { 398 if !f.mu.Lock(ctx) { 399 return syserror.ErrInterrupted 400 } 401 defer f.mu.Unlock() 402 403 return f.FileOperations.Flush(ctx, f) 404 } 405 406 // ConfigureMMap calls f.FileOperations.ConfigureMMap with f as the File. 407 // 408 // Returns syserror.ErrInterrupted if interrupted. 409 func (f *File) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { 410 if !f.mu.Lock(ctx) { 411 return syserror.ErrInterrupted 412 } 413 defer f.mu.Unlock() 414 415 return f.FileOperations.ConfigureMMap(ctx, f, opts) 416 } 417 418 // UnstableAttr calls f.FileOperations.UnstableAttr with f as the File. 419 // 420 // Returns syserror.ErrInterrupted if interrupted. 421 func (f *File) UnstableAttr(ctx context.Context) (UnstableAttr, error) { 422 if !f.mu.Lock(ctx) { 423 return UnstableAttr{}, syserror.ErrInterrupted 424 } 425 defer f.mu.Unlock() 426 427 return f.FileOperations.UnstableAttr(ctx, f) 428 } 429 430 // MappedName implements memmap.MappingIdentity.MappedName. 431 func (f *File) MappedName(ctx context.Context) string { 432 root := RootFromContext(ctx) 433 if root != nil { 434 defer root.DecRef(ctx) 435 } 436 name, _ := f.Dirent.FullName(root) 437 return name 438 } 439 440 // DeviceID implements memmap.MappingIdentity.DeviceID. 441 func (f *File) DeviceID() uint64 { 442 return f.Dirent.Inode.StableAttr.DeviceID 443 } 444 445 // InodeID implements memmap.MappingIdentity.InodeID. 446 func (f *File) InodeID() uint64 { 447 return f.Dirent.Inode.StableAttr.InodeID 448 } 449 450 // Msync implements memmap.MappingIdentity.Msync. 451 func (f *File) Msync(ctx context.Context, mr memmap.MappableRange) error { 452 return f.Fsync(ctx, int64(mr.Start), int64(mr.End-1), SyncData) 453 } 454 455 // A FileAsync sends signals to its owner when w is ready for IO. 456 type FileAsync interface { 457 Register(w waiter.Waitable) 458 Unregister(w waiter.Waitable) 459 } 460 461 // Async gets the stored FileAsync or creates a new one with the supplied 462 // function. If the supplied function is nil, no FileAsync is created and the 463 // current value is returned. 464 func (f *File) Async(newAsync func() FileAsync) FileAsync { 465 f.flagsMu.Lock() 466 defer f.flagsMu.Unlock() 467 if f.async == nil && newAsync != nil { 468 f.async = newAsync() 469 if f.flags.Async { 470 f.async.Register(f) 471 } 472 } 473 return f.async 474 } 475 476 // lockedReader implements io.Reader and io.ReaderAt. 477 // 478 // Note this reads the underlying file using the file operations directly. It 479 // is the responsibility of the caller to ensure that locks are appropriately 480 // held and offsets updated if required. This should be used only by internal 481 // functions that perform these operations and checks at other times. 482 type lockedReader struct { 483 // Ctx is the context for the file reader. 484 Ctx context.Context 485 486 // File is the file to read from. 487 File *File 488 489 // Offset is the offset to start at. 490 // 491 // This applies only to Read, not ReadAt. 492 Offset int64 493 } 494 495 // Read implements io.Reader.Read. 496 func (r *lockedReader) Read(buf []byte) (int, error) { 497 if r.Ctx.Interrupted() { 498 return 0, syserror.ErrInterrupted 499 } 500 n, err := r.File.FileOperations.Read(r.Ctx, r.File, usermem.BytesIOSequence(buf), r.Offset) 501 r.Offset += n 502 return int(n), err 503 } 504 505 // ReadAt implements io.Reader.ReadAt. 506 func (r *lockedReader) ReadAt(buf []byte, offset int64) (int, error) { 507 if r.Ctx.Interrupted() { 508 return 0, syserror.ErrInterrupted 509 } 510 n, err := r.File.FileOperations.Read(r.Ctx, r.File, usermem.BytesIOSequence(buf), offset) 511 return int(n), err 512 } 513 514 // lockedWriter implements io.Writer and io.WriterAt. 515 // 516 // The same constraints as lockedReader apply; see above. 517 type lockedWriter struct { 518 // Ctx is the context for the file writer. 519 Ctx context.Context 520 521 // File is the file to write to. 522 File *File 523 524 // Offset is the offset to start at. 525 // 526 // This applies only to Write, not WriteAt. 527 Offset int64 528 } 529 530 // Write implements io.Writer.Write. 531 func (w *lockedWriter) Write(buf []byte) (int, error) { 532 if w.Ctx.Interrupted() { 533 return 0, syserror.ErrInterrupted 534 } 535 n, err := w.WriteAt(buf, w.Offset) 536 w.Offset += int64(n) 537 return int(n), err 538 } 539 540 // WriteAt implements io.Writer.WriteAt. 541 func (w *lockedWriter) WriteAt(buf []byte, offset int64) (int, error) { 542 var ( 543 written int 544 err error 545 ) 546 // The io.Writer contract requires that Write writes all available 547 // bytes and does not return short writes. This causes errors with 548 // io.Copy, since our own Write interface does not have this same 549 // contract. Enforce that here. 550 for written < len(buf) { 551 if w.Ctx.Interrupted() { 552 return written, syserror.ErrInterrupted 553 } 554 var n int64 555 n, err = w.File.FileOperations.Write(w.Ctx, w.File, usermem.BytesIOSequence(buf[written:]), offset+int64(written)) 556 if n > 0 { 557 written += int(n) 558 } 559 if err != nil { 560 break 561 } 562 } 563 return written, err 564 }