gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/fsimpl/gofer/special_file.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gofer 16 17 import ( 18 "fmt" 19 20 "gvisor.dev/gvisor/pkg/abi/linux" 21 "gvisor.dev/gvisor/pkg/atomicbitops" 22 "gvisor.dev/gvisor/pkg/context" 23 "gvisor.dev/gvisor/pkg/errors/linuxerr" 24 "gvisor.dev/gvisor/pkg/fdnotifier" 25 "gvisor.dev/gvisor/pkg/hostarch" 26 "gvisor.dev/gvisor/pkg/metric" 27 "gvisor.dev/gvisor/pkg/safemem" 28 "gvisor.dev/gvisor/pkg/sentry/fsmetric" 29 "gvisor.dev/gvisor/pkg/sentry/fsutil" 30 "gvisor.dev/gvisor/pkg/sentry/memmap" 31 "gvisor.dev/gvisor/pkg/sentry/vfs" 32 "gvisor.dev/gvisor/pkg/sync" 33 "gvisor.dev/gvisor/pkg/usermem" 34 "gvisor.dev/gvisor/pkg/waiter" 35 ) 36 37 // specialFileFD implements vfs.FileDescriptionImpl for pipes, sockets, device 38 // special files, and (when filesystemOptions.regularFilesUseSpecialFileFD is 39 // in effect) regular files. specialFileFD differs from regularFileFD by using 40 // per-FD handles instead of shared per-dentry handles, and never buffering I/O. 41 // 42 // +stateify savable 43 type specialFileFD struct { 44 fileDescription 45 specialFDEntry 46 memmap.NoBufferedIOFallback 47 48 // releaseMu synchronizes the closing of fd.handle with fd.sync(). It's safe 49 // to access fd.handle without locking for operations that require a ref to 50 // be held by the caller, e.g. vfs.FileDescriptionImpl implementations. 51 releaseMu sync.RWMutex `state:"nosave"` 52 53 // handle is used for file I/O. handle is immutable. 54 handle handle `state:"nosave"` 55 56 // isRegularFile is true if this FD represents a regular file which is only 57 // possible when filesystemOptions.regularFilesUseSpecialFileFD is in 58 // effect. isRegularFile is immutable. 59 isRegularFile bool 60 61 // seekable is true if this file description represents a file for which 62 // file offset is significant, i.e. a regular file, character device or 63 // block device. seekable is immutable. 64 seekable bool 65 66 // haveQueue is true if this file description represents a file for which 67 // queue may send I/O readiness events. haveQueue is immutable. 68 haveQueue bool `state:"nosave"` 69 queue waiter.Queue 70 71 // If seekable is true, off is the file offset. off is protected by mu. 72 mu sync.Mutex `state:"nosave"` 73 off int64 74 75 // If haveBuf is non-zero, this FD represents a pipe, and buf contains data 76 // read from the pipe from previous calls to specialFileFD.savePipeData(). 77 // haveBuf and buf are protected by bufMu. 78 bufMu sync.Mutex `state:"nosave"` 79 haveBuf atomicbitops.Uint32 80 buf []byte 81 82 // If handle.fd >= 0, hostFileMapper caches mappings of handle.fd, and 83 // hostFileMapperInitOnce is used to initialize it on first use. 84 hostFileMapperInitOnce sync.Once `state:"nosave"` 85 hostFileMapper fsutil.HostFileMapper 86 87 // If handle.fd >= 0, fileRefs counts references on memmap.File offsets. 88 // fileRefs is protected by fileRefsMu. 89 fileRefsMu sync.Mutex `state:"nosave"` 90 fileRefs fsutil.FrameRefSet 91 } 92 93 func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) { 94 ftype := d.fileType() 95 seekable := ftype == linux.S_IFREG || ftype == linux.S_IFCHR || ftype == linux.S_IFBLK 96 haveQueue := (ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK || ftype == linux.S_IFCHR) && h.fd >= 0 97 fd := &specialFileFD{ 98 handle: h, 99 isRegularFile: ftype == linux.S_IFREG, 100 seekable: seekable, 101 haveQueue: haveQueue, 102 } 103 fd.LockFD.Init(&d.locks) 104 if haveQueue { 105 if err := fdnotifier.AddFD(h.fd, &fd.queue); err != nil { 106 return nil, err 107 } 108 } 109 if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ 110 AllowDirectIO: true, 111 DenyPRead: !seekable, 112 DenyPWrite: !seekable, 113 }); err != nil { 114 if haveQueue { 115 fdnotifier.RemoveFD(h.fd) 116 } 117 return nil, err 118 } 119 d.fs.syncMu.Lock() 120 d.fs.specialFileFDs.PushBack(fd) 121 d.fs.syncMu.Unlock() 122 if fd.vfsfd.IsWritable() && (d.mode.Load()&0111 != 0) { 123 metric.SuspiciousOperationsMetric.Increment(&metric.SuspiciousOperationsTypeOpenedWriteExecuteFile) 124 } 125 if h.fd >= 0 { 126 fsmetric.GoferOpensHost.Increment() 127 } else { 128 fsmetric.GoferOpens9P.Increment() 129 } 130 return fd, nil 131 } 132 133 // Release implements vfs.FileDescriptionImpl.Release. 134 func (fd *specialFileFD) Release(ctx context.Context) { 135 if fd.haveQueue { 136 fdnotifier.RemoveFD(fd.handle.fd) 137 } 138 fd.releaseMu.Lock() 139 fd.handle.close(ctx) 140 fd.releaseMu.Unlock() 141 142 fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) 143 fs.syncMu.Lock() 144 fs.specialFileFDs.Remove(fd) 145 fs.syncMu.Unlock() 146 } 147 148 // OnClose implements vfs.FileDescriptionImpl.OnClose. 149 func (fd *specialFileFD) OnClose(ctx context.Context) error { 150 if !fd.vfsfd.IsWritable() { 151 return nil 152 } 153 return flush(ctx, fd.handle.fdLisa) 154 } 155 156 // Readiness implements waiter.Waitable.Readiness. 157 func (fd *specialFileFD) Readiness(mask waiter.EventMask) waiter.EventMask { 158 if fd.haveQueue { 159 return fdnotifier.NonBlockingPoll(fd.handle.fd, mask) 160 } 161 return fd.fileDescription.Readiness(mask) 162 } 163 164 // EventRegister implements waiter.Waitable.EventRegister. 165 func (fd *specialFileFD) EventRegister(e *waiter.Entry) error { 166 if fd.haveQueue { 167 fd.queue.EventRegister(e) 168 if err := fdnotifier.UpdateFD(fd.handle.fd); err != nil { 169 fd.queue.EventUnregister(e) 170 return err 171 } 172 return nil 173 } 174 return fd.fileDescription.EventRegister(e) 175 } 176 177 // EventUnregister implements waiter.Waitable.EventUnregister. 178 func (fd *specialFileFD) EventUnregister(e *waiter.Entry) { 179 if fd.haveQueue { 180 fd.queue.EventUnregister(e) 181 if err := fdnotifier.UpdateFD(fd.handle.fd); err != nil { 182 panic(fmt.Sprint("UpdateFD:", err)) 183 } 184 return 185 } 186 fd.fileDescription.EventUnregister(e) 187 } 188 189 // Epollable implements FileDescriptionImpl.Epollable. 190 func (fd *specialFileFD) Epollable() bool { 191 if fd.haveQueue { 192 return true 193 } 194 return fd.fileDescription.Epollable() 195 } 196 197 func (fd *specialFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error { 198 if fd.isRegularFile { 199 d := fd.dentry() 200 return d.doAllocate(ctx, offset, length, func() error { 201 return fd.handle.allocate(ctx, mode, offset, length) 202 }) 203 } 204 return fd.FileDescriptionDefaultImpl.Allocate(ctx, mode, offset, length) 205 } 206 207 // PRead implements vfs.FileDescriptionImpl.PRead. 208 func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { 209 start := fsmetric.StartReadWait() 210 defer func() { 211 if fd.handle.fd >= 0 { 212 fsmetric.GoferReadsHost.Increment() 213 fsmetric.FinishReadWait(fsmetric.GoferReadWaitHost, start) 214 } else { 215 fsmetric.GoferReads9P.Increment() 216 fsmetric.FinishReadWait(fsmetric.GoferReadWait9P, start) 217 } 218 }() 219 220 if fd.seekable && offset < 0 { 221 return 0, linuxerr.EINVAL 222 } 223 224 // Check that flags are supported. 225 // 226 // TODO(gvisor.dev/issue/2601): Support select preadv2 flags. 227 if opts.Flags&^linux.RWF_HIPRI != 0 { 228 return 0, linuxerr.EOPNOTSUPP 229 } 230 231 if d := fd.dentry(); d.cachedMetadataAuthoritative() { 232 d.touchAtime(fd.vfsfd.Mount()) 233 } 234 235 bufN := int64(0) 236 if fd.haveBuf.Load() != 0 { 237 var err error 238 fd.bufMu.Lock() 239 if len(fd.buf) != 0 { 240 var n int 241 n, err = dst.CopyOut(ctx, fd.buf) 242 dst = dst.DropFirst(n) 243 fd.buf = fd.buf[n:] 244 if len(fd.buf) == 0 { 245 fd.haveBuf.Store(0) 246 fd.buf = nil 247 } 248 bufN = int64(n) 249 if offset >= 0 { 250 offset += bufN 251 } 252 } 253 fd.bufMu.Unlock() 254 if err != nil { 255 return bufN, err 256 } 257 } 258 259 rw := getHandleReadWriter(ctx, &fd.handle, offset) 260 n, err := dst.CopyOutFrom(ctx, rw) 261 putHandleReadWriter(rw) 262 if linuxerr.Equals(linuxerr.EAGAIN, err) { 263 err = linuxerr.ErrWouldBlock 264 } 265 return bufN + n, err 266 } 267 268 // Read implements vfs.FileDescriptionImpl.Read. 269 func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { 270 if !fd.seekable { 271 return fd.PRead(ctx, dst, -1, opts) 272 } 273 274 fd.mu.Lock() 275 n, err := fd.PRead(ctx, dst, fd.off, opts) 276 fd.off += n 277 fd.mu.Unlock() 278 return n, err 279 } 280 281 // PWrite implements vfs.FileDescriptionImpl.PWrite. 282 func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { 283 n, _, err := fd.pwrite(ctx, src, offset, opts) 284 return n, err 285 } 286 287 // pwrite returns the number of bytes written, final offset, error. The final 288 // offset should be ignored by PWrite. 289 func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) { 290 if fd.seekable && offset < 0 { 291 return 0, offset, linuxerr.EINVAL 292 } 293 294 // Check that flags are supported. 295 // 296 // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags. 297 if opts.Flags&^linux.RWF_HIPRI != 0 { 298 return 0, offset, linuxerr.EOPNOTSUPP 299 } 300 301 d := fd.dentry() 302 if fd.isRegularFile { 303 // If the regular file fd was opened with O_APPEND, make sure the file 304 // size is updated. There is a possible race here if size is modified 305 // externally after metadata cache is updated. 306 if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() { 307 if err := d.updateMetadata(ctx); err != nil { 308 return 0, offset, err 309 } 310 } 311 312 // We need to hold the metadataMu *while* writing to a regular file. 313 d.metadataMu.Lock() 314 defer d.metadataMu.Unlock() 315 316 // Set offset to file size if the regular file was opened with O_APPEND. 317 if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 { 318 // Holding d.metadataMu is sufficient for reading d.size. 319 offset = int64(d.size.RacyLoad()) 320 } 321 limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes()) 322 if err != nil { 323 return 0, offset, err 324 } 325 src = src.TakeFirst64(limit) 326 } 327 328 if d.cachedMetadataAuthoritative() { 329 if fd.isRegularFile { 330 d.touchCMtimeLocked() 331 } else { 332 d.touchCMtime() 333 } 334 } 335 336 // handleReadWriter always writes to the remote file. So O_DIRECT is 337 // effectively always set. Invalidate pages in d.mappings that have been 338 // written to. 339 pgstart := hostarch.PageRoundDown(uint64(offset)) 340 pgend, ok := hostarch.PageRoundUp(uint64(offset + src.NumBytes())) 341 if !ok { 342 return 0, offset, linuxerr.EINVAL 343 } 344 mr := memmap.MappableRange{pgstart, pgend} 345 d.mapsMu.Lock() 346 d.mappings.Invalidate(mr, memmap.InvalidateOpts{}) 347 d.mapsMu.Unlock() 348 349 rw := getHandleReadWriter(ctx, &fd.handle, offset) 350 n, err := src.CopyInTo(ctx, rw) 351 putHandleReadWriter(rw) 352 if n > 0 && fd.vfsfd.StatusFlags()&(linux.O_DSYNC|linux.O_SYNC) != 0 { 353 // Note that if syncing the remote file fails, then we can't guarantee that 354 // any data was actually written with the semantics of O_DSYNC or 355 // O_SYNC, so we return zero bytes written. Compare Linux's 356 // mm/filemap.c:generic_file_write_iter() => 357 // include/linux/fs.h:generic_write_sync(). 358 if err := fd.sync(ctx, false /* forFilesystemSync */); err != nil { 359 return 0, offset, err 360 } 361 } 362 if linuxerr.Equals(linuxerr.EAGAIN, err) { 363 err = linuxerr.ErrWouldBlock 364 } 365 // Update offset if the offset is valid. 366 if offset >= 0 { 367 offset += n 368 } 369 // Update file size for regular files. 370 if fd.isRegularFile { 371 // d.metadataMu is already locked at this point. 372 if uint64(offset) > d.size.RacyLoad() { 373 d.dataMu.Lock() 374 defer d.dataMu.Unlock() 375 d.size.Store(uint64(offset)) 376 } 377 } 378 return int64(n), offset, err 379 } 380 381 // Write implements vfs.FileDescriptionImpl.Write. 382 func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { 383 if !fd.seekable { 384 return fd.PWrite(ctx, src, -1, opts) 385 } 386 387 fd.mu.Lock() 388 n, off, err := fd.pwrite(ctx, src, fd.off, opts) 389 fd.off = off 390 fd.mu.Unlock() 391 return n, err 392 } 393 394 // Seek implements vfs.FileDescriptionImpl.Seek. 395 func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { 396 if !fd.seekable { 397 return 0, linuxerr.ESPIPE 398 } 399 fd.mu.Lock() 400 defer fd.mu.Unlock() 401 newOffset, err := regularFileSeekLocked(ctx, fd.dentry(), fd.off, offset, whence) 402 if err != nil { 403 return 0, err 404 } 405 fd.off = newOffset 406 return newOffset, nil 407 } 408 409 // Sync implements vfs.FileDescriptionImpl.Sync. 410 func (fd *specialFileFD) Sync(ctx context.Context) error { 411 return fd.sync(ctx, false /* forFilesystemSync */) 412 } 413 414 func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error { 415 // Locks to ensure it didn't race with fd.Release(). 416 fd.releaseMu.RLock() 417 defer fd.releaseMu.RUnlock() 418 419 if err := fd.handle.sync(ctx); err != nil { 420 if !forFilesystemSync { 421 return err 422 } 423 // Only return err if we can reasonably have expected sync to succeed 424 // (fd represents a regular file that was opened for writing). 425 if fd.isRegularFile && fd.vfsfd.IsWritable() { 426 return err 427 } 428 ctx.Debugf("gofer.specialFileFD.sync: syncing non-writable or non-regular-file FD failed: %v", err) 429 } 430 return nil 431 } 432 433 // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. 434 func (fd *specialFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { 435 if fd.handle.fd < 0 || fd.filesystem().opts.forcePageCache { 436 return linuxerr.ENODEV 437 } 438 // After this point, fd may be used as a memmap.Mappable and memmap.File. 439 fd.hostFileMapperInitOnce.Do(fd.hostFileMapper.Init) 440 return vfs.GenericConfigureMMap(&fd.vfsfd, fd, opts) 441 } 442 443 // AddMapping implements memmap.Mappable.AddMapping. 444 func (fd *specialFileFD) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { 445 d := fd.dentry() 446 d.mapsMu.Lock() 447 defer d.mapsMu.Unlock() 448 d.mappings.AddMapping(ms, ar, offset, writable) 449 fd.hostFileMapper.IncRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())}) 450 return nil 451 } 452 453 // RemoveMapping implements memmap.Mappable.RemoveMapping. 454 func (fd *specialFileFD) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { 455 d := fd.dentry() 456 d.mapsMu.Lock() 457 defer d.mapsMu.Unlock() 458 d.mappings.RemoveMapping(ms, ar, offset, writable) 459 fd.hostFileMapper.DecRefOn(memmap.MappableRange{offset, offset + uint64(ar.Length())}) 460 } 461 462 // CopyMapping implements memmap.Mappable.CopyMapping. 463 func (fd *specialFileFD) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { 464 return fd.AddMapping(ctx, ms, dstAR, offset, writable) 465 } 466 467 // Translate implements memmap.Mappable.Translate. 468 func (fd *specialFileFD) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { 469 mr := optional 470 if fd.filesystem().opts.limitHostFDTranslation { 471 mr = maxFillRange(required, optional) 472 } 473 return []memmap.Translation{ 474 { 475 Source: mr, 476 File: fd, 477 Offset: mr.Start, 478 Perms: hostarch.AnyAccess, 479 }, 480 }, nil 481 } 482 483 // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. 484 func (fd *specialFileFD) InvalidateUnsavable(ctx context.Context) error { 485 return nil 486 } 487 488 // IncRef implements memmap.File.IncRef. 489 func (fd *specialFileFD) IncRef(fr memmap.FileRange, memCgID uint32) { 490 fd.fileRefsMu.Lock() 491 defer fd.fileRefsMu.Unlock() 492 fd.fileRefs.IncRefAndAccount(fr, memCgID) 493 } 494 495 // DecRef implements memmap.File.DecRef. 496 func (fd *specialFileFD) DecRef(fr memmap.FileRange) { 497 fd.fileRefsMu.Lock() 498 defer fd.fileRefsMu.Unlock() 499 fd.fileRefs.DecRefAndAccount(fr) 500 } 501 502 // MapInternal implements memmap.File.MapInternal. 503 func (fd *specialFileFD) MapInternal(fr memmap.FileRange, at hostarch.AccessType) (safemem.BlockSeq, error) { 504 fd.requireHostFD() 505 return fd.hostFileMapper.MapInternal(fr, int(fd.handle.fd), at.Write) 506 } 507 508 // FD implements memmap.File.FD. 509 func (fd *specialFileFD) FD() int { 510 fd.requireHostFD() 511 return int(fd.handle.fd) 512 } 513 514 func (fd *specialFileFD) requireHostFD() { 515 if fd.handle.fd < 0 { 516 // This is possible if fd was successfully mmapped before saving, then 517 // was restored without a host FD. This is unrecoverable: without a 518 // host FD, we can't mmap this file post-restore. 519 panic("gofer.specialFileFD can no longer be memory-mapped without a host FD") 520 } 521 } 522 523 func (fd *specialFileFD) updateMetadata(ctx context.Context) error { 524 d := fd.dentry() 525 d.metadataMu.Lock() 526 defer d.metadataMu.Unlock() 527 return d.updateMetadataLocked(ctx, fd.handle) 528 }