github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/gofer/special_file.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gofer 16 17 import ( 18 "sync/atomic" 19 20 "golang.org/x/sys/unix" 21 "github.com/SagerNet/gvisor/pkg/abi/linux" 22 "github.com/SagerNet/gvisor/pkg/context" 23 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 24 "github.com/SagerNet/gvisor/pkg/fdnotifier" 25 "github.com/SagerNet/gvisor/pkg/metric" 26 "github.com/SagerNet/gvisor/pkg/p9" 27 "github.com/SagerNet/gvisor/pkg/safemem" 28 "github.com/SagerNet/gvisor/pkg/sentry/fsmetric" 29 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 30 "github.com/SagerNet/gvisor/pkg/sync" 31 "github.com/SagerNet/gvisor/pkg/syserror" 32 "github.com/SagerNet/gvisor/pkg/usermem" 33 "github.com/SagerNet/gvisor/pkg/waiter" 34 ) 35 36 // specialFileFD implements vfs.FileDescriptionImpl for pipes, sockets, device 37 // special files, and (when filesystemOptions.regularFilesUseSpecialFileFD is 38 // in effect) regular files. specialFileFD differs from regularFileFD by using 39 // per-FD handles instead of shared per-dentry handles, and never buffering I/O. 40 // 41 // +stateify savable 42 type specialFileFD struct { 43 fileDescription 44 45 // handle is used for file I/O. handle is immutable. 46 handle handle `state:"nosave"` 47 48 // isRegularFile is true if this FD represents a regular file which is only 49 // possible when filesystemOptions.regularFilesUseSpecialFileFD is in 50 // effect. isRegularFile is immutable. 51 isRegularFile bool 52 53 // seekable is true if this file description represents a file for which 54 // file offset is significant, i.e. a regular file, character device or 55 // block device. seekable is immutable. 56 seekable bool 57 58 // haveQueue is true if this file description represents a file for which 59 // queue may send I/O readiness events. haveQueue is immutable. 60 haveQueue bool `state:"nosave"` 61 queue waiter.Queue 62 63 // If seekable is true, off is the file offset. off is protected by mu. 64 mu sync.Mutex `state:"nosave"` 65 off int64 66 67 // If haveBuf is non-zero, this FD represents a pipe, and buf contains data 68 // read from the pipe from previous calls to specialFileFD.savePipeData(). 69 // haveBuf and buf are protected by bufMu. haveBuf is accessed using atomic 70 // memory operations. 71 bufMu sync.Mutex `state:"nosave"` 72 haveBuf uint32 73 buf []byte 74 } 75 76 func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) { 77 ftype := d.fileType() 78 seekable := ftype == linux.S_IFREG || ftype == linux.S_IFCHR || ftype == linux.S_IFBLK 79 haveQueue := (ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK) && h.fd >= 0 80 fd := &specialFileFD{ 81 handle: h, 82 isRegularFile: ftype == linux.S_IFREG, 83 seekable: seekable, 84 haveQueue: haveQueue, 85 } 86 fd.LockFD.Init(&d.locks) 87 if haveQueue { 88 if err := fdnotifier.AddFD(h.fd, &fd.queue); err != nil { 89 return nil, err 90 } 91 } 92 if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ 93 DenyPRead: !seekable, 94 DenyPWrite: !seekable, 95 }); err != nil { 96 if haveQueue { 97 fdnotifier.RemoveFD(h.fd) 98 } 99 return nil, err 100 } 101 d.fs.syncMu.Lock() 102 d.fs.specialFileFDs[fd] = struct{}{} 103 d.fs.syncMu.Unlock() 104 if fd.vfsfd.IsWritable() && (atomic.LoadUint32(&d.mode)&0111 != 0) { 105 metric.SuspiciousOperationsMetric.Increment("opened_write_execute_file") 106 } 107 if h.fd >= 0 { 108 fsmetric.GoferOpensHost.Increment() 109 } else { 110 fsmetric.GoferOpens9P.Increment() 111 } 112 return fd, nil 113 } 114 115 // Release implements vfs.FileDescriptionImpl.Release. 116 func (fd *specialFileFD) Release(ctx context.Context) { 117 if fd.haveQueue { 118 fdnotifier.RemoveFD(fd.handle.fd) 119 } 120 fd.handle.close(ctx) 121 fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) 122 fs.syncMu.Lock() 123 delete(fs.specialFileFDs, fd) 124 fs.syncMu.Unlock() 125 } 126 127 // OnClose implements vfs.FileDescriptionImpl.OnClose. 128 func (fd *specialFileFD) OnClose(ctx context.Context) error { 129 if !fd.vfsfd.IsWritable() { 130 return nil 131 } 132 return fd.handle.file.flush(ctx) 133 } 134 135 // Readiness implements waiter.Waitable.Readiness. 136 func (fd *specialFileFD) Readiness(mask waiter.EventMask) waiter.EventMask { 137 if fd.haveQueue { 138 return fdnotifier.NonBlockingPoll(fd.handle.fd, mask) 139 } 140 return fd.fileDescription.Readiness(mask) 141 } 142 143 // EventRegister implements waiter.Waitable.EventRegister. 144 func (fd *specialFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) { 145 if fd.haveQueue { 146 fd.queue.EventRegister(e, mask) 147 fdnotifier.UpdateFD(fd.handle.fd) 148 return 149 } 150 fd.fileDescription.EventRegister(e, mask) 151 } 152 153 // EventUnregister implements waiter.Waitable.EventUnregister. 154 func (fd *specialFileFD) EventUnregister(e *waiter.Entry) { 155 if fd.haveQueue { 156 fd.queue.EventUnregister(e) 157 fdnotifier.UpdateFD(fd.handle.fd) 158 return 159 } 160 fd.fileDescription.EventUnregister(e) 161 } 162 163 func (fd *specialFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error { 164 if fd.isRegularFile { 165 d := fd.dentry() 166 return d.doAllocate(ctx, offset, length, func() error { 167 return fd.handle.file.allocate(ctx, p9.ToAllocateMode(mode), offset, length) 168 }) 169 } 170 return fd.FileDescriptionDefaultImpl.Allocate(ctx, mode, offset, length) 171 } 172 173 // PRead implements vfs.FileDescriptionImpl.PRead. 174 func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { 175 start := fsmetric.StartReadWait() 176 defer func() { 177 if fd.handle.fd >= 0 { 178 fsmetric.GoferReadsHost.Increment() 179 fsmetric.FinishReadWait(fsmetric.GoferReadWaitHost, start) 180 } else { 181 fsmetric.GoferReads9P.Increment() 182 fsmetric.FinishReadWait(fsmetric.GoferReadWait9P, start) 183 } 184 }() 185 186 if fd.seekable && offset < 0 { 187 return 0, linuxerr.EINVAL 188 } 189 190 // Check that flags are supported. 191 // 192 // TODO(github.com/SagerNet/issue/2601): Support select preadv2 flags. 193 if opts.Flags&^linux.RWF_HIPRI != 0 { 194 return 0, syserror.EOPNOTSUPP 195 } 196 197 if d := fd.dentry(); d.cachedMetadataAuthoritative() { 198 d.touchAtime(fd.vfsfd.Mount()) 199 } 200 201 bufN := int64(0) 202 if atomic.LoadUint32(&fd.haveBuf) != 0 { 203 var err error 204 fd.bufMu.Lock() 205 if len(fd.buf) != 0 { 206 var n int 207 n, err = dst.CopyOut(ctx, fd.buf) 208 dst = dst.DropFirst(n) 209 fd.buf = fd.buf[n:] 210 if len(fd.buf) == 0 { 211 atomic.StoreUint32(&fd.haveBuf, 0) 212 fd.buf = nil 213 } 214 bufN = int64(n) 215 if offset >= 0 { 216 offset += bufN 217 } 218 } 219 fd.bufMu.Unlock() 220 if err != nil { 221 return bufN, err 222 } 223 } 224 225 // Going through dst.CopyOutFrom() would hold MM locks around file 226 // operations of unknown duration. For regularFileFD, doing so is necessary 227 // to support mmap due to lock ordering; MM locks precede dentry.dataMu. 228 // That doesn't hold here since specialFileFD doesn't client-cache data. 229 // Just buffer the read instead. 230 buf := make([]byte, dst.NumBytes()) 231 n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset)) 232 if linuxerr.Equals(linuxerr.EAGAIN, err) { 233 err = syserror.ErrWouldBlock 234 } 235 if n == 0 { 236 return bufN, err 237 } 238 if cp, cperr := dst.CopyOut(ctx, buf[:n]); cperr != nil { 239 return bufN + int64(cp), cperr 240 } 241 return bufN + int64(n), err 242 } 243 244 // Read implements vfs.FileDescriptionImpl.Read. 245 func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { 246 if !fd.seekable { 247 return fd.PRead(ctx, dst, -1, opts) 248 } 249 250 fd.mu.Lock() 251 n, err := fd.PRead(ctx, dst, fd.off, opts) 252 fd.off += n 253 fd.mu.Unlock() 254 return n, err 255 } 256 257 // PWrite implements vfs.FileDescriptionImpl.PWrite. 258 func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { 259 n, _, err := fd.pwrite(ctx, src, offset, opts) 260 return n, err 261 } 262 263 // pwrite returns the number of bytes written, final offset, error. The final 264 // offset should be ignored by PWrite. 265 func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) { 266 if fd.seekable && offset < 0 { 267 return 0, offset, linuxerr.EINVAL 268 } 269 270 // Check that flags are supported. 271 // 272 // TODO(github.com/SagerNet/issue/2601): Support select pwritev2 flags. 273 if opts.Flags&^linux.RWF_HIPRI != 0 { 274 return 0, offset, syserror.EOPNOTSUPP 275 } 276 277 d := fd.dentry() 278 if fd.isRegularFile { 279 // If the regular file fd was opened with O_APPEND, make sure the file 280 // size is updated. There is a possible race here if size is modified 281 // externally after metadata cache is updated. 282 if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() { 283 if err := d.updateFromGetattr(ctx); err != nil { 284 return 0, offset, err 285 } 286 } 287 288 // We need to hold the metadataMu *while* writing to a regular file. 289 d.metadataMu.Lock() 290 defer d.metadataMu.Unlock() 291 292 // Set offset to file size if the regular file was opened with O_APPEND. 293 if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 { 294 // Holding d.metadataMu is sufficient for reading d.size. 295 offset = int64(d.size) 296 } 297 limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes()) 298 if err != nil { 299 return 0, offset, err 300 } 301 src = src.TakeFirst64(limit) 302 } 303 304 if d.cachedMetadataAuthoritative() { 305 if fd.isRegularFile { 306 d.touchCMtimeLocked() 307 } else { 308 d.touchCMtime() 309 } 310 } 311 312 // Do a buffered write. See rationale in PRead. 313 buf := make([]byte, src.NumBytes()) 314 copied, copyErr := src.CopyIn(ctx, buf) 315 if copied == 0 && copyErr != nil { 316 // Only return the error if we didn't get any data. 317 return 0, offset, copyErr 318 } 319 n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf[:copied])), uint64(offset)) 320 if linuxerr.Equals(linuxerr.EAGAIN, err) { 321 err = syserror.ErrWouldBlock 322 } 323 // Update offset if the offset is valid. 324 if offset >= 0 { 325 offset += int64(n) 326 } 327 // Update file size for regular files. 328 if fd.isRegularFile { 329 // d.metadataMu is already locked at this point. 330 if uint64(offset) > d.size { 331 d.dataMu.Lock() 332 defer d.dataMu.Unlock() 333 atomic.StoreUint64(&d.size, uint64(offset)) 334 } 335 } 336 if err != nil { 337 return int64(n), offset, err 338 } 339 return int64(n), offset, copyErr 340 } 341 342 // Write implements vfs.FileDescriptionImpl.Write. 343 func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { 344 if !fd.seekable { 345 return fd.PWrite(ctx, src, -1, opts) 346 } 347 348 fd.mu.Lock() 349 n, off, err := fd.pwrite(ctx, src, fd.off, opts) 350 fd.off = off 351 fd.mu.Unlock() 352 return n, err 353 } 354 355 // Seek implements vfs.FileDescriptionImpl.Seek. 356 func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { 357 if !fd.seekable { 358 return 0, linuxerr.ESPIPE 359 } 360 fd.mu.Lock() 361 defer fd.mu.Unlock() 362 newOffset, err := regularFileSeekLocked(ctx, fd.dentry(), fd.off, offset, whence) 363 if err != nil { 364 return 0, err 365 } 366 fd.off = newOffset 367 return newOffset, nil 368 } 369 370 // Sync implements vfs.FileDescriptionImpl.Sync. 371 func (fd *specialFileFD) Sync(ctx context.Context) error { 372 return fd.sync(ctx, false /* forFilesystemSync */) 373 } 374 375 func (fd *specialFileFD) sync(ctx context.Context, forFilesystemSync bool) error { 376 err := func() error { 377 // If we have a host FD, fsyncing it is likely to be faster than an fsync 378 // RPC. 379 if fd.handle.fd >= 0 { 380 ctx.UninterruptibleSleepStart(false) 381 err := unix.Fsync(int(fd.handle.fd)) 382 ctx.UninterruptibleSleepFinish(false) 383 return err 384 } 385 return fd.handle.file.fsync(ctx) 386 }() 387 if err != nil { 388 if !forFilesystemSync { 389 return err 390 } 391 // Only return err if we can reasonably have expected sync to succeed 392 // (fd represents a regular file that was opened for writing). 393 if fd.isRegularFile && fd.vfsfd.IsWritable() { 394 return err 395 } 396 ctx.Debugf("gofer.specialFileFD.sync: syncing non-writable or non-regular-file FD failed: %v", err) 397 } 398 return nil 399 }