github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/pipe/vfs.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package pipe 16 17 import ( 18 "github.com/SagerNet/gvisor/pkg/abi/linux" 19 "github.com/SagerNet/gvisor/pkg/context" 20 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 21 "github.com/SagerNet/gvisor/pkg/hostarch" 22 "github.com/SagerNet/gvisor/pkg/safemem" 23 "github.com/SagerNet/gvisor/pkg/sentry/arch" 24 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 25 "github.com/SagerNet/gvisor/pkg/sync" 26 "github.com/SagerNet/gvisor/pkg/syserror" 27 "github.com/SagerNet/gvisor/pkg/usermem" 28 "github.com/SagerNet/gvisor/pkg/waiter" 29 ) 30 31 // This file contains types enabling the pipe package to be used with the vfs 32 // package. 33 34 // VFSPipe represents the actual pipe, analagous to an inode. VFSPipes should 35 // not be copied. 36 // 37 // +stateify savable 38 type VFSPipe struct { 39 // mu protects the fields below. 40 mu sync.Mutex `state:"nosave"` 41 42 // pipe is the underlying pipe. 43 pipe Pipe 44 45 // Channels for synchronizing the creation of new readers and writers 46 // of this fifo. See waitFor and newHandleLocked. 47 // 48 // These are not saved/restored because all waiters are unblocked on 49 // save, and either automatically restart (via ERESTARTSYS) or return 50 // EINTR on resume. On restarts via ERESTARTSYS, the appropriate 51 // channel will be recreated. 52 rWakeup chan struct{} `state:"nosave"` 53 wWakeup chan struct{} `state:"nosave"` 54 } 55 56 // NewVFSPipe returns an initialized VFSPipe. 57 func NewVFSPipe(isNamed bool, sizeBytes int64) *VFSPipe { 58 var vp VFSPipe 59 initPipe(&vp.pipe, isNamed, sizeBytes) 60 return &vp 61 } 62 63 // ReaderWriterPair returns read-only and write-only FDs for vp. 64 // 65 // Preconditions: statusFlags should not contain an open access mode. 66 func (vp *VFSPipe) ReaderWriterPair(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription, error) { 67 // Connected pipes share the same locks. 68 locks := &vfs.FileLocks{} 69 r, err := vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags, locks) 70 if err != nil { 71 return nil, nil, err 72 } 73 w, err := vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags, locks) 74 if err != nil { 75 r.DecRef(ctx) 76 return nil, nil, err 77 } 78 return r, w, nil 79 } 80 81 // Allocate implements vfs.FileDescriptionImpl.Allocate. 82 func (*VFSPipe) Allocate(context.Context, uint64, uint64, uint64) error { 83 return linuxerr.ESPIPE 84 } 85 86 // Open opens the pipe represented by vp. 87 func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) { 88 vp.mu.Lock() 89 defer vp.mu.Unlock() 90 91 readable := vfs.MayReadFileWithOpenFlags(statusFlags) 92 writable := vfs.MayWriteFileWithOpenFlags(statusFlags) 93 if !readable && !writable { 94 return nil, linuxerr.EINVAL 95 } 96 97 fd, err := vp.newFD(mnt, vfsd, statusFlags, locks) 98 if err != nil { 99 return nil, err 100 } 101 102 // Named pipes have special blocking semantics during open: 103 // 104 // "Normally, opening the FIFO blocks until the other end is opened also. A 105 // process can open a FIFO in nonblocking mode. In this case, opening for 106 // read-only will succeed even if no-one has opened on the write side yet, 107 // opening for write-only will fail with ENXIO (no such device or address) 108 // unless the other end has already been opened. Under Linux, opening a 109 // FIFO for read and write will succeed both in blocking and nonblocking 110 // mode. POSIX leaves this behavior undefined. This can be used to open a 111 // FIFO for writing while there are no readers available." - fifo(7) 112 switch { 113 case readable && writable: 114 // Pipes opened for read-write always succeed without blocking. 115 newHandleLocked(&vp.rWakeup) 116 newHandleLocked(&vp.wWakeup) 117 118 case readable: 119 newHandleLocked(&vp.rWakeup) 120 // If this pipe is being opened as blocking and there's no 121 // writer, we have to wait for a writer to open the other end. 122 if vp.pipe.isNamed && statusFlags&linux.O_NONBLOCK == 0 && !vp.pipe.HasWriters() && !waitFor(&vp.mu, &vp.wWakeup, ctx) { 123 fd.DecRef(ctx) 124 return nil, syserror.EINTR 125 } 126 127 case writable: 128 newHandleLocked(&vp.wWakeup) 129 130 if vp.pipe.isNamed && !vp.pipe.HasReaders() { 131 // Non-blocking, write-only opens fail with ENXIO when the read 132 // side isn't open yet. 133 if statusFlags&linux.O_NONBLOCK != 0 { 134 fd.DecRef(ctx) 135 return nil, linuxerr.ENXIO 136 } 137 // Wait for a reader to open the other end. 138 if !waitFor(&vp.mu, &vp.rWakeup, ctx) { 139 fd.DecRef(ctx) 140 return nil, syserror.EINTR 141 } 142 } 143 144 default: 145 panic("invalid pipe flags: must be readable, writable, or both") 146 } 147 148 return fd, nil 149 } 150 151 // Preconditions: vp.mu must be held. 152 func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) { 153 fd := &VFSPipeFD{ 154 pipe: &vp.pipe, 155 } 156 fd.LockFD.Init(locks) 157 if err := fd.vfsfd.Init(fd, statusFlags, mnt, vfsd, &vfs.FileDescriptionOptions{ 158 DenyPRead: true, 159 DenyPWrite: true, 160 UseDentryMetadata: true, 161 }); err != nil { 162 return nil, err 163 } 164 165 switch { 166 case fd.vfsfd.IsReadable() && fd.vfsfd.IsWritable(): 167 vp.pipe.rOpen() 168 vp.pipe.wOpen() 169 case fd.vfsfd.IsReadable(): 170 vp.pipe.rOpen() 171 case fd.vfsfd.IsWritable(): 172 vp.pipe.wOpen() 173 default: 174 panic("invalid pipe flags: must be readable, writable, or both") 175 } 176 177 return &fd.vfsfd, nil 178 } 179 180 // VFSPipeFD implements vfs.FileDescriptionImpl for pipes. It also implements 181 // non-atomic usermem.IO methods, allowing it to be passed as usermem.IO to 182 // other FileDescriptions for splice(2) and tee(2). 183 // 184 // +stateify savable 185 type VFSPipeFD struct { 186 vfsfd vfs.FileDescription 187 vfs.FileDescriptionDefaultImpl 188 vfs.DentryMetadataFileDescriptionImpl 189 vfs.LockFD 190 191 pipe *Pipe 192 } 193 194 // Release implements vfs.FileDescriptionImpl.Release. 195 func (fd *VFSPipeFD) Release(context.Context) { 196 var event waiter.EventMask 197 if fd.vfsfd.IsReadable() { 198 fd.pipe.rClose() 199 event |= waiter.WritableEvents 200 } 201 if fd.vfsfd.IsWritable() { 202 fd.pipe.wClose() 203 event |= waiter.ReadableEvents | waiter.EventHUp 204 } 205 if event == 0 { 206 panic("invalid pipe flags: must be readable, writable, or both") 207 } 208 209 fd.pipe.Notify(event) 210 } 211 212 // Readiness implements waiter.Waitable.Readiness. 213 func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask { 214 switch { 215 case fd.vfsfd.IsReadable() && fd.vfsfd.IsWritable(): 216 return fd.pipe.rwReadiness() 217 case fd.vfsfd.IsReadable(): 218 return fd.pipe.rReadiness() 219 case fd.vfsfd.IsWritable(): 220 return fd.pipe.wReadiness() 221 default: 222 panic("pipe FD is neither readable nor writable") 223 } 224 } 225 226 // Allocate implements vfs.FileDescriptionImpl.Allocate. 227 func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error { 228 return linuxerr.ESPIPE 229 } 230 231 // EventRegister implements waiter.Waitable.EventRegister. 232 func (fd *VFSPipeFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) { 233 fd.pipe.EventRegister(e, mask) 234 } 235 236 // EventUnregister implements waiter.Waitable.EventUnregister. 237 func (fd *VFSPipeFD) EventUnregister(e *waiter.Entry) { 238 fd.pipe.EventUnregister(e) 239 } 240 241 // Read implements vfs.FileDescriptionImpl.Read. 242 func (fd *VFSPipeFD) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) { 243 return fd.pipe.Read(ctx, dst) 244 } 245 246 // Write implements vfs.FileDescriptionImpl.Write. 247 func (fd *VFSPipeFD) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) { 248 return fd.pipe.Write(ctx, src) 249 } 250 251 // Ioctl implements vfs.FileDescriptionImpl.Ioctl. 252 func (fd *VFSPipeFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { 253 return fd.pipe.Ioctl(ctx, uio, args) 254 } 255 256 // PipeSize implements fcntl(F_GETPIPE_SZ). 257 func (fd *VFSPipeFD) PipeSize() int64 { 258 // Inline Pipe.FifoSize() since we don't have a fs.File. 259 fd.pipe.mu.Lock() 260 defer fd.pipe.mu.Unlock() 261 return fd.pipe.max 262 } 263 264 // SetPipeSize implements fcntl(F_SETPIPE_SZ). 265 func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) { 266 return fd.pipe.SetFifoSize(size) 267 } 268 269 // SpliceToNonPipe performs a splice operation from fd to a non-pipe file. 270 func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescription, off, count int64) (int64, error) { 271 fd.pipe.mu.Lock() 272 273 // Cap the sequence at number of bytes actually available. 274 if count > fd.pipe.size { 275 count = fd.pipe.size 276 } 277 src := usermem.IOSequence{ 278 IO: fd, 279 Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}), 280 } 281 282 var ( 283 n int64 284 err error 285 ) 286 if off == -1 { 287 n, err = out.Write(ctx, src, vfs.WriteOptions{}) 288 } else { 289 n, err = out.PWrite(ctx, src, off, vfs.WriteOptions{}) 290 } 291 if n > 0 { 292 fd.pipe.consumeLocked(n) 293 } 294 295 fd.pipe.mu.Unlock() 296 297 if n > 0 { 298 fd.pipe.Notify(waiter.WritableEvents) 299 } 300 return n, err 301 } 302 303 // SpliceFromNonPipe performs a splice operation from a non-pipe file to fd. 304 func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) { 305 dst := usermem.IOSequence{ 306 IO: fd, 307 Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}), 308 } 309 310 var ( 311 n int64 312 err error 313 ) 314 fd.pipe.mu.Lock() 315 if off == -1 { 316 n, err = in.Read(ctx, dst, vfs.ReadOptions{}) 317 } else { 318 n, err = in.PRead(ctx, dst, off, vfs.ReadOptions{}) 319 } 320 fd.pipe.mu.Unlock() 321 322 if n > 0 { 323 fd.pipe.Notify(waiter.ReadableEvents) 324 } 325 return n, err 326 } 327 328 // CopyIn implements usermem.IO.CopyIn. Note that it is the caller's 329 // responsibility to call fd.pipe.consumeLocked() and 330 // fd.pipe.Notify(waiter.WritableEvents) after the read is completed. 331 // 332 // Preconditions: fd.pipe.mu must be locked. 333 func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) { 334 n, err := fd.pipe.peekLocked(int64(len(dst)), func(srcs safemem.BlockSeq) (uint64, error) { 335 return safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), srcs) 336 }) 337 return int(n), err 338 } 339 340 // CopyOut implements usermem.IO.CopyOut. Note that it is the caller's 341 // responsibility to call fd.pipe.Notify(waiter.ReadableEvents) after the write 342 // is completed. 343 // 344 // Preconditions: fd.pipe.mu must be locked. 345 func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) { 346 n, err := fd.pipe.writeLocked(int64(len(src)), func(dsts safemem.BlockSeq) (uint64, error) { 347 return safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src))) 348 }) 349 return int(n), err 350 } 351 352 // ZeroOut implements usermem.IO.ZeroOut. 353 // 354 // Preconditions: fd.pipe.mu must be locked. 355 func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { 356 n, err := fd.pipe.writeLocked(toZero, func(dsts safemem.BlockSeq) (uint64, error) { 357 return safemem.ZeroSeq(dsts) 358 }) 359 return n, err 360 } 361 362 // CopyInTo implements usermem.IO.CopyInTo. Note that it is the caller's 363 // responsibility to call fd.pipe.consumeLocked() and 364 // fd.pipe.Notify(waiter.WritableEvents) after the read is completed. 365 // 366 // Preconditions: fd.pipe.mu must be locked. 367 func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { 368 return fd.pipe.peekLocked(ars.NumBytes(), func(srcs safemem.BlockSeq) (uint64, error) { 369 return dst.WriteFromBlocks(srcs) 370 }) 371 } 372 373 // CopyOutFrom implements usermem.IO.CopyOutFrom. Note that it is the caller's 374 // responsibility to call fd.pipe.Notify(waiter.ReadableEvents) after the write 375 // is completed. 376 // 377 // Preconditions: fd.pipe.mu must be locked. 378 func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { 379 return fd.pipe.writeLocked(ars.NumBytes(), func(dsts safemem.BlockSeq) (uint64, error) { 380 return src.ReadToBlocks(dsts) 381 }) 382 } 383 384 // SwapUint32 implements usermem.IO.SwapUint32. 385 func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { 386 // How did a pipe get passed as the virtual address space to futex(2)? 387 panic("VFSPipeFD.SwapUint32 called unexpectedly") 388 } 389 390 // CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32. 391 func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { 392 panic("VFSPipeFD.CompareAndSwapUint32 called unexpectedly") 393 } 394 395 // LoadUint32 implements usermem.IO.LoadUint32. 396 func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) { 397 panic("VFSPipeFD.LoadUint32 called unexpectedly") 398 } 399 400 // Splice reads up to count bytes from src and writes them to dst. It returns 401 // the number of bytes moved. 402 // 403 // Preconditions: count > 0. 404 func Splice(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) { 405 return spliceOrTee(ctx, dst, src, count, true /* removeFromSrc */) 406 } 407 408 // Tee reads up to count bytes from src and writes them to dst, without 409 // removing the read bytes from src. It returns the number of bytes copied. 410 // 411 // Preconditions: count > 0. 412 func Tee(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) { 413 return spliceOrTee(ctx, dst, src, count, false /* removeFromSrc */) 414 } 415 416 // Preconditions: count > 0. 417 func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFromSrc bool) (int64, error) { 418 if dst.pipe == src.pipe { 419 return 0, linuxerr.EINVAL 420 } 421 422 lockTwoPipes(dst.pipe, src.pipe) 423 n, err := dst.pipe.writeLocked(count, func(dsts safemem.BlockSeq) (uint64, error) { 424 n, err := src.pipe.peekLocked(int64(dsts.NumBytes()), func(srcs safemem.BlockSeq) (uint64, error) { 425 return safemem.CopySeq(dsts, srcs) 426 }) 427 if n > 0 && removeFromSrc { 428 src.pipe.consumeLocked(n) 429 } 430 return uint64(n), err 431 }) 432 dst.pipe.mu.Unlock() 433 src.pipe.mu.Unlock() 434 435 if n > 0 { 436 dst.pipe.Notify(waiter.ReadableEvents) 437 if removeFromSrc { 438 src.pipe.Notify(waiter.WritableEvents) 439 } 440 } 441 return n, err 442 }