gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/kernel/pipe/vfs.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pipe

import (
	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
	"gvisor.dev/gvisor/pkg/hostarch"
	"gvisor.dev/gvisor/pkg/log"
	"gvisor.dev/gvisor/pkg/safemem"
	"gvisor.dev/gvisor/pkg/sentry/arch"
	"gvisor.dev/gvisor/pkg/sentry/vfs"
	"gvisor.dev/gvisor/pkg/usermem"
	"gvisor.dev/gvisor/pkg/waiter"
)

// This file contains types enabling the pipe package to be used with the vfs
// package.

// VFSPipe represents the actual pipe, analogous to an inode. VFSPipes should
// not be copied.
//
// +stateify savable
type VFSPipe struct {
	// pipe is the underlying pipe.
	pipe Pipe
}

// NewVFSPipe returns an initialized VFSPipe.
func NewVFSPipe(isNamed bool, sizeBytes int64) *VFSPipe {
	var vp VFSPipe
	initPipe(&vp.pipe, isNamed, sizeBytes)
	return &vp
}

// ReaderWriterPair returns read-only and write-only FDs for vp.
//
// Preconditions: statusFlags should not contain an open access mode.
func (vp *VFSPipe) ReaderWriterPair(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription, error) {
	// Connected pipes share the same locks.
	locks := &vfs.FileLocks{}
	r, err := vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags, locks)
	if err != nil {
		return nil, nil, err
	}
	vp.pipe.rOpen()
	w, err := vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags, locks)
	if err != nil {
		r.DecRef(ctx)
		return nil, nil, err
	}
	vp.pipe.wOpen()
	return r, w, nil
}

// Allocate implements vfs.FileDescriptionImpl.Allocate.
func (*VFSPipe) Allocate(context.Context, uint64, uint64, uint64) error {
	return linuxerr.ESPIPE
}

// Open opens the pipe represented by vp.
func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) {
	readable := vfs.MayReadFileWithOpenFlags(statusFlags)
	writable := vfs.MayWriteFileWithOpenFlags(statusFlags)
	if !readable && !writable {
		return nil, linuxerr.EINVAL
	}

	fd, err := vp.newFD(mnt, vfsd, statusFlags, locks)
	if err != nil {
		return nil, err
	}

	// Named pipes have special blocking semantics during open:
	//
	// "Normally, opening the FIFO blocks until the other end is opened also. A
	// process can open a FIFO in nonblocking mode. In this case, opening for
	// read-only will succeed even if no-one has opened on the write side yet,
	// opening for write-only will fail with ENXIO (no such device or address)
	// unless the other end has already been opened. Under Linux, opening a
	// FIFO for read and write will succeed both in blocking and nonblocking
	// mode. POSIX leaves this behavior undefined. This can be used to open a
	// FIFO for writing while there are no readers available." - fifo(7)
	switch {
	case readable && writable:
		vp.pipe.rOpen()
		vp.pipe.wOpen()
		// Pipes opened for read-write always succeed without blocking.

	case readable:
		tWriters := vp.pipe.totalWriters.Load()
		vp.pipe.rOpen()
		// If this pipe is being opened as blocking and there's no
		// writer, we have to wait for a writer to open the other end.
		for vp.pipe.isNamed && statusFlags&linux.O_NONBLOCK == 0 && !vp.pipe.HasWriters() &&
			tWriters == vp.pipe.totalWriters.Load() {
			if !ctx.BlockOn((*waitWriters)(&vp.pipe), waiter.EventInternal) {
				fd.DecRef(ctx)
				return nil, linuxerr.EINTR
			}
		}

	case writable:
		tReaders := vp.pipe.totalReaders.Load()
		vp.pipe.wOpen()
		for vp.pipe.isNamed && !vp.pipe.HasReaders() &&
			tReaders == vp.pipe.totalReaders.Load() {
			// Non-blocking, write-only opens fail with ENXIO when the read
			// side isn't open yet.
			if statusFlags&linux.O_NONBLOCK != 0 {
				fd.DecRef(ctx)
				return nil, linuxerr.ENXIO
			}
			if !ctx.BlockOn((*waitReaders)(&vp.pipe), waiter.EventInternal) {
				fd.DecRef(ctx)
				return nil, linuxerr.EINTR
			}
		}

	default:
		panic("invalid pipe flags: must be readable, writable, or both")
	}

	return fd, nil
}

// Preconditions: vp.mu must be held.
func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) {
	fd := &VFSPipeFD{
		pipe: &vp.pipe,
	}
	fd.LockFD.Init(locks)
	if err := fd.vfsfd.Init(fd, statusFlags, mnt, vfsd, &vfs.FileDescriptionOptions{
		DenyPRead:         true,
		DenyPWrite:        true,
		UseDentryMetadata: true,
	}); err != nil {
		return nil, err
	}

	return &fd.vfsfd, nil
}

// VFSPipeFD implements vfs.FileDescriptionImpl for pipes. It also implements
// non-atomic usermem.IO methods, allowing it to be passed as usermem.IO to
// other FileDescriptions for splice(2) and tee(2).
//
// +stateify savable
type VFSPipeFD struct {
	vfsfd vfs.FileDescription
	vfs.FileDescriptionDefaultImpl
	vfs.DentryMetadataFileDescriptionImpl
	vfs.LockFD

	pipe *Pipe

	// lastAddr is the last hostarch.Addr at which a call to a
	// VFSPipeFD.(usermem.IO) method ended. lastAddr is protected by pipe.mu.
	lastAddr hostarch.Addr
}

// Release implements vfs.FileDescriptionImpl.Release.
func (fd *VFSPipeFD) Release(context.Context) {
	var event waiter.EventMask
	if fd.vfsfd.IsReadable() {
		fd.pipe.rClose()
		event |= waiter.WritableEvents
		if !fd.pipe.HasReaders() {
			event |= waiter.EventErr
		}
	}
	if fd.vfsfd.IsWritable() {
		fd.pipe.wClose()
		event |= waiter.ReadableEvents | waiter.EventHUp
	}
	if event == 0 {
		panic("invalid pipe flags: must be readable, writable, or both")
	}

	fd.pipe.queue.Notify(event)
}

// Readiness implements waiter.Waitable.Readiness.
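// The result depends on which ends of the pipe this FD has open: a read-write
// FD combines reader and writer readiness, while read-only and write-only FDs
// report only their own end of the pipe.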
func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask {
	switch {
	case fd.vfsfd.IsReadable() && fd.vfsfd.IsWritable():
		return fd.pipe.rwReadiness()
	case fd.vfsfd.IsReadable():
		return fd.pipe.rReadiness()
	case fd.vfsfd.IsWritable():
		return fd.pipe.wReadiness()
	default:
		panic("pipe FD is neither readable nor writable")
	}
}

// Allocate implements vfs.FileDescriptionImpl.Allocate.
func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
	return linuxerr.ESPIPE
}

// EventRegister implements waiter.Waitable.EventRegister.
func (fd *VFSPipeFD) EventRegister(e *waiter.Entry) error {
	fd.pipe.EventRegister(e)

	// Notify synchronously.
	e.NotifyEvent(fd.Readiness(^waiter.EventMask(0)))
	return nil
}

// EventUnregister implements waiter.Waitable.EventUnregister.
func (fd *VFSPipeFD) EventUnregister(e *waiter.Entry) {
	fd.pipe.EventUnregister(e)
}

// Epollable implements FileDescriptionImpl.Epollable.
func (fd *VFSPipeFD) Epollable() bool {
	return true
}

// Read implements vfs.FileDescriptionImpl.Read.
func (fd *VFSPipeFD) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) {
	return fd.pipe.Read(ctx, dst)
}

// Write implements vfs.FileDescriptionImpl.Write.
func (fd *VFSPipeFD) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) {
	return fd.pipe.Write(ctx, src)
}

// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
func (fd *VFSPipeFD) Ioctl(ctx context.Context, uio usermem.IO, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
	return fd.pipe.Ioctl(ctx, uio, sysno, args)
}

// PipeSize implements fcntl(F_GETPIPE_SZ).
func (fd *VFSPipeFD) PipeSize() int64 {
	// Inline Pipe.FifoSize() since we don't have a fs.File.
	fd.pipe.mu.Lock()
	defer fd.pipe.mu.Unlock()
	return fd.pipe.max
}

// SetPipeSize implements fcntl(F_SETPIPE_SZ).
func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) {
	return fd.pipe.SetFifoSize(size)
}

// SpliceToNonPipe performs a splice operation from fd to a non-pipe file.
func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescription, off, count int64) (int64, error) {
	fd.pipe.mu.Lock()

	// Cap the sequence at the number of bytes actually available.
	if count > fd.pipe.size {
		count = fd.pipe.size
	}
	src := usermem.IOSequence{
		IO:    fd,
		Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}),
	}

	var (
		n   int64
		err error
	)
	fd.lastAddr = 0
	if off == -1 {
		n, err = out.Write(ctx, src, vfs.WriteOptions{})
	} else {
		n, err = out.PWrite(ctx, src, off, vfs.WriteOptions{})
	}
	// Implementations of out.[P]Write() that ignore written data (e.g.
	// /dev/null) may skip calling src.CopyIn[To](), so:
	//
	// - We must call Pipe.consumeLocked() here rather than in fd.CopyIn[To]().
	//
	// - We must check if Pipe.peekLocked() would have returned ErrWouldBlock.
	fd.pipe.consumeLocked(n)
	if n == 0 && err == nil && fd.pipe.size == 0 && fd.pipe.HasWriters() {
		err = linuxerr.ErrWouldBlock
	}

	fd.pipe.mu.Unlock()

	if n > 0 {
		fd.pipe.queue.Notify(waiter.WritableEvents)
	}
	return n, err
}

// SpliceFromNonPipe performs a splice operation from a non-pipe file to fd.
func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) {
	dst := usermem.IOSequence{
		IO:    fd,
		Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}),
	}

	var (
		n   int64
		err error
	)
	fd.pipe.mu.Lock()
	fd.lastAddr = 0
	if off == -1 {
		n, err = in.Read(ctx, dst, vfs.ReadOptions{})
	} else {
		n, err = in.PRead(ctx, dst, off, vfs.ReadOptions{})
	}
	fd.pipe.mu.Unlock()

	if n > 0 {
		fd.pipe.queue.Notify(waiter.ReadableEvents)
	}
	return n, err
}

// CopyIn implements usermem.IO.CopyIn. Note that it is the caller's
// responsibility to call fd.pipe.consumeLocked() and
// fd.pipe.queue.Notify(waiter.WritableEvents) after the read is completed.
//
// Preconditions: fd.pipe.mu must be locked.
func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
	if addr != fd.lastAddr {
		log.Traceback("Non-sequential VFSPipeFD.CopyIn: lastAddr=%#x addr=%#x", fd.lastAddr, addr)
		return 0, linuxerr.EINVAL
	}
	n, err := fd.pipe.peekLocked(int64(addr), int64(len(dst)), func(srcs safemem.BlockSeq) (uint64, error) {
		return safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), srcs)
	})
	fd.lastAddr = addr + hostarch.Addr(n)
	return int(n), err
}

// CopyOut implements usermem.IO.CopyOut. Note that it is the caller's
// responsibility to call fd.pipe.queue.Notify(waiter.ReadableEvents) after the
// write is completed.
//
// Preconditions: fd.pipe.mu must be locked.
func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) {
	if addr != fd.lastAddr {
		log.Traceback("Non-sequential VFSPipeFD.CopyOut: lastAddr=%#x addr=%#x", fd.lastAddr, addr)
		return 0, linuxerr.EINVAL
	}
	n, err := fd.pipe.writeLocked(int64(len(src)), func(dsts safemem.BlockSeq) (uint64, error) {
		return safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
	})
	fd.lastAddr = addr + hostarch.Addr(n)
	return int(n), err
}

// ZeroOut implements usermem.IO.ZeroOut.
//
// Preconditions: fd.pipe.mu must be locked.
func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
	if addr != fd.lastAddr {
		log.Traceback("Non-sequential VFSPipeFD.ZeroOut: lastAddr=%#x addr=%#x", fd.lastAddr, addr)
		return 0, linuxerr.EINVAL
	}
	n, err := fd.pipe.writeLocked(toZero, func(dsts safemem.BlockSeq) (uint64, error) {
		return safemem.ZeroSeq(dsts)
	})
	fd.lastAddr = addr + hostarch.Addr(n)
	return n, err
}

// CopyInTo implements usermem.IO.CopyInTo. Note that it is the caller's
// responsibility to call fd.pipe.consumeLocked() and
// fd.pipe.queue.Notify(waiter.WritableEvents) after the read is completed.
//
// Preconditions: fd.pipe.mu must be locked.
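//
// Unlike for normal user memory, the addresses in ars denote sequential
// positions within the ongoing splice operation rather than addresses in an
// address space; out-of-order ranges are rejected via the lastAddr check.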
func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
	total := int64(0)
	for !ars.IsEmpty() {
		ar := ars.Head()
		if ar.Start != fd.lastAddr {
			log.Traceback("Non-sequential VFSPipeFD.CopyInTo: lastAddr=%#x addr=%#x", fd.lastAddr, ar.Start)
			return total, linuxerr.EINVAL
		}
		n, err := fd.pipe.peekLocked(int64(ar.Start), int64(ar.Length()), func(srcs safemem.BlockSeq) (uint64, error) {
			return dst.WriteFromBlocks(srcs)
		})
		fd.lastAddr = ar.Start + hostarch.Addr(n)
		total += n
		if err != nil {
			return total, err
		}
		ars = ars.Tail()
	}
	return total, nil
}

// CopyOutFrom implements usermem.IO.CopyOutFrom. Note that it is the caller's
// responsibility to call fd.pipe.queue.Notify(waiter.ReadableEvents) after the
// write is completed.
//
// Preconditions: fd.pipe.mu must be locked.
func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
	total := int64(0)
	for !ars.IsEmpty() {
		ar := ars.Head()
		if ar.Start != fd.lastAddr {
			log.Traceback("Non-sequential VFSPipeFD.CopyOutFrom: lastAddr=%#x addr=%#x", fd.lastAddr, ar.Start)
			return total, linuxerr.EINVAL
		}
		n, err := fd.pipe.writeLocked(int64(ar.Length()), func(dsts safemem.BlockSeq) (uint64, error) {
			return src.ReadToBlocks(dsts)
		})
		fd.lastAddr = ar.Start + hostarch.Addr(n)
		total += n
		if err != nil {
			return total, err
		}
		ars = ars.Tail()
	}
	return total, nil
}

// SwapUint32 implements usermem.IO.SwapUint32.
func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) {
	// How did a pipe get passed as the virtual address space to futex(2)?
	panic("VFSPipeFD.SwapUint32 called unexpectedly")
}

// CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32.
func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) {
	panic("VFSPipeFD.CompareAndSwapUint32 called unexpectedly")
}

// LoadUint32 implements usermem.IO.LoadUint32.
func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) {
	panic("VFSPipeFD.LoadUint32 called unexpectedly")
}

// Splice reads up to count bytes from src and writes them to dst. It returns
// the number of bytes moved.
//
// Preconditions: count > 0.
func Splice(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) {
	return spliceOrTee(ctx, dst, src, count, true /* removeFromSrc */)
}

// Tee reads up to count bytes from src and writes them to dst, without
// removing the read bytes from src. It returns the number of bytes copied.
//
// Preconditions: count > 0.
func Tee(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) {
	return spliceOrTee(ctx, dst, src, count, false /* removeFromSrc */)
}

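// spliceOrTee copies up to count bytes from src's pipe buffer into dst's pipe
// buffer, removing them from src only when removeFromSrc is true. Both pipes'
// mutexes are held for the duration of the copy; lockTwoPipes acquires the
// two locks in a consistent order so that concurrent splices between the same
// pair of pipes cannot deadlock.
//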
// Preconditions: count > 0.
func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFromSrc bool) (int64, error) {
	if dst.pipe == src.pipe {
		return 0, linuxerr.EINVAL
	}

	firstLocked, secondLocked := lockTwoPipes(dst.pipe, src.pipe)
	n, err := dst.pipe.writeLocked(count, func(dsts safemem.BlockSeq) (uint64, error) {
		n, err := src.pipe.peekLocked(0, int64(dsts.NumBytes()), func(srcs safemem.BlockSeq) (uint64, error) {
			return safemem.CopySeq(dsts, srcs)
		})
		if n > 0 && removeFromSrc {
			src.pipe.consumeLocked(n)
		}
		return uint64(n), err
	})
	secondLocked.mu.NestedUnlock(pipeLockPipe)
	firstLocked.mu.Unlock()

	if n > 0 {
		dst.pipe.queue.Notify(waiter.ReadableEvents)
		if removeFromSrc {
			src.pipe.queue.Notify(waiter.WritableEvents)
		}
	}
	return n, err
}
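
// The sketch below is illustrative rather than part of the upstream API: it
// shows how a caller such as a pipe(2)/pipe2(2) implementation might combine
// NewVFSPipe and ReaderWriterPair. The mnt and d arguments are assumed to be
// supplied by a pipefs-style filesystem that owns the pipe's dentry, and
// statusFlags is assumed to carry only status flags (e.g. O_NONBLOCK), per
// ReaderWriterPair's precondition that it not contain an open access mode.
func examplePipePair(ctx context.Context, mnt *vfs.Mount, d *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription, error) {
	// An anonymous pipe: not named, with the system default buffer size.
	vp := NewVFSPipe(false /* isNamed */, DefaultPipeSize)
	// Unlike Open on a FIFO, ReaderWriterPair opens both ends at once and
	// therefore never blocks waiting for a peer.
	r, w, err := vp.ReaderWriterPair(ctx, mnt, d, statusFlags)
	if err != nil {
		return nil, nil, err
	}
	// On success the caller owns one reference to each FD and must
	// eventually release them, e.g. via r.DecRef(ctx) and w.DecRef(ctx).
	return r, w, nil
}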