github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/kernel/pipe/vfs.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package pipe 16 17 import ( 18 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 19 "github.com/nicocha30/gvisor-ligolo/pkg/context" 20 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 21 "github.com/nicocha30/gvisor-ligolo/pkg/hostarch" 22 "github.com/nicocha30/gvisor-ligolo/pkg/safemem" 23 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/arch" 24 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs" 25 "github.com/nicocha30/gvisor-ligolo/pkg/usermem" 26 "github.com/nicocha30/gvisor-ligolo/pkg/waiter" 27 ) 28 29 // This file contains types enabling the pipe package to be used with the vfs 30 // package. 31 32 // VFSPipe represents the actual pipe, analogous to an inode. VFSPipes should 33 // not be copied. 34 // 35 // +stateify savable 36 type VFSPipe struct { 37 // pipe is the underlying pipe. 38 pipe Pipe 39 } 40 41 // NewVFSPipe returns an initialized VFSPipe. 42 func NewVFSPipe(isNamed bool, sizeBytes int64) *VFSPipe { 43 var vp VFSPipe 44 initPipe(&vp.pipe, isNamed, sizeBytes) 45 return &vp 46 } 47 48 // ReaderWriterPair returns read-only and write-only FDs for vp. 49 // 50 // Preconditions: statusFlags should not contain an open access mode. 51 func (vp *VFSPipe) ReaderWriterPair(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription, error) { 52 // Connected pipes share the same locks. 53 locks := &vfs.FileLocks{} 54 r, err := vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags, locks) 55 if err != nil { 56 return nil, nil, err 57 } 58 vp.pipe.rOpen() 59 w, err := vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags, locks) 60 if err != nil { 61 r.DecRef(ctx) 62 return nil, nil, err 63 } 64 vp.pipe.wOpen() 65 return r, w, nil 66 } 67 68 // Allocate implements vfs.FileDescriptionImpl.Allocate. 69 func (*VFSPipe) Allocate(context.Context, uint64, uint64, uint64) error { 70 return linuxerr.ESPIPE 71 } 72 73 // Open opens the pipe represented by vp. 74 func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) { 75 readable := vfs.MayReadFileWithOpenFlags(statusFlags) 76 writable := vfs.MayWriteFileWithOpenFlags(statusFlags) 77 if !readable && !writable { 78 return nil, linuxerr.EINVAL 79 } 80 81 fd, err := vp.newFD(mnt, vfsd, statusFlags, locks) 82 if err != nil { 83 return nil, err 84 } 85 86 // Named pipes have special blocking semantics during open: 87 // 88 // "Normally, opening the FIFO blocks until the other end is opened also. A 89 // process can open a FIFO in nonblocking mode. In this case, opening for 90 // read-only will succeed even if no-one has opened on the write side yet, 91 // opening for write-only will fail with ENXIO (no such device or address) 92 // unless the other end has already been opened. Under Linux, opening a 93 // FIFO for read and write will succeed both in blocking and nonblocking 94 // mode. POSIX leaves this behavior undefined. This can be used to open a 95 // FIFO for writing while there are no readers available." - fifo(7) 96 switch { 97 case readable && writable: 98 vp.pipe.rOpen() 99 vp.pipe.wOpen() 100 // Pipes opened for read-write always succeed without blocking. 101 102 case readable: 103 tWriters := vp.pipe.totalWriters.Load() 104 vp.pipe.rOpen() 105 // If this pipe is being opened as blocking and there's no 106 // writer, we have to wait for a writer to open the other end. 107 for vp.pipe.isNamed && statusFlags&linux.O_NONBLOCK == 0 && !vp.pipe.HasWriters() && 108 tWriters == vp.pipe.totalWriters.Load() { 109 if !ctx.BlockOn((*waitWriters)(&vp.pipe), waiter.EventInternal) { 110 fd.DecRef(ctx) 111 return nil, linuxerr.EINTR 112 } 113 } 114 115 case writable: 116 tReaders := vp.pipe.totalReaders.Load() 117 vp.pipe.wOpen() 118 for vp.pipe.isNamed && !vp.pipe.HasReaders() && 119 tReaders == vp.pipe.totalReaders.Load() { 120 // Non-blocking, write-only opens fail with ENXIO when the read 121 // side isn't open yet. 122 if statusFlags&linux.O_NONBLOCK != 0 { 123 fd.DecRef(ctx) 124 return nil, linuxerr.ENXIO 125 } 126 if !ctx.BlockOn((*waitReaders)(&vp.pipe), waiter.EventInternal) { 127 fd.DecRef(ctx) 128 return nil, linuxerr.EINTR 129 } 130 } 131 132 default: 133 panic("invalid pipe flags: must be readable, writable, or both") 134 } 135 136 return fd, nil 137 } 138 139 // Preconditions: vp.mu must be held. 140 func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) { 141 fd := &VFSPipeFD{ 142 pipe: &vp.pipe, 143 } 144 fd.LockFD.Init(locks) 145 if err := fd.vfsfd.Init(fd, statusFlags, mnt, vfsd, &vfs.FileDescriptionOptions{ 146 DenyPRead: true, 147 DenyPWrite: true, 148 UseDentryMetadata: true, 149 }); err != nil { 150 return nil, err 151 } 152 153 return &fd.vfsfd, nil 154 } 155 156 // VFSPipeFD implements vfs.FileDescriptionImpl for pipes. It also implements 157 // non-atomic usermem.IO methods, allowing it to be passed as usermem.IO to 158 // other FileDescriptions for splice(2) and tee(2). 159 // 160 // +stateify savable 161 type VFSPipeFD struct { 162 vfsfd vfs.FileDescription 163 vfs.FileDescriptionDefaultImpl 164 vfs.DentryMetadataFileDescriptionImpl 165 vfs.LockFD 166 167 pipe *Pipe 168 } 169 170 // Release implements vfs.FileDescriptionImpl.Release. 171 func (fd *VFSPipeFD) Release(context.Context) { 172 var event waiter.EventMask 173 if fd.vfsfd.IsReadable() { 174 fd.pipe.rClose() 175 event |= waiter.WritableEvents 176 } 177 if fd.vfsfd.IsWritable() { 178 fd.pipe.wClose() 179 event |= waiter.ReadableEvents | waiter.EventHUp 180 } 181 if event == 0 { 182 panic("invalid pipe flags: must be readable, writable, or both") 183 } 184 185 fd.pipe.queue.Notify(event) 186 } 187 188 // Readiness implements waiter.Waitable.Readiness. 189 func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask { 190 switch { 191 case fd.vfsfd.IsReadable() && fd.vfsfd.IsWritable(): 192 return fd.pipe.rwReadiness() 193 case fd.vfsfd.IsReadable(): 194 return fd.pipe.rReadiness() 195 case fd.vfsfd.IsWritable(): 196 return fd.pipe.wReadiness() 197 default: 198 panic("pipe FD is neither readable nor writable") 199 } 200 } 201 202 // Allocate implements vfs.FileDescriptionImpl.Allocate. 203 func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error { 204 return linuxerr.ESPIPE 205 } 206 207 // EventRegister implements waiter.Waitable.EventRegister. 208 func (fd *VFSPipeFD) EventRegister(e *waiter.Entry) error { 209 fd.pipe.EventRegister(e) 210 211 // Notify synchronously. 212 e.NotifyEvent(fd.Readiness(^waiter.EventMask(0))) 213 return nil 214 } 215 216 // EventUnregister implements waiter.Waitable.EventUnregister. 217 func (fd *VFSPipeFD) EventUnregister(e *waiter.Entry) { 218 fd.pipe.EventUnregister(e) 219 } 220 221 // Epollable implements FileDescriptionImpl.Epollable. 222 func (fd *VFSPipeFD) Epollable() bool { 223 return true 224 } 225 226 // Read implements vfs.FileDescriptionImpl.Read. 227 func (fd *VFSPipeFD) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) { 228 return fd.pipe.Read(ctx, dst) 229 } 230 231 // Write implements vfs.FileDescriptionImpl.Write. 232 func (fd *VFSPipeFD) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) { 233 return fd.pipe.Write(ctx, src) 234 } 235 236 // Ioctl implements vfs.FileDescriptionImpl.Ioctl. 237 func (fd *VFSPipeFD) Ioctl(ctx context.Context, uio usermem.IO, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { 238 return fd.pipe.Ioctl(ctx, uio, sysno, args) 239 } 240 241 // PipeSize implements fcntl(F_GETPIPE_SZ). 242 func (fd *VFSPipeFD) PipeSize() int64 { 243 // Inline Pipe.FifoSize() since we don't have a fs.File. 244 fd.pipe.mu.Lock() 245 defer fd.pipe.mu.Unlock() 246 return fd.pipe.max 247 } 248 249 // SetPipeSize implements fcntl(F_SETPIPE_SZ). 250 func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) { 251 return fd.pipe.SetFifoSize(size) 252 } 253 254 // SpliceToNonPipe performs a splice operation from fd to a non-pipe file. 255 func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescription, off, count int64) (int64, error) { 256 fd.pipe.mu.Lock() 257 258 // Cap the sequence at number of bytes actually available. 259 if count > fd.pipe.size { 260 count = fd.pipe.size 261 } 262 src := usermem.IOSequence{ 263 IO: fd, 264 Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}), 265 } 266 267 var ( 268 n int64 269 err error 270 ) 271 if off == -1 { 272 n, err = out.Write(ctx, src, vfs.WriteOptions{}) 273 } else { 274 n, err = out.PWrite(ctx, src, off, vfs.WriteOptions{}) 275 } 276 if n > 0 { 277 fd.pipe.consumeLocked(n) 278 } 279 280 fd.pipe.mu.Unlock() 281 282 if n > 0 { 283 fd.pipe.queue.Notify(waiter.WritableEvents) 284 } 285 return n, err 286 } 287 288 // SpliceFromNonPipe performs a splice operation from a non-pipe file to fd. 289 func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) { 290 dst := usermem.IOSequence{ 291 IO: fd, 292 Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}), 293 } 294 295 var ( 296 n int64 297 err error 298 ) 299 fd.pipe.mu.Lock() 300 if off == -1 { 301 n, err = in.Read(ctx, dst, vfs.ReadOptions{}) 302 } else { 303 n, err = in.PRead(ctx, dst, off, vfs.ReadOptions{}) 304 } 305 fd.pipe.mu.Unlock() 306 307 if n > 0 { 308 fd.pipe.queue.Notify(waiter.ReadableEvents) 309 } 310 return n, err 311 } 312 313 // CopyIn implements usermem.IO.CopyIn. Note that it is the caller's 314 // responsibility to call fd.pipe.consumeLocked() and 315 // fd.pipe.Notify(waiter.WritableEvents) after the read is completed. 316 // 317 // Preconditions: fd.pipe.mu must be locked. 318 func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) { 319 n, err := fd.pipe.peekLocked(int64(len(dst)), func(srcs safemem.BlockSeq) (uint64, error) { 320 return safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), srcs) 321 }) 322 return int(n), err 323 } 324 325 // CopyOut implements usermem.IO.CopyOut. Note that it is the caller's 326 // responsibility to call fd.pipe.queue.Notify(waiter.ReadableEvents) after the 327 // write is completed. 328 // 329 // Preconditions: fd.pipe.mu must be locked. 330 func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) { 331 n, err := fd.pipe.writeLocked(int64(len(src)), func(dsts safemem.BlockSeq) (uint64, error) { 332 return safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src))) 333 }) 334 return int(n), err 335 } 336 337 // ZeroOut implements usermem.IO.ZeroOut. 338 // 339 // Preconditions: fd.pipe.mu must be locked. 340 func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) { 341 n, err := fd.pipe.writeLocked(toZero, func(dsts safemem.BlockSeq) (uint64, error) { 342 return safemem.ZeroSeq(dsts) 343 }) 344 return n, err 345 } 346 347 // CopyInTo implements usermem.IO.CopyInTo. Note that it is the caller's 348 // responsibility to call fd.pipe.consumeLocked() and 349 // fd.pipe.queue.Notify(waiter.WritableEvents) after the read is completed. 350 // 351 // Preconditions: fd.pipe.mu must be locked. 352 func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) { 353 return fd.pipe.peekLocked(ars.NumBytes(), func(srcs safemem.BlockSeq) (uint64, error) { 354 return dst.WriteFromBlocks(srcs) 355 }) 356 } 357 358 // CopyOutFrom implements usermem.IO.CopyOutFrom. Note that it is the caller's 359 // responsibility to call fd.pipe.queue.Notify(waiter.ReadableEvents) after the 360 // write is completed. 361 // 362 // Preconditions: fd.pipe.mu must be locked. 363 func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) { 364 return fd.pipe.writeLocked(ars.NumBytes(), func(dsts safemem.BlockSeq) (uint64, error) { 365 return src.ReadToBlocks(dsts) 366 }) 367 } 368 369 // SwapUint32 implements usermem.IO.SwapUint32. 370 func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) { 371 // How did a pipe get passed as the virtual address space to futex(2)? 372 panic("VFSPipeFD.SwapUint32 called unexpectedly") 373 } 374 375 // CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32. 376 func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) { 377 panic("VFSPipeFD.CompareAndSwapUint32 called unexpectedly") 378 } 379 380 // LoadUint32 implements usermem.IO.LoadUint32. 381 func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) { 382 panic("VFSPipeFD.LoadUint32 called unexpectedly") 383 } 384 385 // Splice reads up to count bytes from src and writes them to dst. It returns 386 // the number of bytes moved. 387 // 388 // Preconditions: count > 0. 389 func Splice(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) { 390 return spliceOrTee(ctx, dst, src, count, true /* removeFromSrc */) 391 } 392 393 // Tee reads up to count bytes from src and writes them to dst, without 394 // removing the read bytes from src. It returns the number of bytes copied. 395 // 396 // Preconditions: count > 0. 397 func Tee(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) { 398 return spliceOrTee(ctx, dst, src, count, false /* removeFromSrc */) 399 } 400 401 // Preconditions: count > 0. 402 func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFromSrc bool) (int64, error) { 403 if dst.pipe == src.pipe { 404 return 0, linuxerr.EINVAL 405 } 406 407 firstLocked, secondLocked := lockTwoPipes(dst.pipe, src.pipe) 408 n, err := dst.pipe.writeLocked(count, func(dsts safemem.BlockSeq) (uint64, error) { 409 n, err := src.pipe.peekLocked(int64(dsts.NumBytes()), func(srcs safemem.BlockSeq) (uint64, error) { 410 return safemem.CopySeq(dsts, srcs) 411 }) 412 if n > 0 && removeFromSrc { 413 src.pipe.consumeLocked(n) 414 } 415 return uint64(n), err 416 }) 417 secondLocked.mu.NestedUnlock(pipeLockPipe) 418 firstLocked.mu.Unlock() 419 420 if n > 0 { 421 dst.pipe.queue.Notify(waiter.ReadableEvents) 422 if removeFromSrc { 423 src.pipe.queue.Notify(waiter.WritableEvents) 424 } 425 } 426 return n, err 427 }