github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/kernel/pipe/vfs.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package pipe
    16  
    17  import (
    18  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    19  	"github.com/MerlinKodo/gvisor/pkg/context"
    20  	"github.com/MerlinKodo/gvisor/pkg/errors/linuxerr"
    21  	"github.com/MerlinKodo/gvisor/pkg/hostarch"
    22  	"github.com/MerlinKodo/gvisor/pkg/safemem"
    23  	"github.com/MerlinKodo/gvisor/pkg/sentry/arch"
    24  	"github.com/MerlinKodo/gvisor/pkg/sentry/vfs"
    25  	"github.com/MerlinKodo/gvisor/pkg/usermem"
    26  	"github.com/MerlinKodo/gvisor/pkg/waiter"
    27  )
    28  
    29  // This file contains types enabling the pipe package to be used with the vfs
    30  // package.
    31  
// VFSPipe represents the actual pipe, analogous to an inode. VFSPipes should
// not be copied: file descriptions created from a VFSPipe hold a pointer to
// its embedded pipe field.
//
// +stateify savable
type VFSPipe struct {
	// pipe is the underlying pipe. It is stored by value; FDs reference it
	// via &vp.pipe.
	pipe Pipe
}
    40  
    41  // NewVFSPipe returns an initialized VFSPipe.
    42  func NewVFSPipe(isNamed bool, sizeBytes int64) *VFSPipe {
    43  	var vp VFSPipe
    44  	initPipe(&vp.pipe, isNamed, sizeBytes)
    45  	return &vp
    46  }
    47  
    48  // ReaderWriterPair returns read-only and write-only FDs for vp.
    49  //
    50  // Preconditions: statusFlags should not contain an open access mode.
    51  func (vp *VFSPipe) ReaderWriterPair(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription, error) {
    52  	// Connected pipes share the same locks.
    53  	locks := &vfs.FileLocks{}
    54  	r, err := vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags, locks)
    55  	if err != nil {
    56  		return nil, nil, err
    57  	}
    58  	vp.pipe.rOpen()
    59  	w, err := vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags, locks)
    60  	if err != nil {
    61  		r.DecRef(ctx)
    62  		return nil, nil, err
    63  	}
    64  	vp.pipe.wOpen()
    65  	return r, w, nil
    66  }
    67  
// Allocate implements vfs.FileDescriptionImpl.Allocate.
//
// All arguments are ignored; the call always fails with ESPIPE.
func (*VFSPipe) Allocate(context.Context, uint64, uint64, uint64) error {
	return linuxerr.ESPIPE
}
    72  
    73  // Open opens the pipe represented by vp.
    74  func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) {
    75  	readable := vfs.MayReadFileWithOpenFlags(statusFlags)
    76  	writable := vfs.MayWriteFileWithOpenFlags(statusFlags)
    77  	if !readable && !writable {
    78  		return nil, linuxerr.EINVAL
    79  	}
    80  
    81  	fd, err := vp.newFD(mnt, vfsd, statusFlags, locks)
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  
    86  	// Named pipes have special blocking semantics during open:
    87  	//
    88  	// "Normally, opening the FIFO blocks until the other end is opened also. A
    89  	// process can open a FIFO in nonblocking mode. In this case, opening for
    90  	// read-only will succeed even if no-one has opened on the write side yet,
    91  	// opening for write-only will fail with ENXIO (no such device or address)
    92  	// unless the other end has already been opened. Under Linux, opening a
    93  	// FIFO for read and write will succeed both in blocking and nonblocking
    94  	// mode. POSIX leaves this behavior undefined. This can be used to open a
    95  	// FIFO for writing while there are no readers available." - fifo(7)
    96  	switch {
    97  	case readable && writable:
    98  		vp.pipe.rOpen()
    99  		vp.pipe.wOpen()
   100  		// Pipes opened for read-write always succeed without blocking.
   101  
   102  	case readable:
   103  		tWriters := vp.pipe.totalWriters.Load()
   104  		vp.pipe.rOpen()
   105  		// If this pipe is being opened as blocking and there's no
   106  		// writer, we have to wait for a writer to open the other end.
   107  		for vp.pipe.isNamed && statusFlags&linux.O_NONBLOCK == 0 && !vp.pipe.HasWriters() &&
   108  			tWriters == vp.pipe.totalWriters.Load() {
   109  			if !ctx.BlockOn((*waitWriters)(&vp.pipe), waiter.EventInternal) {
   110  				fd.DecRef(ctx)
   111  				return nil, linuxerr.EINTR
   112  			}
   113  		}
   114  
   115  	case writable:
   116  		tReaders := vp.pipe.totalReaders.Load()
   117  		vp.pipe.wOpen()
   118  		for vp.pipe.isNamed && !vp.pipe.HasReaders() &&
   119  			tReaders == vp.pipe.totalReaders.Load() {
   120  			// Non-blocking, write-only opens fail with ENXIO when the read
   121  			// side isn't open yet.
   122  			if statusFlags&linux.O_NONBLOCK != 0 {
   123  				fd.DecRef(ctx)
   124  				return nil, linuxerr.ENXIO
   125  			}
   126  			if !ctx.BlockOn((*waitReaders)(&vp.pipe), waiter.EventInternal) {
   127  				fd.DecRef(ctx)
   128  				return nil, linuxerr.EINTR
   129  			}
   130  		}
   131  
   132  	default:
   133  		panic("invalid pipe flags: must be readable, writable, or both")
   134  	}
   135  
   136  	return fd, nil
   137  }
   138  
   139  // Preconditions: vp.mu must be held.
   140  func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) {
   141  	fd := &VFSPipeFD{
   142  		pipe: &vp.pipe,
   143  	}
   144  	fd.LockFD.Init(locks)
   145  	if err := fd.vfsfd.Init(fd, statusFlags, mnt, vfsd, &vfs.FileDescriptionOptions{
   146  		DenyPRead:         true,
   147  		DenyPWrite:        true,
   148  		UseDentryMetadata: true,
   149  	}); err != nil {
   150  		return nil, err
   151  	}
   152  
   153  	return &fd.vfsfd, nil
   154  }
   155  
// VFSPipeFD implements vfs.FileDescriptionImpl for pipes. It also implements
// non-atomic usermem.IO methods, allowing it to be passed as usermem.IO to
// other FileDescriptions for splice(2) and tee(2).
//
// +stateify savable
type VFSPipeFD struct {
	// vfsfd is the vfs.FileDescription that newFD initializes with this
	// VFSPipeFD as its implementation.
	vfsfd vfs.FileDescription
	vfs.FileDescriptionDefaultImpl
	vfs.DentryMetadataFileDescriptionImpl
	// LockFD is initialized by newFD with the FileLocks supplied by the
	// caller.
	vfs.LockFD

	// pipe points at the Pipe embedded in the VFSPipe this FD was created
	// from.
	pipe *Pipe
}
   169  
   170  // Release implements vfs.FileDescriptionImpl.Release.
   171  func (fd *VFSPipeFD) Release(context.Context) {
   172  	var event waiter.EventMask
   173  	if fd.vfsfd.IsReadable() {
   174  		fd.pipe.rClose()
   175  		event |= waiter.WritableEvents
   176  	}
   177  	if fd.vfsfd.IsWritable() {
   178  		fd.pipe.wClose()
   179  		event |= waiter.ReadableEvents | waiter.EventHUp
   180  	}
   181  	if event == 0 {
   182  		panic("invalid pipe flags: must be readable, writable, or both")
   183  	}
   184  
   185  	fd.pipe.queue.Notify(event)
   186  }
   187  
   188  // Readiness implements waiter.Waitable.Readiness.
   189  func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask {
   190  	switch {
   191  	case fd.vfsfd.IsReadable() && fd.vfsfd.IsWritable():
   192  		return fd.pipe.rwReadiness()
   193  	case fd.vfsfd.IsReadable():
   194  		return fd.pipe.rReadiness()
   195  	case fd.vfsfd.IsWritable():
   196  		return fd.pipe.wReadiness()
   197  	default:
   198  		panic("pipe FD is neither readable nor writable")
   199  	}
   200  }
   201  
// Allocate implements vfs.FileDescriptionImpl.Allocate.
//
// All arguments are ignored; the call always fails with ESPIPE.
func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
	return linuxerr.ESPIPE
}
   206  
   207  // EventRegister implements waiter.Waitable.EventRegister.
   208  func (fd *VFSPipeFD) EventRegister(e *waiter.Entry) error {
   209  	fd.pipe.EventRegister(e)
   210  
   211  	// Notify synchronously.
   212  	e.NotifyEvent(fd.Readiness(^waiter.EventMask(0)))
   213  	return nil
   214  }
   215  
// EventUnregister implements waiter.Waitable.EventUnregister by unregistering
// e from the underlying pipe.
func (fd *VFSPipeFD) EventUnregister(e *waiter.Entry) {
	fd.pipe.EventUnregister(e)
}
   220  
// Epollable implements FileDescriptionImpl.Epollable. It always reports true
// for pipe FDs.
func (fd *VFSPipeFD) Epollable() bool {
	return true
}
   225  
// Read implements vfs.FileDescriptionImpl.Read by delegating to the
// underlying pipe's Read. The vfs.ReadOptions argument is ignored.
func (fd *VFSPipeFD) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) {
	return fd.pipe.Read(ctx, dst)
}
   230  
// Write implements vfs.FileDescriptionImpl.Write by delegating to the
// underlying pipe's Write. The vfs.WriteOptions argument is ignored.
func (fd *VFSPipeFD) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) {
	return fd.pipe.Write(ctx, src)
}
   235  
// Ioctl implements vfs.FileDescriptionImpl.Ioctl by forwarding all arguments
// unchanged to the underlying pipe's Ioctl.
func (fd *VFSPipeFD) Ioctl(ctx context.Context, uio usermem.IO, sysno uintptr, args arch.SyscallArguments) (uintptr, error) {
	return fd.pipe.Ioctl(ctx, uio, sysno, args)
}
   240  
   241  // PipeSize implements fcntl(F_GETPIPE_SZ).
   242  func (fd *VFSPipeFD) PipeSize() int64 {
   243  	// Inline Pipe.FifoSize() since we don't have a fs.File.
   244  	fd.pipe.mu.Lock()
   245  	defer fd.pipe.mu.Unlock()
   246  	return fd.pipe.max
   247  }
   248  
// SetPipeSize implements fcntl(F_SETPIPE_SZ) by delegating to the underlying
// pipe's SetFifoSize and returning its results unchanged.
func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) {
	return fd.pipe.SetFifoSize(size)
}
   253  
   254  // SpliceToNonPipe performs a splice operation from fd to a non-pipe file.
   255  func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescription, off, count int64) (int64, error) {
   256  	fd.pipe.mu.Lock()
   257  
   258  	// Cap the sequence at number of bytes actually available.
   259  	if count > fd.pipe.size {
   260  		count = fd.pipe.size
   261  	}
   262  	src := usermem.IOSequence{
   263  		IO:    fd,
   264  		Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}),
   265  	}
   266  
   267  	var (
   268  		n   int64
   269  		err error
   270  	)
   271  	if off == -1 {
   272  		n, err = out.Write(ctx, src, vfs.WriteOptions{})
   273  	} else {
   274  		n, err = out.PWrite(ctx, src, off, vfs.WriteOptions{})
   275  	}
   276  	if n > 0 {
   277  		fd.pipe.consumeLocked(n)
   278  	}
   279  
   280  	fd.pipe.mu.Unlock()
   281  
   282  	if n > 0 {
   283  		fd.pipe.queue.Notify(waiter.WritableEvents)
   284  	}
   285  	return n, err
   286  }
   287  
   288  // SpliceFromNonPipe performs a splice operation from a non-pipe file to fd.
   289  func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) {
   290  	dst := usermem.IOSequence{
   291  		IO:    fd,
   292  		Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}),
   293  	}
   294  
   295  	var (
   296  		n   int64
   297  		err error
   298  	)
   299  	fd.pipe.mu.Lock()
   300  	if off == -1 {
   301  		n, err = in.Read(ctx, dst, vfs.ReadOptions{})
   302  	} else {
   303  		n, err = in.PRead(ctx, dst, off, vfs.ReadOptions{})
   304  	}
   305  	fd.pipe.mu.Unlock()
   306  
   307  	if n > 0 {
   308  		fd.pipe.queue.Notify(waiter.ReadableEvents)
   309  	}
   310  	return n, err
   311  }
   312  
   313  // CopyIn implements usermem.IO.CopyIn. Note that it is the caller's
   314  // responsibility to call fd.pipe.consumeLocked() and
   315  // fd.pipe.Notify(waiter.WritableEvents) after the read is completed.
   316  //
   317  // Preconditions: fd.pipe.mu must be locked.
   318  func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
   319  	n, err := fd.pipe.peekLocked(int64(len(dst)), func(srcs safemem.BlockSeq) (uint64, error) {
   320  		return safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), srcs)
   321  	})
   322  	return int(n), err
   323  }
   324  
   325  // CopyOut implements usermem.IO.CopyOut. Note that it is the caller's
   326  // responsibility to call fd.pipe.queue.Notify(waiter.ReadableEvents) after the
   327  // write is completed.
   328  //
   329  // Preconditions: fd.pipe.mu must be locked.
   330  func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) {
   331  	n, err := fd.pipe.writeLocked(int64(len(src)), func(dsts safemem.BlockSeq) (uint64, error) {
   332  		return safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
   333  	})
   334  	return int(n), err
   335  }
   336  
   337  // ZeroOut implements usermem.IO.ZeroOut.
   338  //
   339  // Preconditions: fd.pipe.mu must be locked.
   340  func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
   341  	n, err := fd.pipe.writeLocked(toZero, func(dsts safemem.BlockSeq) (uint64, error) {
   342  		return safemem.ZeroSeq(dsts)
   343  	})
   344  	return n, err
   345  }
   346  
   347  // CopyInTo implements usermem.IO.CopyInTo. Note that it is the caller's
   348  // responsibility to call fd.pipe.consumeLocked() and
   349  // fd.pipe.queue.Notify(waiter.WritableEvents) after the read is completed.
   350  //
   351  // Preconditions: fd.pipe.mu must be locked.
   352  func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
   353  	return fd.pipe.peekLocked(ars.NumBytes(), func(srcs safemem.BlockSeq) (uint64, error) {
   354  		return dst.WriteFromBlocks(srcs)
   355  	})
   356  }
   357  
   358  // CopyOutFrom implements usermem.IO.CopyOutFrom. Note that it is the caller's
   359  // responsibility to call fd.pipe.queue.Notify(waiter.ReadableEvents) after the
   360  // write is completed.
   361  //
   362  // Preconditions: fd.pipe.mu must be locked.
   363  func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
   364  	return fd.pipe.writeLocked(ars.NumBytes(), func(dsts safemem.BlockSeq) (uint64, error) {
   365  		return src.ReadToBlocks(dsts)
   366  	})
   367  }
   368  
// SwapUint32 implements usermem.IO.SwapUint32. It is not supported on a pipe
// FD and always panics.
func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) {
	// How did a pipe get passed as the virtual address space to futex(2)?
	panic("VFSPipeFD.SwapUint32 called unexpectedly")
}
   374  
// CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32. It is not
// supported on a pipe FD and always panics.
func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) {
	panic("VFSPipeFD.CompareAndSwapUint32 called unexpectedly")
}
   379  
// LoadUint32 implements usermem.IO.LoadUint32. It is not supported on a pipe
// FD and always panics.
func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) {
	panic("VFSPipeFD.LoadUint32 called unexpectedly")
}
   384  
// Splice reads up to count bytes from src and writes them to dst. It returns
// the number of bytes moved.
//
// It is spliceOrTee with removeFromSrc set to true, so the bytes moved are
// consumed from src. EINVAL is returned if dst and src refer to the same
// pipe.
//
// Preconditions: count > 0.
func Splice(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) {
	return spliceOrTee(ctx, dst, src, count, true /* removeFromSrc */)
}
   392  
// Tee reads up to count bytes from src and writes them to dst, without
// removing the read bytes from src. It returns the number of bytes copied.
//
// It is spliceOrTee with removeFromSrc set to false. EINVAL is returned if
// dst and src refer to the same pipe.
//
// Preconditions: count > 0.
func Tee(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) {
	return spliceOrTee(ctx, dst, src, count, false /* removeFromSrc */)
}
   400  
   401  // Preconditions: count > 0.
   402  func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFromSrc bool) (int64, error) {
   403  	if dst.pipe == src.pipe {
   404  		return 0, linuxerr.EINVAL
   405  	}
   406  
   407  	firstLocked, secondLocked := lockTwoPipes(dst.pipe, src.pipe)
   408  	n, err := dst.pipe.writeLocked(count, func(dsts safemem.BlockSeq) (uint64, error) {
   409  		n, err := src.pipe.peekLocked(int64(dsts.NumBytes()), func(srcs safemem.BlockSeq) (uint64, error) {
   410  			return safemem.CopySeq(dsts, srcs)
   411  		})
   412  		if n > 0 && removeFromSrc {
   413  			src.pipe.consumeLocked(n)
   414  		}
   415  		return uint64(n), err
   416  	})
   417  	secondLocked.mu.NestedUnlock(pipeLockPipe)
   418  	firstLocked.mu.Unlock()
   419  
   420  	if n > 0 {
   421  		dst.pipe.queue.Notify(waiter.ReadableEvents)
   422  		if removeFromSrc {
   423  			src.pipe.queue.Notify(waiter.WritableEvents)
   424  		}
   425  	}
   426  	return n, err
   427  }