github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/pipe/vfs.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package pipe
    16  
    17  import (
    18  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    19  	"github.com/SagerNet/gvisor/pkg/context"
    20  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    21  	"github.com/SagerNet/gvisor/pkg/hostarch"
    22  	"github.com/SagerNet/gvisor/pkg/safemem"
    23  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    25  	"github.com/SagerNet/gvisor/pkg/sync"
    26  	"github.com/SagerNet/gvisor/pkg/syserror"
    27  	"github.com/SagerNet/gvisor/pkg/usermem"
    28  	"github.com/SagerNet/gvisor/pkg/waiter"
    29  )
    30  
    31  // This file contains types enabling the pipe package to be used with the vfs
    32  // package.
    33  
// VFSPipe represents the actual pipe, analogous to an inode. VFSPipes should
// not be copied.
//
// A VFSPipe owns the underlying Pipe; the file descriptions created from it
// hold a pointer to that Pipe.
//
// +stateify savable
type VFSPipe struct {
	// mu protects the fields below.
	mu sync.Mutex `state:"nosave"`

	// pipe is the underlying pipe.
	pipe Pipe

	// Channels for synchronizing the creation of new readers and writers
	// of this fifo. See waitFor and newHandleLocked.
	//
	// These are not saved/restored because all waiters are unblocked on
	// save, and either automatically restart (via ERESTARTSYS) or return
	// EINTR on resume. On restarts via ERESTARTSYS, the appropriate
	// channel will be recreated.
	rWakeup chan struct{} `state:"nosave"`
	wWakeup chan struct{} `state:"nosave"`
}
    55  
    56  // NewVFSPipe returns an initialized VFSPipe.
    57  func NewVFSPipe(isNamed bool, sizeBytes int64) *VFSPipe {
    58  	var vp VFSPipe
    59  	initPipe(&vp.pipe, isNamed, sizeBytes)
    60  	return &vp
    61  }
    62  
    63  // ReaderWriterPair returns read-only and write-only FDs for vp.
    64  //
    65  // Preconditions: statusFlags should not contain an open access mode.
    66  func (vp *VFSPipe) ReaderWriterPair(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32) (*vfs.FileDescription, *vfs.FileDescription, error) {
    67  	// Connected pipes share the same locks.
    68  	locks := &vfs.FileLocks{}
    69  	r, err := vp.newFD(mnt, vfsd, linux.O_RDONLY|statusFlags, locks)
    70  	if err != nil {
    71  		return nil, nil, err
    72  	}
    73  	w, err := vp.newFD(mnt, vfsd, linux.O_WRONLY|statusFlags, locks)
    74  	if err != nil {
    75  		r.DecRef(ctx)
    76  		return nil, nil, err
    77  	}
    78  	return r, w, nil
    79  }
    80  
    81  // Allocate implements vfs.FileDescriptionImpl.Allocate.
    82  func (*VFSPipe) Allocate(context.Context, uint64, uint64, uint64) error {
    83  	return linuxerr.ESPIPE
    84  }
    85  
// Open opens the pipe represented by vp and returns a new FileDescription
// for it.
//
// statusFlags must permit reading, writing, or both; otherwise EINVAL is
// returned. For named pipes, a read-only or write-only open follows the
// fifo(7) semantics quoted below and may wait (via waitFor) for the other end
// to be opened. Whenever the open fails after the FD has been created, the
// FD's reference is dropped before the error is returned.
//
// vp.mu is held for the duration of the call; it is passed to waitFor
// together with the wakeup channel being waited on.
func (vp *VFSPipe) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) {
	vp.mu.Lock()
	defer vp.mu.Unlock()

	// Reject flags that allow neither reading nor writing up front.
	readable := vfs.MayReadFileWithOpenFlags(statusFlags)
	writable := vfs.MayWriteFileWithOpenFlags(statusFlags)
	if !readable && !writable {
		return nil, linuxerr.EINVAL
	}

	// Create the FD first; newFD also registers it as a reader and/or
	// writer on the underlying pipe.
	fd, err := vp.newFD(mnt, vfsd, statusFlags, locks)
	if err != nil {
		return nil, err
	}

	// Named pipes have special blocking semantics during open:
	//
	// "Normally, opening the FIFO blocks until the other end is opened also. A
	// process can open a FIFO in nonblocking mode. In this case, opening for
	// read-only will succeed even if no-one has opened on the write side yet,
	// opening for write-only will fail with ENXIO (no such device or address)
	// unless the other end has already been opened. Under Linux, opening a
	// FIFO for read and write will succeed both in blocking and nonblocking
	// mode. POSIX leaves this behavior undefined. This can be used to open a
	// FIFO for writing while there are no readers available." - fifo(7)
	switch {
	case readable && writable:
		// Pipes opened for read-write always succeed without blocking.
		newHandleLocked(&vp.rWakeup)
		newHandleLocked(&vp.wWakeup)

	case readable:
		newHandleLocked(&vp.rWakeup)
		// If this pipe is being opened as blocking and there's no
		// writer, we have to wait for a writer to open the other end.
		if vp.pipe.isNamed && statusFlags&linux.O_NONBLOCK == 0 && !vp.pipe.HasWriters() && !waitFor(&vp.mu, &vp.wWakeup, ctx) {
			// The wait did not succeed: undo the open and report EINTR.
			fd.DecRef(ctx)
			return nil, syserror.EINTR
		}

	case writable:
		newHandleLocked(&vp.wWakeup)

		if vp.pipe.isNamed && !vp.pipe.HasReaders() {
			// Non-blocking, write-only opens fail with ENXIO when the read
			// side isn't open yet.
			if statusFlags&linux.O_NONBLOCK != 0 {
				fd.DecRef(ctx)
				return nil, linuxerr.ENXIO
			}
			// Wait for a reader to open the other end.
			if !waitFor(&vp.mu, &vp.rWakeup, ctx) {
				// The wait did not succeed: undo the open and report EINTR.
				fd.DecRef(ctx)
				return nil, syserror.EINTR
			}
		}

	default:
		// Not reachable given the EINVAL check above; kept as a guard.
		panic("invalid pipe flags: must be readable, writable, or both")
	}

	return fd, nil
}
   150  
   151  // Preconditions: vp.mu must be held.
   152  func (vp *VFSPipe) newFD(mnt *vfs.Mount, vfsd *vfs.Dentry, statusFlags uint32, locks *vfs.FileLocks) (*vfs.FileDescription, error) {
   153  	fd := &VFSPipeFD{
   154  		pipe: &vp.pipe,
   155  	}
   156  	fd.LockFD.Init(locks)
   157  	if err := fd.vfsfd.Init(fd, statusFlags, mnt, vfsd, &vfs.FileDescriptionOptions{
   158  		DenyPRead:         true,
   159  		DenyPWrite:        true,
   160  		UseDentryMetadata: true,
   161  	}); err != nil {
   162  		return nil, err
   163  	}
   164  
   165  	switch {
   166  	case fd.vfsfd.IsReadable() && fd.vfsfd.IsWritable():
   167  		vp.pipe.rOpen()
   168  		vp.pipe.wOpen()
   169  	case fd.vfsfd.IsReadable():
   170  		vp.pipe.rOpen()
   171  	case fd.vfsfd.IsWritable():
   172  		vp.pipe.wOpen()
   173  	default:
   174  		panic("invalid pipe flags: must be readable, writable, or both")
   175  	}
   176  
   177  	return &fd.vfsfd, nil
   178  }
   179  
// VFSPipeFD implements vfs.FileDescriptionImpl for pipes. It also implements
// non-atomic usermem.IO methods, allowing it to be passed as usermem.IO to
// other FileDescriptions for splice(2) and tee(2).
//
// +stateify savable
type VFSPipeFD struct {
	// vfsfd is the vfs.FileDescription that this implementation is
	// initialized into (see VFSPipe.newFD).
	vfsfd vfs.FileDescription
	vfs.FileDescriptionDefaultImpl
	vfs.DentryMetadataFileDescriptionImpl
	vfs.LockFD

	// pipe points at the Pipe owned by the VFSPipe that created this FD.
	pipe *Pipe
}
   193  
   194  // Release implements vfs.FileDescriptionImpl.Release.
   195  func (fd *VFSPipeFD) Release(context.Context) {
   196  	var event waiter.EventMask
   197  	if fd.vfsfd.IsReadable() {
   198  		fd.pipe.rClose()
   199  		event |= waiter.WritableEvents
   200  	}
   201  	if fd.vfsfd.IsWritable() {
   202  		fd.pipe.wClose()
   203  		event |= waiter.ReadableEvents | waiter.EventHUp
   204  	}
   205  	if event == 0 {
   206  		panic("invalid pipe flags: must be readable, writable, or both")
   207  	}
   208  
   209  	fd.pipe.Notify(event)
   210  }
   211  
   212  // Readiness implements waiter.Waitable.Readiness.
   213  func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask {
   214  	switch {
   215  	case fd.vfsfd.IsReadable() && fd.vfsfd.IsWritable():
   216  		return fd.pipe.rwReadiness()
   217  	case fd.vfsfd.IsReadable():
   218  		return fd.pipe.rReadiness()
   219  	case fd.vfsfd.IsWritable():
   220  		return fd.pipe.wReadiness()
   221  	default:
   222  		panic("pipe FD is neither readable nor writable")
   223  	}
   224  }
   225  
// Allocate implements vfs.FileDescriptionImpl.Allocate.
//
// None of the arguments are used: the call unconditionally fails with
// ESPIPE.
func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
	return linuxerr.ESPIPE
}
   230  
   231  // EventRegister implements waiter.Waitable.EventRegister.
   232  func (fd *VFSPipeFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
   233  	fd.pipe.EventRegister(e, mask)
   234  }
   235  
   236  // EventUnregister implements waiter.Waitable.EventUnregister.
   237  func (fd *VFSPipeFD) EventUnregister(e *waiter.Entry) {
   238  	fd.pipe.EventUnregister(e)
   239  }
   240  
   241  // Read implements vfs.FileDescriptionImpl.Read.
   242  func (fd *VFSPipeFD) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) {
   243  	return fd.pipe.Read(ctx, dst)
   244  }
   245  
   246  // Write implements vfs.FileDescriptionImpl.Write.
   247  func (fd *VFSPipeFD) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) {
   248  	return fd.pipe.Write(ctx, src)
   249  }
   250  
   251  // Ioctl implements vfs.FileDescriptionImpl.Ioctl.
   252  func (fd *VFSPipeFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
   253  	return fd.pipe.Ioctl(ctx, uio, args)
   254  }
   255  
   256  // PipeSize implements fcntl(F_GETPIPE_SZ).
   257  func (fd *VFSPipeFD) PipeSize() int64 {
   258  	// Inline Pipe.FifoSize() since we don't have a fs.File.
   259  	fd.pipe.mu.Lock()
   260  	defer fd.pipe.mu.Unlock()
   261  	return fd.pipe.max
   262  }
   263  
   264  // SetPipeSize implements fcntl(F_SETPIPE_SZ).
   265  func (fd *VFSPipeFD) SetPipeSize(size int64) (int64, error) {
   266  	return fd.pipe.SetFifoSize(size)
   267  }
   268  
   269  // SpliceToNonPipe performs a splice operation from fd to a non-pipe file.
   270  func (fd *VFSPipeFD) SpliceToNonPipe(ctx context.Context, out *vfs.FileDescription, off, count int64) (int64, error) {
   271  	fd.pipe.mu.Lock()
   272  
   273  	// Cap the sequence at number of bytes actually available.
   274  	if count > fd.pipe.size {
   275  		count = fd.pipe.size
   276  	}
   277  	src := usermem.IOSequence{
   278  		IO:    fd,
   279  		Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}),
   280  	}
   281  
   282  	var (
   283  		n   int64
   284  		err error
   285  	)
   286  	if off == -1 {
   287  		n, err = out.Write(ctx, src, vfs.WriteOptions{})
   288  	} else {
   289  		n, err = out.PWrite(ctx, src, off, vfs.WriteOptions{})
   290  	}
   291  	if n > 0 {
   292  		fd.pipe.consumeLocked(n)
   293  	}
   294  
   295  	fd.pipe.mu.Unlock()
   296  
   297  	if n > 0 {
   298  		fd.pipe.Notify(waiter.WritableEvents)
   299  	}
   300  	return n, err
   301  }
   302  
   303  // SpliceFromNonPipe performs a splice operation from a non-pipe file to fd.
   304  func (fd *VFSPipeFD) SpliceFromNonPipe(ctx context.Context, in *vfs.FileDescription, off, count int64) (int64, error) {
   305  	dst := usermem.IOSequence{
   306  		IO:    fd,
   307  		Addrs: hostarch.AddrRangeSeqOf(hostarch.AddrRange{0, hostarch.Addr(count)}),
   308  	}
   309  
   310  	var (
   311  		n   int64
   312  		err error
   313  	)
   314  	fd.pipe.mu.Lock()
   315  	if off == -1 {
   316  		n, err = in.Read(ctx, dst, vfs.ReadOptions{})
   317  	} else {
   318  		n, err = in.PRead(ctx, dst, off, vfs.ReadOptions{})
   319  	}
   320  	fd.pipe.mu.Unlock()
   321  
   322  	if n > 0 {
   323  		fd.pipe.Notify(waiter.ReadableEvents)
   324  	}
   325  	return n, err
   326  }
   327  
   328  // CopyIn implements usermem.IO.CopyIn. Note that it is the caller's
   329  // responsibility to call fd.pipe.consumeLocked() and
   330  // fd.pipe.Notify(waiter.WritableEvents) after the read is completed.
   331  //
   332  // Preconditions: fd.pipe.mu must be locked.
   333  func (fd *VFSPipeFD) CopyIn(ctx context.Context, addr hostarch.Addr, dst []byte, opts usermem.IOOpts) (int, error) {
   334  	n, err := fd.pipe.peekLocked(int64(len(dst)), func(srcs safemem.BlockSeq) (uint64, error) {
   335  		return safemem.CopySeq(safemem.BlockSeqOf(safemem.BlockFromSafeSlice(dst)), srcs)
   336  	})
   337  	return int(n), err
   338  }
   339  
   340  // CopyOut implements usermem.IO.CopyOut. Note that it is the caller's
   341  // responsibility to call fd.pipe.Notify(waiter.ReadableEvents) after the write
   342  // is completed.
   343  //
   344  // Preconditions: fd.pipe.mu must be locked.
   345  func (fd *VFSPipeFD) CopyOut(ctx context.Context, addr hostarch.Addr, src []byte, opts usermem.IOOpts) (int, error) {
   346  	n, err := fd.pipe.writeLocked(int64(len(src)), func(dsts safemem.BlockSeq) (uint64, error) {
   347  		return safemem.CopySeq(dsts, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(src)))
   348  	})
   349  	return int(n), err
   350  }
   351  
   352  // ZeroOut implements usermem.IO.ZeroOut.
   353  //
   354  // Preconditions: fd.pipe.mu must be locked.
   355  func (fd *VFSPipeFD) ZeroOut(ctx context.Context, addr hostarch.Addr, toZero int64, opts usermem.IOOpts) (int64, error) {
   356  	n, err := fd.pipe.writeLocked(toZero, func(dsts safemem.BlockSeq) (uint64, error) {
   357  		return safemem.ZeroSeq(dsts)
   358  	})
   359  	return n, err
   360  }
   361  
   362  // CopyInTo implements usermem.IO.CopyInTo. Note that it is the caller's
   363  // responsibility to call fd.pipe.consumeLocked() and
   364  // fd.pipe.Notify(waiter.WritableEvents) after the read is completed.
   365  //
   366  // Preconditions: fd.pipe.mu must be locked.
   367  func (fd *VFSPipeFD) CopyInTo(ctx context.Context, ars hostarch.AddrRangeSeq, dst safemem.Writer, opts usermem.IOOpts) (int64, error) {
   368  	return fd.pipe.peekLocked(ars.NumBytes(), func(srcs safemem.BlockSeq) (uint64, error) {
   369  		return dst.WriteFromBlocks(srcs)
   370  	})
   371  }
   372  
   373  // CopyOutFrom implements usermem.IO.CopyOutFrom. Note that it is the caller's
   374  // responsibility to call fd.pipe.Notify(waiter.ReadableEvents) after the write
   375  // is completed.
   376  //
   377  // Preconditions: fd.pipe.mu must be locked.
   378  func (fd *VFSPipeFD) CopyOutFrom(ctx context.Context, ars hostarch.AddrRangeSeq, src safemem.Reader, opts usermem.IOOpts) (int64, error) {
   379  	return fd.pipe.writeLocked(ars.NumBytes(), func(dsts safemem.BlockSeq) (uint64, error) {
   380  		return src.ReadToBlocks(dsts)
   381  	})
   382  }
   383  
// SwapUint32 implements usermem.IO.SwapUint32.
//
// It is not supported on pipes and always panics.
func (fd *VFSPipeFD) SwapUint32(ctx context.Context, addr hostarch.Addr, new uint32, opts usermem.IOOpts) (uint32, error) {
	// How did a pipe get passed as the virtual address space to futex(2)?
	panic("VFSPipeFD.SwapUint32 called unexpectedly")
}
   389  
// CompareAndSwapUint32 implements usermem.IO.CompareAndSwapUint32.
//
// It is not supported on pipes and always panics.
func (fd *VFSPipeFD) CompareAndSwapUint32(ctx context.Context, addr hostarch.Addr, old, new uint32, opts usermem.IOOpts) (uint32, error) {
	panic("VFSPipeFD.CompareAndSwapUint32 called unexpectedly")
}
   394  
// LoadUint32 implements usermem.IO.LoadUint32.
//
// It is not supported on pipes and always panics.
func (fd *VFSPipeFD) LoadUint32(ctx context.Context, addr hostarch.Addr, opts usermem.IOOpts) (uint32, error) {
	panic("VFSPipeFD.LoadUint32 called unexpectedly")
}
   399  
   400  // Splice reads up to count bytes from src and writes them to dst. It returns
   401  // the number of bytes moved.
   402  //
   403  // Preconditions: count > 0.
   404  func Splice(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) {
   405  	return spliceOrTee(ctx, dst, src, count, true /* removeFromSrc */)
   406  }
   407  
   408  // Tee reads up to count bytes from src and writes them to dst, without
   409  // removing the read bytes from src. It returns the number of bytes copied.
   410  //
   411  // Preconditions: count > 0.
   412  func Tee(ctx context.Context, dst, src *VFSPipeFD, count int64) (int64, error) {
   413  	return spliceOrTee(ctx, dst, src, count, false /* removeFromSrc */)
   414  }
   415  
   416  // Preconditions: count > 0.
   417  func spliceOrTee(ctx context.Context, dst, src *VFSPipeFD, count int64, removeFromSrc bool) (int64, error) {
   418  	if dst.pipe == src.pipe {
   419  		return 0, linuxerr.EINVAL
   420  	}
   421  
   422  	lockTwoPipes(dst.pipe, src.pipe)
   423  	n, err := dst.pipe.writeLocked(count, func(dsts safemem.BlockSeq) (uint64, error) {
   424  		n, err := src.pipe.peekLocked(int64(dsts.NumBytes()), func(srcs safemem.BlockSeq) (uint64, error) {
   425  			return safemem.CopySeq(dsts, srcs)
   426  		})
   427  		if n > 0 && removeFromSrc {
   428  			src.pipe.consumeLocked(n)
   429  		}
   430  		return uint64(n), err
   431  	})
   432  	dst.pipe.mu.Unlock()
   433  	src.pipe.mu.Unlock()
   434  
   435  	if n > 0 {
   436  		dst.pipe.Notify(waiter.ReadableEvents)
   437  		if removeFromSrc {
   438  			src.pipe.Notify(waiter.WritableEvents)
   439  		}
   440  	}
   441  	return n, err
   442  }