github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/pipe/pipe.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package pipe provides a pipe implementation.
    16  package pipe
    17  
    18  import (
    19  	"fmt"
    20  	"io"
    21  	"sync/atomic"
    22  
    23  	"golang.org/x/sys/unix"
    24  	"github.com/SagerNet/gvisor/pkg/context"
    25  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    26  	"github.com/SagerNet/gvisor/pkg/hostarch"
    27  	"github.com/SagerNet/gvisor/pkg/safemem"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    29  	"github.com/SagerNet/gvisor/pkg/sync"
    30  	"github.com/SagerNet/gvisor/pkg/syserror"
    31  	"github.com/SagerNet/gvisor/pkg/waiter"
    32  )
    33  
    34  const (
    35  	// MinimumPipeSize is a hard limit of the minimum size of a pipe.
    36  	// It corresponds to fs/pipe.c:pipe_min_size.
    37  	MinimumPipeSize = hostarch.PageSize
    38  
    39  	// MaximumPipeSize is a hard limit on the maximum size of a pipe.
    40  	// It corresponds to fs/pipe.c:pipe_max_size.
    41  	MaximumPipeSize = 1048576
    42  
    43  	// DefaultPipeSize is the system-wide default size of a pipe in bytes.
    44  	// It corresponds to pipe_fs_i.h:PIPE_DEF_BUFFERS.
    45  	DefaultPipeSize = 16 * hostarch.PageSize
    46  
    47  	// atomicIOBytes is the maximum number of bytes that the pipe will
    48  	// guarantee atomic reads or writes atomically.
    49  	// It corresponds to limits.h:PIPE_BUF.
    50  	atomicIOBytes = 4096
    51  )
    52  
    53  // Pipe is an encapsulation of a platform-independent pipe.
    54  // It manages a buffered byte queue shared between a reader/writer
    55  // pair.
    56  //
    57  // +stateify savable
    58  type Pipe struct {
    59  	waiter.Queue `state:"nosave"`
    60  
    61  	// isNamed indicates whether this is a named pipe.
    62  	//
    63  	// This value is immutable.
    64  	isNamed bool
    65  
    66  	// The number of active readers for this pipe.
    67  	//
    68  	// Access atomically.
    69  	readers int32
    70  
    71  	// The number of active writes for this pipe.
    72  	//
    73  	// Access atomically.
    74  	writers int32
    75  
    76  	// mu protects all pipe internal state below.
    77  	mu sync.Mutex `state:"nosave"`
    78  
    79  	// buf holds the pipe's data. buf is a circular buffer; the first valid
    80  	// byte in buf is at offset off, and the pipe contains size valid bytes.
    81  	// bufBlocks contains two identical safemem.Blocks representing buf; this
    82  	// avoids needing to heap-allocate a new safemem.Block slice when buf is
    83  	// resized. bufBlockSeq is a safemem.BlockSeq representing bufBlocks.
    84  	//
    85  	// These fields are protected by mu.
    86  	buf         []byte
    87  	bufBlocks   [2]safemem.Block `state:"nosave"`
    88  	bufBlockSeq safemem.BlockSeq `state:"nosave"`
    89  	off         int64
    90  	size        int64
    91  
    92  	// max is the maximum size of the pipe in bytes. When this max has been
    93  	// reached, writers will get EWOULDBLOCK.
    94  	//
    95  	// This is protected by mu.
    96  	max int64
    97  
    98  	// hadWriter indicates if this pipe ever had a writer. Note that this
    99  	// does not necessarily indicate there is *currently* a writer, just
   100  	// that there has been a writer at some point since the pipe was
   101  	// created.
   102  	//
   103  	// This is protected by mu.
   104  	hadWriter bool
   105  }
   106  
   107  // NewPipe initializes and returns a pipe.
   108  //
   109  // N.B. The size will be bounded.
   110  func NewPipe(isNamed bool, sizeBytes int64) *Pipe {
   111  	var p Pipe
   112  	initPipe(&p, isNamed, sizeBytes)
   113  	return &p
   114  }
   115  
   116  func initPipe(pipe *Pipe, isNamed bool, sizeBytes int64) {
   117  	if sizeBytes < MinimumPipeSize {
   118  		sizeBytes = MinimumPipeSize
   119  	}
   120  	if sizeBytes > MaximumPipeSize {
   121  		sizeBytes = MaximumPipeSize
   122  	}
   123  	pipe.isNamed = isNamed
   124  	pipe.max = sizeBytes
   125  }
   126  
   127  // NewConnectedPipe initializes a pipe and returns a pair of objects
   128  // representing the read and write ends of the pipe.
   129  func NewConnectedPipe(ctx context.Context, sizeBytes int64) (*fs.File, *fs.File) {
   130  	p := NewPipe(false /* isNamed */, sizeBytes)
   131  
   132  	// Build an fs.Dirent for the pipe which will be shared by both
   133  	// returned files.
   134  	perms := fs.FilePermissions{
   135  		User: fs.PermMask{Read: true, Write: true},
   136  	}
   137  	iops := NewInodeOperations(ctx, perms, p)
   138  	ino := pipeDevice.NextIno()
   139  	sattr := fs.StableAttr{
   140  		Type:      fs.Pipe,
   141  		DeviceID:  pipeDevice.DeviceID(),
   142  		InodeID:   ino,
   143  		BlockSize: int64(atomicIOBytes),
   144  	}
   145  	ms := fs.NewPseudoMountSource(ctx)
   146  	d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, ms, sattr), fmt.Sprintf("pipe:[%d]", ino))
   147  	// The p.Open calls below will each take a reference on the Dirent. We
   148  	// must drop the one we already have.
   149  	defer d.DecRef(ctx)
   150  	return p.Open(ctx, d, fs.FileFlags{Read: true}), p.Open(ctx, d, fs.FileFlags{Write: true})
   151  }
   152  
   153  // Open opens the pipe and returns a new file.
   154  //
   155  // Precondition: at least one of flags.Read or flags.Write must be set.
   156  func (p *Pipe) Open(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) *fs.File {
   157  	flags.NonSeekable = true
   158  	switch {
   159  	case flags.Read && flags.Write:
   160  		p.rOpen()
   161  		p.wOpen()
   162  		return fs.NewFile(ctx, d, flags, &ReaderWriter{
   163  			Pipe: p,
   164  		})
   165  	case flags.Read:
   166  		p.rOpen()
   167  		return fs.NewFile(ctx, d, flags, &Reader{
   168  			ReaderWriter: ReaderWriter{Pipe: p},
   169  		})
   170  	case flags.Write:
   171  		p.wOpen()
   172  		return fs.NewFile(ctx, d, flags, &Writer{
   173  			ReaderWriter: ReaderWriter{Pipe: p},
   174  		})
   175  	default:
   176  		// Precondition violated.
   177  		panic("invalid pipe flags")
   178  	}
   179  }
   180  
   181  // peekLocked passes the first count bytes in the pipe to f and returns its
   182  // result. If fewer than count bytes are available, the safemem.BlockSeq passed
   183  // to f will be less than count bytes in length.
   184  //
   185  // peekLocked does not mutate the pipe; if the read consumes bytes from the
   186  // pipe, then the caller is responsible for calling p.consumeLocked() and
   187  // p.Notify(waiter.WritableEvents). (The latter must be called with p.mu unlocked.)
   188  //
   189  // Preconditions:
   190  // * p.mu must be locked.
   191  // * This pipe must have readers.
   192  func (p *Pipe) peekLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
   193  	// Don't block for a zero-length read even if the pipe is empty.
   194  	if count == 0 {
   195  		return 0, nil
   196  	}
   197  
   198  	// Limit the amount of data read to the amount of data in the pipe.
   199  	if count > p.size {
   200  		if p.size == 0 {
   201  			if !p.HasWriters() {
   202  				return 0, io.EOF
   203  			}
   204  			return 0, syserror.ErrWouldBlock
   205  		}
   206  		count = p.size
   207  	}
   208  
   209  	// Prepare the view of the data to be read.
   210  	bs := p.bufBlockSeq.DropFirst64(uint64(p.off)).TakeFirst64(uint64(count))
   211  
   212  	// Perform the read.
   213  	done, err := f(bs)
   214  	return int64(done), err
   215  }
   216  
   217  // consumeLocked consumes the first n bytes in the pipe, such that they will no
   218  // longer be visible to future reads.
   219  //
   220  // Preconditions:
   221  // * p.mu must be locked.
   222  // * The pipe must contain at least n bytes.
   223  func (p *Pipe) consumeLocked(n int64) {
   224  	p.off += n
   225  	if max := int64(len(p.buf)); p.off >= max {
   226  		p.off -= max
   227  	}
   228  	p.size -= n
   229  }
   230  
   231  // writeLocked passes a safemem.BlockSeq representing the first count bytes of
   232  // unused space in the pipe to f and returns the result. If fewer than count
   233  // bytes are free, the safemem.BlockSeq passed to f will be less than count
   234  // bytes in length. If the pipe is full or otherwise cannot accomodate a write
   235  // of any number of bytes up to count, writeLocked returns ErrWouldBlock
   236  // without calling f.
   237  //
   238  // Unlike peekLocked, writeLocked assumes that f returns the number of bytes
   239  // written to the pipe, and increases the number of bytes stored in the pipe
   240  // accordingly. Callers are still responsible for calling
   241  // p.Notify(waiter.ReadableEvents) with p.mu unlocked.
   242  //
   243  // Preconditions:
   244  // * p.mu must be locked.
   245  func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
   246  	// Can't write to a pipe with no readers.
   247  	if !p.HasReaders() {
   248  		return 0, unix.EPIPE
   249  	}
   250  
   251  	avail := p.max - p.size
   252  	if avail == 0 {
   253  		return 0, syserror.ErrWouldBlock
   254  	}
   255  	short := false
   256  	if count > avail {
   257  		// POSIX requires that a write smaller than atomicIOBytes
   258  		// (PIPE_BUF) be atomic, but requires no atomicity for writes
   259  		// larger than this.
   260  		if count <= atomicIOBytes {
   261  			return 0, syserror.ErrWouldBlock
   262  		}
   263  		count = avail
   264  		short = true
   265  	}
   266  
   267  	// Ensure that the buffer is big enough.
   268  	if newLen, oldCap := p.size+count, int64(len(p.buf)); newLen > oldCap {
   269  		// Allocate a new buffer.
   270  		newCap := oldCap * 2
   271  		if oldCap == 0 {
   272  			newCap = 8 // arbitrary; sending individual integers across pipes is relatively common
   273  		}
   274  		for newLen > newCap {
   275  			newCap *= 2
   276  		}
   277  		if newCap > p.max {
   278  			newCap = p.max
   279  		}
   280  		newBuf := make([]byte, newCap)
   281  		// Copy the old buffer's contents to the beginning of the new one.
   282  		safemem.CopySeq(
   283  			safemem.BlockSeqOf(safemem.BlockFromSafeSlice(newBuf)),
   284  			p.bufBlockSeq.DropFirst64(uint64(p.off)).TakeFirst64(uint64(p.size)))
   285  		// Switch to the new buffer.
   286  		p.buf = newBuf
   287  		p.bufBlocks[0] = safemem.BlockFromSafeSlice(newBuf)
   288  		p.bufBlocks[1] = p.bufBlocks[0]
   289  		p.bufBlockSeq = safemem.BlockSeqFromSlice(p.bufBlocks[:])
   290  		p.off = 0
   291  	}
   292  
   293  	// Prepare the view of the space to be written.
   294  	woff := p.off + p.size
   295  	if woff >= int64(len(p.buf)) {
   296  		woff -= int64(len(p.buf))
   297  	}
   298  	bs := p.bufBlockSeq.DropFirst64(uint64(woff)).TakeFirst64(uint64(count))
   299  
   300  	// Perform the write.
   301  	doneU64, err := f(bs)
   302  	done := int64(doneU64)
   303  	p.size += done
   304  	if done < count || err != nil {
   305  		return done, err
   306  	}
   307  
   308  	// If we shortened the write, adjust the returned error appropriately.
   309  	if short {
   310  		return done, syserror.ErrWouldBlock
   311  	}
   312  
   313  	return done, nil
   314  }
   315  
   316  // rOpen signals a new reader of the pipe.
   317  func (p *Pipe) rOpen() {
   318  	atomic.AddInt32(&p.readers, 1)
   319  }
   320  
   321  // wOpen signals a new writer of the pipe.
   322  func (p *Pipe) wOpen() {
   323  	p.mu.Lock()
   324  	defer p.mu.Unlock()
   325  	p.hadWriter = true
   326  	atomic.AddInt32(&p.writers, 1)
   327  }
   328  
   329  // rClose signals that a reader has closed their end of the pipe.
   330  func (p *Pipe) rClose() {
   331  	newReaders := atomic.AddInt32(&p.readers, -1)
   332  	if newReaders < 0 {
   333  		panic(fmt.Sprintf("Refcounting bug, pipe has negative readers: %v", newReaders))
   334  	}
   335  }
   336  
   337  // wClose signals that a writer has closed their end of the pipe.
   338  func (p *Pipe) wClose() {
   339  	newWriters := atomic.AddInt32(&p.writers, -1)
   340  	if newWriters < 0 {
   341  		panic(fmt.Sprintf("Refcounting bug, pipe has negative writers: %v.", newWriters))
   342  	}
   343  }
   344  
   345  // HasReaders returns whether the pipe has any active readers.
   346  func (p *Pipe) HasReaders() bool {
   347  	return atomic.LoadInt32(&p.readers) > 0
   348  }
   349  
   350  // HasWriters returns whether the pipe has any active writers.
   351  func (p *Pipe) HasWriters() bool {
   352  	return atomic.LoadInt32(&p.writers) > 0
   353  }
   354  
   355  // rReadinessLocked calculates the read readiness.
   356  //
   357  // Precondition: mu must be held.
   358  func (p *Pipe) rReadinessLocked() waiter.EventMask {
   359  	ready := waiter.EventMask(0)
   360  	if p.HasReaders() && p.size != 0 {
   361  		ready |= waiter.ReadableEvents
   362  	}
   363  	if !p.HasWriters() && p.hadWriter {
   364  		// POLLHUP must be suppressed until the pipe has had at least one writer
   365  		// at some point. Otherwise a reader thread may poll and immediately get
   366  		// a POLLHUP before the writer ever opens the pipe, which the reader may
   367  		// interpret as the writer opening then closing the pipe.
   368  		ready |= waiter.EventHUp
   369  	}
   370  	return ready
   371  }
   372  
   373  // rReadiness returns a mask that states whether the read end of the pipe is
   374  // ready for reading.
   375  func (p *Pipe) rReadiness() waiter.EventMask {
   376  	p.mu.Lock()
   377  	defer p.mu.Unlock()
   378  	return p.rReadinessLocked()
   379  }
   380  
   381  // wReadinessLocked calculates the write readiness.
   382  //
   383  // Precondition: mu must be held.
   384  func (p *Pipe) wReadinessLocked() waiter.EventMask {
   385  	ready := waiter.EventMask(0)
   386  	if p.HasWriters() && p.size < p.max {
   387  		ready |= waiter.WritableEvents
   388  	}
   389  	if !p.HasReaders() {
   390  		ready |= waiter.EventErr
   391  	}
   392  	return ready
   393  }
   394  
   395  // wReadiness returns a mask that states whether the write end of the pipe
   396  // is ready for writing.
   397  func (p *Pipe) wReadiness() waiter.EventMask {
   398  	p.mu.Lock()
   399  	defer p.mu.Unlock()
   400  	return p.wReadinessLocked()
   401  }
   402  
   403  // rwReadiness returns a mask that states whether a read-write handle to the
   404  // pipe is ready for IO.
   405  func (p *Pipe) rwReadiness() waiter.EventMask {
   406  	p.mu.Lock()
   407  	defer p.mu.Unlock()
   408  	return p.rReadinessLocked() | p.wReadinessLocked()
   409  }
   410  
   411  // queued returns the amount of queued data.
   412  func (p *Pipe) queued() int64 {
   413  	p.mu.Lock()
   414  	defer p.mu.Unlock()
   415  	return p.queuedLocked()
   416  }
   417  
   418  func (p *Pipe) queuedLocked() int64 {
   419  	return p.size
   420  }
   421  
   422  // FifoSize implements fs.FifoSizer.FifoSize.
   423  func (p *Pipe) FifoSize(context.Context, *fs.File) (int64, error) {
   424  	p.mu.Lock()
   425  	defer p.mu.Unlock()
   426  	return p.max, nil
   427  }
   428  
   429  // SetFifoSize implements fs.FifoSizer.SetFifoSize.
   430  func (p *Pipe) SetFifoSize(size int64) (int64, error) {
   431  	if size < 0 {
   432  		return 0, linuxerr.EINVAL
   433  	}
   434  	if size < MinimumPipeSize {
   435  		size = MinimumPipeSize // Per spec.
   436  	}
   437  	if size > MaximumPipeSize {
   438  		return 0, linuxerr.EPERM
   439  	}
   440  	p.mu.Lock()
   441  	defer p.mu.Unlock()
   442  	if size < p.size {
   443  		return 0, linuxerr.EBUSY
   444  	}
   445  	p.max = size
   446  	return size, nil
   447  }