github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/kernel/pipe/pipe.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package pipe provides a pipe implementation.
    16  package pipe
    17  
    18  import (
    19  	"fmt"
    20  	"io"
    21  
    22  	"golang.org/x/sys/unix"
    23  	"github.com/metacubex/gvisor/pkg/atomicbitops"
    24  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    25  	"github.com/metacubex/gvisor/pkg/hostarch"
    26  	"github.com/metacubex/gvisor/pkg/safemem"
    27  	"github.com/metacubex/gvisor/pkg/waiter"
    28  )
    29  
    30  const (
    31  	// MinimumPipeSize is a hard limit of the minimum size of a pipe.
    32  	// It corresponds to fs/pipe.c:pipe_min_size.
    33  	MinimumPipeSize = hostarch.PageSize
    34  
    35  	// MaximumPipeSize is a hard limit on the maximum size of a pipe.
    36  	// It corresponds to fs/pipe.c:pipe_max_size.
    37  	MaximumPipeSize = 1048576
    38  
    39  	// DefaultPipeSize is the system-wide default size of a pipe in bytes.
    40  	// It corresponds to pipe_fs_i.h:PIPE_DEF_BUFFERS.
    41  	DefaultPipeSize = 16 * hostarch.PageSize
    42  
    43  	// atomicIOBytes is the maximum number of bytes that the pipe will
    44  	// guarantee atomic reads or writes atomically.
    45  	// It corresponds to limits.h:PIPE_BUF.
    46  	atomicIOBytes = 4096
    47  )
    48  
// waitReaders is a wrapper around Pipe.
//
// It gives a Pipe an alternative waiter.Waitable method set whose
// EventRegister notifies the new entry synchronously when the pipe already
// has readers.
//
// This is used for ctx.Block operations that require the synchronization of
// readers and writers, along with the careful grabbing and releasing of locks.
type waitReaders Pipe
    54  
    55  // Readiness implements waiter.Waitable.Readiness.
    56  func (wq *waitReaders) Readiness(mask waiter.EventMask) waiter.EventMask {
    57  	return ((*Pipe)(wq)).rwReadiness() & mask
    58  }
    59  
    60  // EventRegister implements waiter.Waitable.EventRegister.
    61  func (wq *waitReaders) EventRegister(e *waiter.Entry) error {
    62  	((*Pipe)(wq)).queue.EventRegister(e)
    63  
    64  	// Notify synchronously.
    65  	if ((*Pipe)(wq)).HasReaders() {
    66  		e.NotifyEvent(waiter.EventInternal)
    67  	}
    68  
    69  	return nil
    70  }
    71  
    72  // EventUnregister implements waiter.Waitable.EventUnregister.
    73  func (wq *waitReaders) EventUnregister(e *waiter.Entry) {
    74  	((*Pipe)(wq)).queue.EventUnregister(e)
    75  }
    76  
// waitWriters is a wrapper around Pipe.
//
// It gives a Pipe an alternative waiter.Waitable method set whose
// EventRegister notifies the new entry synchronously when the pipe already
// has writers.
//
// This is used for ctx.Block operations that require the synchronization of
// readers and writers, along with the careful grabbing and releasing of locks.
type waitWriters Pipe
    82  
    83  // Readiness implements waiter.Waitable.Readiness.
    84  func (wq *waitWriters) Readiness(mask waiter.EventMask) waiter.EventMask {
    85  	return ((*Pipe)(wq)).rwReadiness() & mask
    86  }
    87  
    88  // EventRegister implements waiter.Waitable.EventRegister.
    89  func (wq *waitWriters) EventRegister(e *waiter.Entry) error {
    90  	((*Pipe)(wq)).queue.EventRegister(e)
    91  
    92  	// Notify synchronously.
    93  	if ((*Pipe)(wq)).HasWriters() {
    94  		e.NotifyEvent(waiter.EventInternal)
    95  	}
    96  
    97  	return nil
    98  }
    99  
   100  // EventUnregister implements waiter.Waitable.EventUnregister.
   101  func (wq *waitWriters) EventUnregister(e *waiter.Entry) {
   102  	((*Pipe)(wq)).queue.EventUnregister(e)
   103  }
   104  
// Pipe is an encapsulation of a platform-independent pipe.
// It manages a buffered byte queue shared between a reader/writer
// pair.
//
// The reader/writer counters are atomics and may be read without mu; the
// buffer, its bookkeeping, max and hadWriter are protected by mu.
//
// +stateify savable
type Pipe struct {
	// queue is the waiter queue. Open events are announced on it with
	// waiter.EventInternal (see rOpen and wOpen).
	queue waiter.Queue

	// isNamed indicates whether this is a named pipe.
	//
	// This value is immutable.
	isNamed bool

	// readers is the number of active readers for this pipe. It is
	// incremented by rOpen and decremented by rClose.
	readers atomicbitops.Int32

	// totalReaders is the total number of readers for this pipe. It is
	// incremented by rOpen; nothing in this file decrements it.
	totalReaders atomicbitops.Int32

	// writers is the number of active writers for this pipe. It is
	// incremented by wOpen and decremented by wClose.
	writers atomicbitops.Int32

	// totalWriters is the total number of writers for this pipe. It is
	// incremented by wOpen; nothing in this file decrements it.
	totalWriters atomicbitops.Int32

	// mu protects all pipe internal state below.
	mu pipeMutex `state:"nosave"`

	// buf holds the pipe's data. buf is a circular buffer; the first valid
	// byte in buf is at offset off, and the pipe contains size valid bytes.
	// bufBlocks contains two identical safemem.Blocks representing buf; this
	// avoids needing to heap-allocate a new safemem.Block slice when buf is
	// resized. bufBlockSeq is a safemem.BlockSeq representing bufBlocks.
	//
	// buf is allocated lazily and grown on demand by writeLocked, never
	// beyond max bytes.
	//
	// These fields are protected by mu.
	buf         []byte
	bufBlocks   [2]safemem.Block `state:"nosave"`
	bufBlockSeq safemem.BlockSeq `state:"nosave"`
	off         int64
	size        int64

	// max is the maximum size of the pipe in bytes. When this max has been
	// reached, writeLocked returns linuxerr.ErrWouldBlock. It is set by
	// initPipe and may be changed by SetFifoSize.
	//
	// This is protected by mu.
	max int64

	// hadWriter indicates if this pipe ever had a writer. Note that this
	// does not necessarily indicate there is *currently* a writer, just
	// that there has been a writer at some point since the pipe was
	// created. It is set by wOpen and consulted by rReadinessLocked to
	// decide whether EventHUp may be reported.
	//
	// This is protected by mu.
	hadWriter bool
}
   161  
   162  // NewPipe initializes and returns a pipe.
   163  //
   164  // N.B. The size will be bounded.
   165  func NewPipe(isNamed bool, sizeBytes int64) *Pipe {
   166  	var p Pipe
   167  	initPipe(&p, isNamed, sizeBytes)
   168  	return &p
   169  }
   170  
   171  func initPipe(pipe *Pipe, isNamed bool, sizeBytes int64) {
   172  	if sizeBytes < MinimumPipeSize {
   173  		sizeBytes = MinimumPipeSize
   174  	}
   175  	if sizeBytes > MaximumPipeSize {
   176  		sizeBytes = MaximumPipeSize
   177  	}
   178  	pipe.isNamed = isNamed
   179  	pipe.max = sizeBytes
   180  }
   181  
   182  // peekLocked passes the first count bytes in the pipe, starting at offset off,
   183  // to f and returns its result. If fewer than count bytes are available, the
   184  // safemem.BlockSeq passed to f will be less than count bytes in length.
   185  //
   186  // peekLocked does not mutate the pipe; if the read consumes bytes from the
   187  // pipe, then the caller is responsible for calling p.consumeLocked() and
   188  // p.queue.Notify(waiter.WritableEvents). (The latter must be called with p.mu
   189  // unlocked.)
   190  //
   191  // Preconditions:
   192  //   - p.mu must be locked.
   193  //   - This pipe must have readers.
   194  //   - off <= p.size.
   195  func (p *Pipe) peekLocked(off, count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
   196  	// Don't block for a zero-length read even if the pipe is empty.
   197  	if count == 0 {
   198  		return 0, nil
   199  	}
   200  
   201  	// Limit the amount of data read to the amount of data in the pipe.
   202  	if rem := p.size - off; count > rem {
   203  		if rem == 0 {
   204  			if !p.HasWriters() {
   205  				return 0, io.EOF
   206  			}
   207  			return 0, linuxerr.ErrWouldBlock
   208  		}
   209  		count = rem
   210  	}
   211  
   212  	// Prepare the view of the data to be read.
   213  	pipeOff := p.off + off
   214  	if max := int64(len(p.buf)); pipeOff >= max {
   215  		pipeOff -= max
   216  	}
   217  	bs := p.bufBlockSeq.DropFirst64(uint64(pipeOff)).TakeFirst64(uint64(count))
   218  
   219  	// Perform the read.
   220  	done, err := f(bs)
   221  	return int64(done), err
   222  }
   223  
   224  // consumeLocked consumes the first n bytes in the pipe, such that they will no
   225  // longer be visible to future reads.
   226  //
   227  // Preconditions:
   228  //   - p.mu must be locked.
   229  //   - The pipe must contain at least n bytes.
   230  func (p *Pipe) consumeLocked(n int64) {
   231  	p.off += n
   232  	if max := int64(len(p.buf)); p.off >= max {
   233  		p.off -= max
   234  	}
   235  	p.size -= n
   236  }
   237  
   238  // writeLocked passes a safemem.BlockSeq representing the first count bytes of
   239  // unused space in the pipe to f and returns the result. If fewer than count
   240  // bytes are free, the safemem.BlockSeq passed to f will be less than count
   241  // bytes in length. If the pipe is full or otherwise cannot accommodate a write
   242  // of any number of bytes up to count, writeLocked returns ErrWouldBlock
   243  // without calling f.
   244  //
   245  // Unlike peekLocked, writeLocked assumes that f returns the number of bytes
   246  // written to the pipe, and increases the number of bytes stored in the pipe
   247  // accordingly. Callers are still responsible for calling
   248  // p.queue.Notify(waiter.ReadableEvents) with p.mu unlocked.
   249  //
   250  // Preconditions:
   251  //   - p.mu must be locked.
   252  func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
   253  	// Can't write to a pipe with no readers.
   254  	if !p.HasReaders() {
   255  		return 0, unix.EPIPE
   256  	}
   257  
   258  	avail := p.max - p.size
   259  	if avail == 0 {
   260  		return 0, linuxerr.ErrWouldBlock
   261  	}
   262  	short := false
   263  	if count > avail {
   264  		// POSIX requires that a write smaller than atomicIOBytes
   265  		// (PIPE_BUF) be atomic, but requires no atomicity for writes
   266  		// larger than this.
   267  		if count <= atomicIOBytes {
   268  			return 0, linuxerr.ErrWouldBlock
   269  		}
   270  		count = avail
   271  		short = true
   272  	}
   273  
   274  	// Ensure that the buffer is big enough.
   275  	if newLen, oldCap := p.size+count, int64(len(p.buf)); newLen > oldCap {
   276  		// Allocate a new buffer.
   277  		newCap := oldCap * 2
   278  		if oldCap == 0 {
   279  			newCap = 8 // arbitrary; sending individual integers across pipes is relatively common
   280  		}
   281  		for newLen > newCap {
   282  			newCap *= 2
   283  		}
   284  		if newCap > p.max {
   285  			newCap = p.max
   286  		}
   287  		newBuf := make([]byte, newCap)
   288  		// Copy the old buffer's contents to the beginning of the new one.
   289  		safemem.CopySeq(
   290  			safemem.BlockSeqOf(safemem.BlockFromSafeSlice(newBuf)),
   291  			p.bufBlockSeq.DropFirst64(uint64(p.off)).TakeFirst64(uint64(p.size)))
   292  		// Switch to the new buffer.
   293  		p.buf = newBuf
   294  		p.bufBlocks[0] = safemem.BlockFromSafeSlice(newBuf)
   295  		p.bufBlocks[1] = p.bufBlocks[0]
   296  		p.bufBlockSeq = safemem.BlockSeqFromSlice(p.bufBlocks[:])
   297  		p.off = 0
   298  	}
   299  
   300  	// Prepare the view of the space to be written.
   301  	woff := p.off + p.size
   302  	if woff >= int64(len(p.buf)) {
   303  		woff -= int64(len(p.buf))
   304  	}
   305  	bs := p.bufBlockSeq.DropFirst64(uint64(woff)).TakeFirst64(uint64(count))
   306  
   307  	// Perform the write.
   308  	doneU64, err := f(bs)
   309  	done := int64(doneU64)
   310  	p.size += done
   311  	if done < count || err != nil {
   312  		return done, err
   313  	}
   314  
   315  	// If we shortened the write, adjust the returned error appropriately.
   316  	if short {
   317  		return done, linuxerr.ErrWouldBlock
   318  	}
   319  
   320  	return done, nil
   321  }
   322  
// rOpen signals a new reader of the pipe.
//
// It increments both the active and the total reader counters (atomically,
// without taking p.mu) and then wakes waiters registered for
// waiter.EventInternal.
func (p *Pipe) rOpen() {
	p.readers.Add(1)
	p.totalReaders.Add(1)

	// Notify for blocking openers.
	p.queue.Notify(waiter.EventInternal)
}
   331  
// wOpen signals a new writer of the pipe.
//
// It records that the pipe has had a writer, increments both the active and
// the total writer counters, and then wakes waiters registered for
// waiter.EventInternal.
func (p *Pipe) wOpen() {
	// hadWriter is protected by mu, so the update is done under the lock.
	p.mu.Lock()
	p.hadWriter = true
	p.writers.Add(1)
	p.totalWriters.Add(1)
	p.mu.Unlock()

	// Notify for blocking openers. This is done after mu is released.
	p.queue.Notify(waiter.EventInternal)
}
   343  
   344  // rClose signals that a reader has closed their end of the pipe.
   345  func (p *Pipe) rClose() {
   346  	if newReaders := p.readers.Add(-1); newReaders < 0 {
   347  		panic(fmt.Sprintf("Refcounting bug, pipe has negative readers: %v", newReaders))
   348  	}
   349  }
   350  
   351  // wClose signals that a writer has closed their end of the pipe.
   352  func (p *Pipe) wClose() {
   353  	if newWriters := p.writers.Add(-1); newWriters < 0 {
   354  		panic(fmt.Sprintf("Refcounting bug, pipe has negative writers: %v.", newWriters))
   355  	}
   356  }
   357  
   358  // HasReaders returns whether the pipe has any active readers.
   359  func (p *Pipe) HasReaders() bool {
   360  	return p.readers.Load() > 0
   361  }
   362  
   363  // HasWriters returns whether the pipe has any active writers.
   364  func (p *Pipe) HasWriters() bool {
   365  	return p.writers.Load() > 0
   366  }
   367  
   368  // rReadinessLocked calculates the read readiness.
   369  //
   370  // Precondition: mu must be held.
   371  func (p *Pipe) rReadinessLocked() waiter.EventMask {
   372  	ready := waiter.EventMask(0)
   373  	if p.HasReaders() && p.size != 0 {
   374  		ready |= waiter.ReadableEvents
   375  	}
   376  	if !p.HasWriters() && p.hadWriter {
   377  		// POLLHUP must be suppressed until the pipe has had at least one writer
   378  		// at some point. Otherwise a reader thread may poll and immediately get
   379  		// a POLLHUP before the writer ever opens the pipe, which the reader may
   380  		// interpret as the writer opening then closing the pipe.
   381  		ready |= waiter.EventHUp
   382  	}
   383  	return ready
   384  }
   385  
   386  // rReadiness returns a mask that states whether the read end of the pipe is
   387  // ready for reading.
   388  func (p *Pipe) rReadiness() waiter.EventMask {
   389  	p.mu.Lock()
   390  	defer p.mu.Unlock()
   391  	return p.rReadinessLocked()
   392  }
   393  
   394  // wReadinessLocked calculates the write readiness.
   395  //
   396  // Precondition: mu must be held.
   397  func (p *Pipe) wReadinessLocked() waiter.EventMask {
   398  	ready := waiter.EventMask(0)
   399  	if p.HasWriters() && p.size < p.max {
   400  		ready |= waiter.WritableEvents
   401  	}
   402  	if !p.HasReaders() {
   403  		ready |= waiter.EventErr
   404  	}
   405  	return ready
   406  }
   407  
   408  // wReadiness returns a mask that states whether the write end of the pipe
   409  // is ready for writing.
   410  func (p *Pipe) wReadiness() waiter.EventMask {
   411  	p.mu.Lock()
   412  	defer p.mu.Unlock()
   413  	return p.wReadinessLocked()
   414  }
   415  
   416  // rwReadiness returns a mask that states whether a read-write handle to the
   417  // pipe is ready for IO.
   418  func (p *Pipe) rwReadiness() waiter.EventMask {
   419  	p.mu.Lock()
   420  	defer p.mu.Unlock()
   421  	return p.rReadinessLocked() | p.wReadinessLocked()
   422  }
   423  
// EventRegister implements waiter.Waitable.EventRegister.
//
// It adds e to the pipe's wait queue and always returns nil. Unlike the
// waitReaders and waitWriters variants, it does not notify e synchronously.
func (p *Pipe) EventRegister(e *waiter.Entry) error {
	p.queue.EventRegister(e)
	return nil
}
   429  
// EventUnregister implements waiter.Waitable.EventUnregister.
//
// It removes e from the pipe's wait queue.
func (p *Pipe) EventUnregister(e *waiter.Entry) {
	p.queue.EventUnregister(e)
}
   434  
   435  // queued returns the amount of queued data.
   436  func (p *Pipe) queued() int64 {
   437  	p.mu.Lock()
   438  	defer p.mu.Unlock()
   439  	return p.queuedLocked()
   440  }
   441  
// queuedLocked returns the number of bytes currently buffered in the pipe.
//
// Precondition: mu must be held (size is protected by mu).
func (p *Pipe) queuedLocked() int64 {
	return p.size
}
   445  
   446  // SetFifoSize implements fs.FifoSizer.SetFifoSize.
   447  func (p *Pipe) SetFifoSize(size int64) (int64, error) {
   448  	if size < 0 {
   449  		return 0, linuxerr.EINVAL
   450  	}
   451  	if size < MinimumPipeSize {
   452  		size = MinimumPipeSize // Per spec.
   453  	}
   454  	if size > MaximumPipeSize {
   455  		return 0, linuxerr.EPERM
   456  	}
   457  	p.mu.Lock()
   458  	defer p.mu.Unlock()
   459  	if size < p.size {
   460  		return 0, linuxerr.EBUSY
   461  	}
   462  	p.max = size
   463  	return size, nil
   464  }