github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/kernel/pipe/pipe.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package pipe provides a pipe implementation.
    16  package pipe
    17  
    18  import (
    19  	"fmt"
    20  	"io"
    21  
    22  	"golang.org/x/sys/unix"
    23  	"github.com/nicocha30/gvisor-ligolo/pkg/atomicbitops"
    24  	"github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr"
    25  	"github.com/nicocha30/gvisor-ligolo/pkg/hostarch"
    26  	"github.com/nicocha30/gvisor-ligolo/pkg/safemem"
    27  	"github.com/nicocha30/gvisor-ligolo/pkg/waiter"
    28  )
    29  
    30  const (
    31  	// MinimumPipeSize is a hard limit of the minimum size of a pipe.
    32  	// It corresponds to fs/pipe.c:pipe_min_size.
    33  	MinimumPipeSize = hostarch.PageSize
    34  
    35  	// MaximumPipeSize is a hard limit on the maximum size of a pipe.
    36  	// It corresponds to fs/pipe.c:pipe_max_size.
    37  	MaximumPipeSize = 1048576
    38  
    39  	// DefaultPipeSize is the system-wide default size of a pipe in bytes.
    40  	// It corresponds to pipe_fs_i.h:PIPE_DEF_BUFFERS.
    41  	DefaultPipeSize = 16 * hostarch.PageSize
    42  
    43  	// atomicIOBytes is the maximum number of bytes that the pipe will
    44  	// guarantee atomic reads or writes atomically.
    45  	// It corresponds to limits.h:PIPE_BUF.
    46  	atomicIOBytes = 4096
    47  )
    48  
// waitReaders is a wrapper around Pipe.
//
// It shares the Pipe's layout and waiter queue, but its EventRegister
// additionally notifies the newly registered entry right away if the pipe
// already has readers.
//
// This is used for ctx.Block operations that require the synchronization of
// readers and writers, along with the careful grabbing and releasing of locks.
type waitReaders Pipe
    54  
    55  // Readiness implements waiter.Waitable.Readiness.
    56  func (wq *waitReaders) Readiness(mask waiter.EventMask) waiter.EventMask {
    57  	return ((*Pipe)(wq)).rwReadiness() & mask
    58  }
    59  
    60  // EventRegister implements waiter.Waitable.EventRegister.
    61  func (wq *waitReaders) EventRegister(e *waiter.Entry) error {
    62  	((*Pipe)(wq)).queue.EventRegister(e)
    63  
    64  	// Notify synchronously.
    65  	if ((*Pipe)(wq)).HasReaders() {
    66  		e.NotifyEvent(waiter.EventInternal)
    67  	}
    68  
    69  	return nil
    70  }
    71  
    72  // EventUnregister implements waiter.Waitable.EventUnregister.
    73  func (wq *waitReaders) EventUnregister(e *waiter.Entry) {
    74  	((*Pipe)(wq)).queue.EventUnregister(e)
    75  }
    76  
// waitWriters is a wrapper around Pipe.
//
// It shares the Pipe's layout and waiter queue, but its EventRegister
// additionally notifies the newly registered entry right away if the pipe
// already has writers.
//
// This is used for ctx.Block operations that require the synchronization of
// readers and writers, along with the careful grabbing and releasing of locks.
type waitWriters Pipe
    82  
    83  // Readiness implements waiter.Waitable.Readiness.
    84  func (wq *waitWriters) Readiness(mask waiter.EventMask) waiter.EventMask {
    85  	return ((*Pipe)(wq)).rwReadiness() & mask
    86  }
    87  
    88  // EventRegister implements waiter.Waitable.EventRegister.
    89  func (wq *waitWriters) EventRegister(e *waiter.Entry) error {
    90  	((*Pipe)(wq)).queue.EventRegister(e)
    91  
    92  	// Notify synchronously.
    93  	if ((*Pipe)(wq)).HasWriters() {
    94  		e.NotifyEvent(waiter.EventInternal)
    95  	}
    96  
    97  	return nil
    98  }
    99  
   100  // EventUnregister implements waiter.Waitable.EventUnregister.
   101  func (wq *waitWriters) EventUnregister(e *waiter.Entry) {
   102  	((*Pipe)(wq)).queue.EventUnregister(e)
   103  }
   104  
// Pipe is an encapsulation of a platform-independent pipe.
// It manages a buffered byte queue shared between a reader/writer
// pair.
//
// The reader/writer counters are atomics and may be read without holding
// mu; the buffer state (buf, off, size), max and hadWriter are protected by
// mu.
//
// +stateify savable
type Pipe struct {
	// queue is the waiter queue. It is notified with waiter.EventInternal
	// whenever a reader or writer opens the pipe (see rOpen and wOpen).
	queue waiter.Queue

	// isNamed indicates whether this is a named pipe.
	//
	// This value is immutable.
	isNamed bool

	// The number of active readers for this pipe. Incremented by rOpen and
	// decremented by rClose.
	readers atomicbitops.Int32

	// The total number of readers for this pipe. Incremented by rOpen; it
	// is never decremented within this file.
	totalReaders atomicbitops.Int32

	// The number of active writers for this pipe. Incremented by wOpen and
	// decremented by wClose.
	writers atomicbitops.Int32

	// The total number of writers for this pipe. Incremented by wOpen; it
	// is never decremented within this file.
	totalWriters atomicbitops.Int32

	// mu protects all pipe internal state below.
	mu pipeMutex `state:"nosave"`

	// buf holds the pipe's data. buf is a circular buffer; the first valid
	// byte in buf is at offset off, and the pipe contains size valid bytes.
	// bufBlocks contains two identical safemem.Blocks representing buf; this
	// avoids needing to heap-allocate a new safemem.Block slice when buf is
	// resized. bufBlockSeq is a safemem.BlockSeq representing bufBlocks.
	//
	// Because bufBlockSeq covers buf twice back to back, a range that wraps
	// around the end of buf can be expressed as a single DropFirst/TakeFirst
	// window over bufBlockSeq.
	//
	// buf is allocated lazily and grown on demand by writeLocked, so len(buf)
	// may be smaller than max.
	//
	// These fields are protected by mu.
	buf         []byte
	bufBlocks   [2]safemem.Block `state:"nosave"`
	bufBlockSeq safemem.BlockSeq `state:"nosave"`
	off         int64
	size        int64

	// max is the maximum size of the pipe in bytes. When this max has been
	// reached, writers will get EWOULDBLOCK.
	//
	// It is set by initPipe and may later be changed by SetFifoSize.
	//
	// This is protected by mu.
	max int64

	// hadWriter indicates if this pipe ever had a writer. Note that this
	// does not necessarily indicate there is *currently* a writer, just
	// that there has been a writer at some point since the pipe was
	// created.
	//
	// This is protected by mu.
	hadWriter bool
}
   161  
   162  // NewPipe initializes and returns a pipe.
   163  //
   164  // N.B. The size will be bounded.
   165  func NewPipe(isNamed bool, sizeBytes int64) *Pipe {
   166  	var p Pipe
   167  	initPipe(&p, isNamed, sizeBytes)
   168  	return &p
   169  }
   170  
   171  func initPipe(pipe *Pipe, isNamed bool, sizeBytes int64) {
   172  	if sizeBytes < MinimumPipeSize {
   173  		sizeBytes = MinimumPipeSize
   174  	}
   175  	if sizeBytes > MaximumPipeSize {
   176  		sizeBytes = MaximumPipeSize
   177  	}
   178  	pipe.isNamed = isNamed
   179  	pipe.max = sizeBytes
   180  }
   181  
   182  // peekLocked passes the first count bytes in the pipe to f and returns its
   183  // result. If fewer than count bytes are available, the safemem.BlockSeq passed
   184  // to f will be less than count bytes in length.
   185  //
   186  // peekLocked does not mutate the pipe; if the read consumes bytes from the
   187  // pipe, then the caller is responsible for calling p.consumeLocked() and
   188  // p.queue.Notify(waiter.WritableEvents). (The latter must be called with p.mu
   189  // unlocked.)
   190  //
   191  // Preconditions:
   192  //   - p.mu must be locked.
   193  //   - This pipe must have readers.
   194  func (p *Pipe) peekLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
   195  	// Don't block for a zero-length read even if the pipe is empty.
   196  	if count == 0 {
   197  		return 0, nil
   198  	}
   199  
   200  	// Limit the amount of data read to the amount of data in the pipe.
   201  	if count > p.size {
   202  		if p.size == 0 {
   203  			if !p.HasWriters() {
   204  				return 0, io.EOF
   205  			}
   206  			return 0, linuxerr.ErrWouldBlock
   207  		}
   208  		count = p.size
   209  	}
   210  
   211  	// Prepare the view of the data to be read.
   212  	bs := p.bufBlockSeq.DropFirst64(uint64(p.off)).TakeFirst64(uint64(count))
   213  
   214  	// Perform the read.
   215  	done, err := f(bs)
   216  	return int64(done), err
   217  }
   218  
   219  // consumeLocked consumes the first n bytes in the pipe, such that they will no
   220  // longer be visible to future reads.
   221  //
   222  // Preconditions:
   223  //   - p.mu must be locked.
   224  //   - The pipe must contain at least n bytes.
   225  func (p *Pipe) consumeLocked(n int64) {
   226  	p.off += n
   227  	if max := int64(len(p.buf)); p.off >= max {
   228  		p.off -= max
   229  	}
   230  	p.size -= n
   231  }
   232  
   233  // writeLocked passes a safemem.BlockSeq representing the first count bytes of
   234  // unused space in the pipe to f and returns the result. If fewer than count
   235  // bytes are free, the safemem.BlockSeq passed to f will be less than count
   236  // bytes in length. If the pipe is full or otherwise cannot accomodate a write
   237  // of any number of bytes up to count, writeLocked returns ErrWouldBlock
   238  // without calling f.
   239  //
   240  // Unlike peekLocked, writeLocked assumes that f returns the number of bytes
   241  // written to the pipe, and increases the number of bytes stored in the pipe
   242  // accordingly. Callers are still responsible for calling
   243  // p.queue.Notify(waiter.ReadableEvents) with p.mu unlocked.
   244  //
   245  // Preconditions:
   246  //   - p.mu must be locked.
   247  func (p *Pipe) writeLocked(count int64, f func(safemem.BlockSeq) (uint64, error)) (int64, error) {
   248  	// Can't write to a pipe with no readers.
   249  	if !p.HasReaders() {
   250  		return 0, unix.EPIPE
   251  	}
   252  
   253  	avail := p.max - p.size
   254  	if avail == 0 {
   255  		return 0, linuxerr.ErrWouldBlock
   256  	}
   257  	short := false
   258  	if count > avail {
   259  		// POSIX requires that a write smaller than atomicIOBytes
   260  		// (PIPE_BUF) be atomic, but requires no atomicity for writes
   261  		// larger than this.
   262  		if count <= atomicIOBytes {
   263  			return 0, linuxerr.ErrWouldBlock
   264  		}
   265  		count = avail
   266  		short = true
   267  	}
   268  
   269  	// Ensure that the buffer is big enough.
   270  	if newLen, oldCap := p.size+count, int64(len(p.buf)); newLen > oldCap {
   271  		// Allocate a new buffer.
   272  		newCap := oldCap * 2
   273  		if oldCap == 0 {
   274  			newCap = 8 // arbitrary; sending individual integers across pipes is relatively common
   275  		}
   276  		for newLen > newCap {
   277  			newCap *= 2
   278  		}
   279  		if newCap > p.max {
   280  			newCap = p.max
   281  		}
   282  		newBuf := make([]byte, newCap)
   283  		// Copy the old buffer's contents to the beginning of the new one.
   284  		safemem.CopySeq(
   285  			safemem.BlockSeqOf(safemem.BlockFromSafeSlice(newBuf)),
   286  			p.bufBlockSeq.DropFirst64(uint64(p.off)).TakeFirst64(uint64(p.size)))
   287  		// Switch to the new buffer.
   288  		p.buf = newBuf
   289  		p.bufBlocks[0] = safemem.BlockFromSafeSlice(newBuf)
   290  		p.bufBlocks[1] = p.bufBlocks[0]
   291  		p.bufBlockSeq = safemem.BlockSeqFromSlice(p.bufBlocks[:])
   292  		p.off = 0
   293  	}
   294  
   295  	// Prepare the view of the space to be written.
   296  	woff := p.off + p.size
   297  	if woff >= int64(len(p.buf)) {
   298  		woff -= int64(len(p.buf))
   299  	}
   300  	bs := p.bufBlockSeq.DropFirst64(uint64(woff)).TakeFirst64(uint64(count))
   301  
   302  	// Perform the write.
   303  	doneU64, err := f(bs)
   304  	done := int64(doneU64)
   305  	p.size += done
   306  	if done < count || err != nil {
   307  		return done, err
   308  	}
   309  
   310  	// If we shortened the write, adjust the returned error appropriately.
   311  	if short {
   312  		return done, linuxerr.ErrWouldBlock
   313  	}
   314  
   315  	return done, nil
   316  }
   317  
// rOpen signals a new reader of the pipe.
//
// It increments both the active and the total reader counts and then
// notifies the waiter queue with waiter.EventInternal. It does not take
// p.mu; both counters are atomic.
func (p *Pipe) rOpen() {
	p.readers.Add(1)
	p.totalReaders.Add(1)

	// Notify for blocking openers.
	p.queue.Notify(waiter.EventInternal)
}
   326  
// wOpen signals a new writer of the pipe.
//
// It records that the pipe has had a writer, increments both the active and
// the total writer counts, and then notifies the waiter queue with
// waiter.EventInternal. The updates are made under p.mu because hadWriter is
// protected by it; the notification is issued after p.mu is released.
func (p *Pipe) wOpen() {
	p.mu.Lock()
	p.hadWriter = true
	p.writers.Add(1)
	p.totalWriters.Add(1)
	p.mu.Unlock()

	// Notify for blocking openers.
	p.queue.Notify(waiter.EventInternal)
}
   338  
   339  // rClose signals that a reader has closed their end of the pipe.
   340  func (p *Pipe) rClose() {
   341  	if newReaders := p.readers.Add(-1); newReaders < 0 {
   342  		panic(fmt.Sprintf("Refcounting bug, pipe has negative readers: %v", newReaders))
   343  	}
   344  }
   345  
   346  // wClose signals that a writer has closed their end of the pipe.
   347  func (p *Pipe) wClose() {
   348  	if newWriters := p.writers.Add(-1); newWriters < 0 {
   349  		panic(fmt.Sprintf("Refcounting bug, pipe has negative writers: %v.", newWriters))
   350  	}
   351  }
   352  
   353  // HasReaders returns whether the pipe has any active readers.
   354  func (p *Pipe) HasReaders() bool {
   355  	return p.readers.Load() > 0
   356  }
   357  
   358  // HasWriters returns whether the pipe has any active writers.
   359  func (p *Pipe) HasWriters() bool {
   360  	return p.writers.Load() > 0
   361  }
   362  
   363  // rReadinessLocked calculates the read readiness.
   364  //
   365  // Precondition: mu must be held.
   366  func (p *Pipe) rReadinessLocked() waiter.EventMask {
   367  	ready := waiter.EventMask(0)
   368  	if p.HasReaders() && p.size != 0 {
   369  		ready |= waiter.ReadableEvents
   370  	}
   371  	if !p.HasWriters() && p.hadWriter {
   372  		// POLLHUP must be suppressed until the pipe has had at least one writer
   373  		// at some point. Otherwise a reader thread may poll and immediately get
   374  		// a POLLHUP before the writer ever opens the pipe, which the reader may
   375  		// interpret as the writer opening then closing the pipe.
   376  		ready |= waiter.EventHUp
   377  	}
   378  	return ready
   379  }
   380  
   381  // rReadiness returns a mask that states whether the read end of the pipe is
   382  // ready for reading.
   383  func (p *Pipe) rReadiness() waiter.EventMask {
   384  	p.mu.Lock()
   385  	defer p.mu.Unlock()
   386  	return p.rReadinessLocked()
   387  }
   388  
   389  // wReadinessLocked calculates the write readiness.
   390  //
   391  // Precondition: mu must be held.
   392  func (p *Pipe) wReadinessLocked() waiter.EventMask {
   393  	ready := waiter.EventMask(0)
   394  	if p.HasWriters() && p.size < p.max {
   395  		ready |= waiter.WritableEvents
   396  	}
   397  	if !p.HasReaders() {
   398  		ready |= waiter.EventErr
   399  	}
   400  	return ready
   401  }
   402  
   403  // wReadiness returns a mask that states whether the write end of the pipe
   404  // is ready for writing.
   405  func (p *Pipe) wReadiness() waiter.EventMask {
   406  	p.mu.Lock()
   407  	defer p.mu.Unlock()
   408  	return p.wReadinessLocked()
   409  }
   410  
   411  // rwReadiness returns a mask that states whether a read-write handle to the
   412  // pipe is ready for IO.
   413  func (p *Pipe) rwReadiness() waiter.EventMask {
   414  	p.mu.Lock()
   415  	defer p.mu.Unlock()
   416  	return p.rReadinessLocked() | p.wReadinessLocked()
   417  }
   418  
// EventRegister implements waiter.Waitable.EventRegister.
//
// It adds e to the pipe's waiter queue and always returns nil. Unlike the
// waitReaders and waitWriters variants, it never notifies e synchronously.
func (p *Pipe) EventRegister(e *waiter.Entry) error {
	p.queue.EventRegister(e)
	return nil
}
   424  
// EventUnregister implements waiter.Waitable.EventUnregister.
//
// It removes e from the pipe's waiter queue.
func (p *Pipe) EventUnregister(e *waiter.Entry) {
	p.queue.EventUnregister(e)
}
   429  
   430  // queued returns the amount of queued data.
   431  func (p *Pipe) queued() int64 {
   432  	p.mu.Lock()
   433  	defer p.mu.Unlock()
   434  	return p.queuedLocked()
   435  }
   436  
// queuedLocked returns the number of bytes currently buffered in the pipe.
//
// Precondition: p.mu must be locked (size is protected by mu).
func (p *Pipe) queuedLocked() int64 {
	return p.size
}
   440  
   441  // SetFifoSize implements fs.FifoSizer.SetFifoSize.
   442  func (p *Pipe) SetFifoSize(size int64) (int64, error) {
   443  	if size < 0 {
   444  		return 0, linuxerr.EINVAL
   445  	}
   446  	if size < MinimumPipeSize {
   447  		size = MinimumPipeSize // Per spec.
   448  	}
   449  	if size > MaximumPipeSize {
   450  		return 0, linuxerr.EPERM
   451  	}
   452  	p.mu.Lock()
   453  	defer p.mu.Unlock()
   454  	if size < p.size {
   455  		return 0, linuxerr.EBUSY
   456  	}
   457  	p.max = size
   458  	return size, nil
   459  }