github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/internal/poll/splice_linux.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package poll
     6  
     7  import (
     8  	"runtime"
     9  	"sync"
    10  	"syscall"
    11  	"unsafe"
    12  )
    13  
    14  const (
    15  	// maxSpliceSize is the maximum amount of data Splice asks
    16  	// the kernel to move in a single call to splice(2).
    17  	// We use 1MB as Splice writes data through a pipe, and 1MB is the default maximum pipe buffer size,
    18  	// which is determined by /proc/sys/fs/pipe-max-size.
    19  	maxSpliceSize = 1 << 20
    20  )
    21  
    22  // Splice transfers at most remain bytes of data from src to dst, using the
    23  // splice system call to minimize copies of data from and to userspace.
    24  //
    25  // Splice gets a pipe buffer from the pool or creates a new one if needed, to serve as a buffer for the data transfer.
    26  // src and dst must both be stream-oriented sockets.
    27  //
    28  // If err != nil, sc is the system call which caused the error.
    29  func Splice(dst, src *FD, remain int64) (written int64, handled bool, sc string, err error) {
    30  	p, sc, err := getPipe()
    31  	if err != nil {
    32  		return 0, false, sc, err
    33  	}
    34  	defer putPipe(p)
    35  	var inPipe, n int
    36  	for err == nil && remain > 0 {
    37  		max := maxSpliceSize
    38  		if int64(max) > remain {
    39  			max = int(remain)
    40  		}
    41  		inPipe, err = spliceDrain(p.wfd, src, max)
    42  		// The operation is considered handled if splice returns no
    43  		// error, or an error other than EINVAL. An EINVAL means the
    44  		// kernel does not support splice for the socket type of src.
    45  		// The failed syscall does not consume any data so it is safe
    46  		// to fall back to a generic copy.
    47  		//
    48  		// spliceDrain should never return EAGAIN, so if err != nil,
    49  		// Splice cannot continue.
    50  		//
    51  		// If inPipe == 0 && err == nil, src is at EOF, and the
    52  		// transfer is complete.
    53  		handled = handled || (err != syscall.EINVAL)
    54  		if err != nil || inPipe == 0 {
    55  			break
    56  		}
    57  		p.data += inPipe
    58  
    59  		n, err = splicePump(dst, p.rfd, inPipe)
    60  		if n > 0 {
    61  			written += int64(n)
    62  			remain -= int64(n)
    63  			p.data -= n
    64  		}
    65  	}
    66  	if err != nil {
    67  		return written, handled, "splice", err
    68  	}
    69  	return written, true, "", nil
    70  }
    71  
    72  // spliceDrain moves data from a socket to a pipe.
    73  //
    74  // Invariant: when entering spliceDrain, the pipe is empty. It is either in its
    75  // initial state, or splicePump has emptied it previously.
    76  //
    77  // Given this, spliceDrain can reasonably assume that the pipe is ready for
    78  // writing, so if splice returns EAGAIN, it must be because the socket is not
    79  // ready for reading.
    80  //
    81  // If spliceDrain returns (0, nil), src is at EOF.
    82  func spliceDrain(pipefd int, sock *FD, max int) (int, error) {
    83  	if err := sock.readLock(); err != nil {
    84  		return 0, err
    85  	}
    86  	defer sock.readUnlock()
    87  	if err := sock.pd.prepareRead(sock.isFile); err != nil {
    88  		return 0, err
    89  	}
    90  	for {
    91  		n, err := splice(pipefd, sock.Sysfd, max, 0)
    92  		if err == syscall.EINTR {
    93  			continue
    94  		}
    95  		if err != syscall.EAGAIN {
    96  			return n, err
    97  		}
    98  		if sock.pd.pollable() {
    99  			if err := sock.pd.waitRead(sock.isFile); err != nil {
   100  				return n, err
   101  			}
   102  		}
   103  	}
   104  }
   105  
   106  // splicePump moves all the buffered data from a pipe to a socket.
   107  //
   108  // Invariant: when entering splicePump, there are exactly inPipe
   109  // bytes of data in the pipe, from a previous call to spliceDrain.
   110  //
   111  // By analogy to the condition from spliceDrain, splicePump
   112  // only needs to poll the socket for readiness, if splice returns
   113  // EAGAIN.
   114  //
   115  // If splicePump cannot move all the data in a single call to
   116  // splice(2), it loops over the buffered data until it has written
   117  // all of it to the socket. This behavior is similar to the Write
   118  // step of an io.Copy in userspace.
   119  func splicePump(sock *FD, pipefd int, inPipe int) (int, error) {
   120  	if err := sock.writeLock(); err != nil {
   121  		return 0, err
   122  	}
   123  	defer sock.writeUnlock()
   124  	if err := sock.pd.prepareWrite(sock.isFile); err != nil {
   125  		return 0, err
   126  	}
   127  	written := 0
   128  	for inPipe > 0 {
   129  		n, err := splice(sock.Sysfd, pipefd, inPipe, 0)
   130  		// Here, the condition n == 0 && err == nil should never be
   131  		// observed, since Splice controls the write side of the pipe.
   132  		if n > 0 {
   133  			inPipe -= n
   134  			written += n
   135  			continue
   136  		}
   137  		if err != syscall.EAGAIN {
   138  			return written, err
   139  		}
   140  		if sock.pd.pollable() {
   141  			if err := sock.pd.waitWrite(sock.isFile); err != nil {
   142  				return written, err
   143  			}
   144  		}
   145  	}
   146  	return written, nil
   147  }
   148  
   149  // splice wraps the splice system call. Since the current implementation
   150  // only uses splice on sockets and pipes, the offset arguments are unused.
   151  // splice returns int instead of int64, because callers never ask it to
   152  // move more data in a single call than can fit in an int32.
   153  func splice(out int, in int, max int, flags int) (int, error) {
   154  	n, err := syscall.Splice(in, nil, out, nil, max, flags)
   155  	return int(n), err
   156  }
   157  
   158  type splicePipeFields struct {
   159  	rfd  int
   160  	wfd  int
   161  	data int
   162  }
   163  
// splicePipe is a pipe used as the intermediate buffer of a splice
// transfer. It embeds splicePipeFields and appends padding so that a
// finalizer can be attached to it.
type splicePipe struct {
	splicePipeFields

	// We want to use a finalizer, so ensure that the size is
	// large enough to not use the tiny allocator.
	// The padding rounds the struct up to the next multiple of 24 bytes
	// strictly greater than the size of splicePipeFields.
	_ [24 - unsafe.Sizeof(splicePipeFields{})%24]byte
}
   171  
   172  // splicePipePool caches pipes to avoid high-frequency construction and destruction of pipe buffers.
   173  // The garbage collector will free all pipes in the sync.Pool periodically, thus we need to set up
   174  // a finalizer for each pipe to close its file descriptors before the actual GC.
   175  var splicePipePool = sync.Pool{New: newPoolPipe}
   176  
   177  func newPoolPipe() any {
   178  	// Discard the error which occurred during the creation of pipe buffer,
   179  	// redirecting the data transmission to the conventional way utilizing read() + write() as a fallback.
   180  	p := newPipe()
   181  	if p == nil {
   182  		return nil
   183  	}
   184  	runtime.SetFinalizer(p, destroyPipe)
   185  	return p
   186  }
   187  
   188  // getPipe tries to acquire a pipe buffer from the pool or create a new one with newPipe() if it gets nil from the cache.
   189  //
   190  // Note that it may fail to create a new pipe buffer by newPipe(), in which case getPipe() will return a generic error
   191  // and system call name splice in a string as the indication.
   192  func getPipe() (*splicePipe, string, error) {
   193  	v := splicePipePool.Get()
   194  	if v == nil {
   195  		return nil, "splice", syscall.EINVAL
   196  	}
   197  	return v.(*splicePipe), "", nil
   198  }
   199  
   200  func putPipe(p *splicePipe) {
   201  	// If there is still data left in the pipe,
   202  	// then close and discard it instead of putting it back into the pool.
   203  	if p.data != 0 {
   204  		runtime.SetFinalizer(p, nil)
   205  		destroyPipe(p)
   206  		return
   207  	}
   208  	splicePipePool.Put(p)
   209  }
   210  
   211  // newPipe sets up a pipe for a splice operation.
   212  func newPipe() *splicePipe {
   213  	var fds [2]int
   214  	if err := syscall.Pipe2(fds[:], syscall.O_CLOEXEC|syscall.O_NONBLOCK); err != nil {
   215  		return nil
   216  	}
   217  
   218  	// Splice will loop writing maxSpliceSize bytes from the source to the pipe,
   219  	// and then write those bytes from the pipe to the destination.
   220  	// Set the pipe buffer size to maxSpliceSize to optimize that.
   221  	// Ignore errors here, as a smaller buffer size will work,
   222  	// although it will require more system calls.
   223  	fcntl(fds[0], syscall.F_SETPIPE_SZ, maxSpliceSize)
   224  
   225  	return &splicePipe{splicePipeFields: splicePipeFields{rfd: fds[0], wfd: fds[1]}}
   226  }
   227  
   228  // destroyPipe destroys a pipe.
   229  func destroyPipe(p *splicePipe) {
   230  	CloseFunc(p.rfd)
   231  	CloseFunc(p.wfd)
   232  }