github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/internal/poll/splice_linux.go (about) 1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package poll 6 7 import ( 8 "runtime" 9 "sync" 10 "syscall" 11 "unsafe" 12 ) 13 14 const ( 15 // maxSpliceSize is the maximum amount of data Splice asks 16 // the kernel to move in a single call to splice(2). 17 // We use 1MB as Splice writes data through a pipe, and 1MB is the default maximum pipe buffer size, 18 // which is determined by /proc/sys/fs/pipe-max-size. 19 maxSpliceSize = 1 << 20 20 ) 21 22 // Splice transfers at most remain bytes of data from src to dst, using the 23 // splice system call to minimize copies of data from and to userspace. 24 // 25 // Splice gets a pipe buffer from the pool or creates a new one if needed, to serve as a buffer for the data transfer. 26 // src and dst must both be stream-oriented sockets. 27 // 28 // If err != nil, sc is the system call which caused the error. 29 func Splice(dst, src *FD, remain int64) (written int64, handled bool, sc string, err error) { 30 p, sc, err := getPipe() 31 if err != nil { 32 return 0, false, sc, err 33 } 34 defer putPipe(p) 35 var inPipe, n int 36 for err == nil && remain > 0 { 37 max := maxSpliceSize 38 if int64(max) > remain { 39 max = int(remain) 40 } 41 inPipe, err = spliceDrain(p.wfd, src, max) 42 // The operation is considered handled if splice returns no 43 // error, or an error other than EINVAL. An EINVAL means the 44 // kernel does not support splice for the socket type of src. 45 // The failed syscall does not consume any data so it is safe 46 // to fall back to a generic copy. 47 // 48 // spliceDrain should never return EAGAIN, so if err != nil, 49 // Splice cannot continue. 50 // 51 // If inPipe == 0 && err == nil, src is at EOF, and the 52 // transfer is complete. 53 handled = handled || (err != syscall.EINVAL) 54 if err != nil || inPipe == 0 { 55 break 56 } 57 p.data += inPipe 58 59 n, err = splicePump(dst, p.rfd, inPipe) 60 if n > 0 { 61 written += int64(n) 62 remain -= int64(n) 63 p.data -= n 64 } 65 } 66 if err != nil { 67 return written, handled, "splice", err 68 } 69 return written, true, "", nil 70 } 71 72 // spliceDrain moves data from a socket to a pipe. 73 // 74 // Invariant: when entering spliceDrain, the pipe is empty. It is either in its 75 // initial state, or splicePump has emptied it previously. 76 // 77 // Given this, spliceDrain can reasonably assume that the pipe is ready for 78 // writing, so if splice returns EAGAIN, it must be because the socket is not 79 // ready for reading. 80 // 81 // If spliceDrain returns (0, nil), src is at EOF. 82 func spliceDrain(pipefd int, sock *FD, max int) (int, error) { 83 if err := sock.readLock(); err != nil { 84 return 0, err 85 } 86 defer sock.readUnlock() 87 if err := sock.pd.prepareRead(sock.isFile); err != nil { 88 return 0, err 89 } 90 for { 91 n, err := splice(pipefd, sock.Sysfd, max, 0) 92 if err == syscall.EINTR { 93 continue 94 } 95 if err != syscall.EAGAIN { 96 return n, err 97 } 98 if sock.pd.pollable() { 99 if err := sock.pd.waitRead(sock.isFile); err != nil { 100 return n, err 101 } 102 } 103 } 104 } 105 106 // splicePump moves all the buffered data from a pipe to a socket. 107 // 108 // Invariant: when entering splicePump, there are exactly inPipe 109 // bytes of data in the pipe, from a previous call to spliceDrain. 110 // 111 // By analogy to the condition from spliceDrain, splicePump 112 // only needs to poll the socket for readiness, if splice returns 113 // EAGAIN. 114 // 115 // If splicePump cannot move all the data in a single call to 116 // splice(2), it loops over the buffered data until it has written 117 // all of it to the socket. This behavior is similar to the Write 118 // step of an io.Copy in userspace. 119 func splicePump(sock *FD, pipefd int, inPipe int) (int, error) { 120 if err := sock.writeLock(); err != nil { 121 return 0, err 122 } 123 defer sock.writeUnlock() 124 if err := sock.pd.prepareWrite(sock.isFile); err != nil { 125 return 0, err 126 } 127 written := 0 128 for inPipe > 0 { 129 n, err := splice(sock.Sysfd, pipefd, inPipe, 0) 130 // Here, the condition n == 0 && err == nil should never be 131 // observed, since Splice controls the write side of the pipe. 132 if n > 0 { 133 inPipe -= n 134 written += n 135 continue 136 } 137 if err != syscall.EAGAIN { 138 return written, err 139 } 140 if sock.pd.pollable() { 141 if err := sock.pd.waitWrite(sock.isFile); err != nil { 142 return written, err 143 } 144 } 145 } 146 return written, nil 147 } 148 149 // splice wraps the splice system call. Since the current implementation 150 // only uses splice on sockets and pipes, the offset arguments are unused. 151 // splice returns int instead of int64, because callers never ask it to 152 // move more data in a single call than can fit in an int32. 153 func splice(out int, in int, max int, flags int) (int, error) { 154 n, err := syscall.Splice(in, nil, out, nil, max, flags) 155 return int(n), err 156 } 157 158 type splicePipeFields struct { 159 rfd int 160 wfd int 161 data int 162 } 163 164 type splicePipe struct { 165 splicePipeFields 166 167 // We want to use a finalizer, so ensure that the size is 168 // large enough to not use the tiny allocator. 169 _ [24 - unsafe.Sizeof(splicePipeFields{})%24]byte 170 } 171 172 // splicePipePool caches pipes to avoid high-frequency construction and destruction of pipe buffers. 173 // The garbage collector will free all pipes in the sync.Pool periodically, thus we need to set up 174 // a finalizer for each pipe to close its file descriptors before the actual GC. 175 var splicePipePool = sync.Pool{New: newPoolPipe} 176 177 func newPoolPipe() any { 178 // Discard the error which occurred during the creation of pipe buffer, 179 // redirecting the data transmission to the conventional way utilizing read() + write() as a fallback. 180 p := newPipe() 181 if p == nil { 182 return nil 183 } 184 runtime.SetFinalizer(p, destroyPipe) 185 return p 186 } 187 188 // getPipe tries to acquire a pipe buffer from the pool or create a new one with newPipe() if it gets nil from the cache. 189 // 190 // Note that it may fail to create a new pipe buffer by newPipe(), in which case getPipe() will return a generic error 191 // and system call name splice in a string as the indication. 192 func getPipe() (*splicePipe, string, error) { 193 v := splicePipePool.Get() 194 if v == nil { 195 return nil, "splice", syscall.EINVAL 196 } 197 return v.(*splicePipe), "", nil 198 } 199 200 func putPipe(p *splicePipe) { 201 // If there is still data left in the pipe, 202 // then close and discard it instead of putting it back into the pool. 203 if p.data != 0 { 204 runtime.SetFinalizer(p, nil) 205 destroyPipe(p) 206 return 207 } 208 splicePipePool.Put(p) 209 } 210 211 // newPipe sets up a pipe for a splice operation. 212 func newPipe() *splicePipe { 213 var fds [2]int 214 if err := syscall.Pipe2(fds[:], syscall.O_CLOEXEC|syscall.O_NONBLOCK); err != nil { 215 return nil 216 } 217 218 // Splice will loop writing maxSpliceSize bytes from the source to the pipe, 219 // and then write those bytes from the pipe to the destination. 220 // Set the pipe buffer size to maxSpliceSize to optimize that. 221 // Ignore errors here, as a smaller buffer size will work, 222 // although it will require more system calls. 223 fcntl(fds[0], syscall.F_SETPIPE_SZ, maxSpliceSize) 224 225 return &splicePipe{splicePipeFields: splicePipeFields{rfd: fds[0], wfd: fds[1]}} 226 } 227 228 // destroyPipe destroys a pipe. 229 func destroyPipe(p *splicePipe) { 230 CloseFunc(p.rfd) 231 CloseFunc(p.wfd) 232 }