github.com/AndrienkoAleksandr/go@v0.0.19/src/intern/poll/splice_linux.go (about) 1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package poll 6 7 import ( 8 "internal/syscall/unix" 9 "runtime" 10 "sync" 11 "syscall" 12 "unsafe" 13 ) 14 15 const ( 16 // maxSpliceSize is the maximum amount of data Splice asks 17 // the kernel to move in a single call to splice(2). 18 // We use 1MB as Splice writes data through a pipe, and 1MB is the default maximum pipe buffer size, 19 // which is determined by /proc/sys/fs/pipe-max-size. 20 maxSpliceSize = 1 << 20 21 ) 22 23 // Splice transfers at most remain bytes of data from src to dst, using the 24 // splice system call to minimize copies of data from and to userspace. 25 // 26 // Splice gets a pipe buffer from the pool or creates a new one if needed, to serve as a buffer for the data transfer. 27 // src and dst must both be stream-oriented sockets. 28 // 29 // If err != nil, sc is the system call which caused the error. 30 func Splice(dst, src *FD, remain int64) (written int64, handled bool, sc string, err error) { 31 p, sc, err := getPipe() 32 if err != nil { 33 return 0, false, sc, err 34 } 35 defer putPipe(p) 36 var inPipe, n int 37 for err == nil && remain > 0 { 38 max := maxSpliceSize 39 if int64(max) > remain { 40 max = int(remain) 41 } 42 inPipe, err = spliceDrain(p.wfd, src, max) 43 // The operation is considered handled if splice returns no 44 // error, or an error other than EINVAL. An EINVAL means the 45 // kernel does not support splice for the socket type of src. 46 // The failed syscall does not consume any data so it is safe 47 // to fall back to a generic copy. 48 // 49 // spliceDrain should never return EAGAIN, so if err != nil, 50 // Splice cannot continue. 51 // 52 // If inPipe == 0 && err == nil, src is at EOF, and the 53 // transfer is complete. 54 handled = handled || (err != syscall.EINVAL) 55 if err != nil || inPipe == 0 { 56 break 57 } 58 p.data += inPipe 59 60 n, err = splicePump(dst, p.rfd, inPipe) 61 if n > 0 { 62 written += int64(n) 63 remain -= int64(n) 64 p.data -= n 65 } 66 } 67 if err != nil { 68 return written, handled, "splice", err 69 } 70 return written, true, "", nil 71 } 72 73 // spliceDrain moves data from a socket to a pipe. 74 // 75 // Invariant: when entering spliceDrain, the pipe is empty. It is either in its 76 // initial state, or splicePump has emptied it previously. 77 // 78 // Given this, spliceDrain can reasonably assume that the pipe is ready for 79 // writing, so if splice returns EAGAIN, it must be because the socket is not 80 // ready for reading. 81 // 82 // If spliceDrain returns (0, nil), src is at EOF. 83 func spliceDrain(pipefd int, sock *FD, max int) (int, error) { 84 if err := sock.readLock(); err != nil { 85 return 0, err 86 } 87 defer sock.readUnlock() 88 if err := sock.pd.prepareRead(sock.isFile); err != nil { 89 return 0, err 90 } 91 for { 92 n, err := splice(pipefd, sock.Sysfd, max, 0) 93 if err == syscall.EINTR { 94 continue 95 } 96 if err != syscall.EAGAIN { 97 return n, err 98 } 99 if sock.pd.pollable() { 100 if err := sock.pd.waitRead(sock.isFile); err != nil { 101 return n, err 102 } 103 } 104 } 105 } 106 107 // splicePump moves all the buffered data from a pipe to a socket. 108 // 109 // Invariant: when entering splicePump, there are exactly inPipe 110 // bytes of data in the pipe, from a previous call to spliceDrain. 111 // 112 // By analogy to the condition from spliceDrain, splicePump 113 // only needs to poll the socket for readiness, if splice returns 114 // EAGAIN. 115 // 116 // If splicePump cannot move all the data in a single call to 117 // splice(2), it loops over the buffered data until it has written 118 // all of it to the socket. This behavior is similar to the Write 119 // step of an io.Copy in userspace. 120 func splicePump(sock *FD, pipefd int, inPipe int) (int, error) { 121 if err := sock.writeLock(); err != nil { 122 return 0, err 123 } 124 defer sock.writeUnlock() 125 if err := sock.pd.prepareWrite(sock.isFile); err != nil { 126 return 0, err 127 } 128 written := 0 129 for inPipe > 0 { 130 n, err := splice(sock.Sysfd, pipefd, inPipe, 0) 131 // Here, the condition n == 0 && err == nil should never be 132 // observed, since Splice controls the write side of the pipe. 133 if n > 0 { 134 inPipe -= n 135 written += n 136 continue 137 } 138 if err != syscall.EAGAIN { 139 return written, err 140 } 141 if sock.pd.pollable() { 142 if err := sock.pd.waitWrite(sock.isFile); err != nil { 143 return written, err 144 } 145 } 146 } 147 return written, nil 148 } 149 150 // splice wraps the splice system call. Since the current implementation 151 // only uses splice on sockets and pipes, the offset arguments are unused. 152 // splice returns int instead of int64, because callers never ask it to 153 // move more data in a single call than can fit in an int32. 154 func splice(out int, in int, max int, flags int) (int, error) { 155 n, err := syscall.Splice(in, nil, out, nil, max, flags) 156 return int(n), err 157 } 158 159 type splicePipeFields struct { 160 rfd int 161 wfd int 162 data int 163 } 164 165 type splicePipe struct { 166 splicePipeFields 167 168 // We want to use a finalizer, so ensure that the size is 169 // large enough to not use the tiny allocator. 170 _ [24 - unsafe.Sizeof(splicePipeFields{})%24]byte 171 } 172 173 // splicePipePool caches pipes to avoid high-frequency construction and destruction of pipe buffers. 174 // The garbage collector will free all pipes in the sync.Pool periodically, thus we need to set up 175 // a finalizer for each pipe to close its file descriptors before the actual GC. 176 var splicePipePool = sync.Pool{New: newPoolPipe} 177 178 func newPoolPipe() any { 179 // Discard the error which occurred during the creation of pipe buffer, 180 // redirecting the data transmission to the conventional way utilizing read() + write() as a fallback. 181 p := newPipe() 182 if p == nil { 183 return nil 184 } 185 runtime.SetFinalizer(p, destroyPipe) 186 return p 187 } 188 189 // getPipe tries to acquire a pipe buffer from the pool or create a new one with newPipe() if it gets nil from the cache. 190 // 191 // Note that it may fail to create a new pipe buffer by newPipe(), in which case getPipe() will return a generic error 192 // and system call name splice in a string as the indication. 193 func getPipe() (*splicePipe, string, error) { 194 v := splicePipePool.Get() 195 if v == nil { 196 return nil, "splice", syscall.EINVAL 197 } 198 return v.(*splicePipe), "", nil 199 } 200 201 func putPipe(p *splicePipe) { 202 // If there is still data left in the pipe, 203 // then close and discard it instead of putting it back into the pool. 204 if p.data != 0 { 205 runtime.SetFinalizer(p, nil) 206 destroyPipe(p) 207 return 208 } 209 splicePipePool.Put(p) 210 } 211 212 // newPipe sets up a pipe for a splice operation. 213 func newPipe() *splicePipe { 214 var fds [2]int 215 if err := syscall.Pipe2(fds[:], syscall.O_CLOEXEC|syscall.O_NONBLOCK); err != nil { 216 return nil 217 } 218 219 // Splice will loop writing maxSpliceSize bytes from the source to the pipe, 220 // and then write those bytes from the pipe to the destination. 221 // Set the pipe buffer size to maxSpliceSize to optimize that. 222 // Ignore errors here, as a smaller buffer size will work, 223 // although it will require more system calls. 224 unix.Fcntl(fds[0], syscall.F_SETPIPE_SZ, maxSpliceSize) 225 226 return &splicePipe{splicePipeFields: splicePipeFields{rfd: fds[0], wfd: fds[1]}} 227 } 228 229 // destroyPipe destroys a pipe. 230 func destroyPipe(p *splicePipe) { 231 CloseFunc(p.rfd) 232 CloseFunc(p.wfd) 233 }