github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/sys_splice.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package linux 16 17 import ( 18 "github.com/SagerNet/gvisor/pkg/abi/linux" 19 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 20 "github.com/SagerNet/gvisor/pkg/marshal/primitive" 21 "github.com/SagerNet/gvisor/pkg/sentry/arch" 22 "github.com/SagerNet/gvisor/pkg/sentry/fs" 23 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 24 "github.com/SagerNet/gvisor/pkg/syserror" 25 "github.com/SagerNet/gvisor/pkg/waiter" 26 ) 27 28 // doSplice implements a blocking splice operation. 29 func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) { 30 if opts.Length < 0 || opts.SrcStart < 0 || opts.DstStart < 0 || (opts.SrcStart+opts.Length < 0) { 31 return 0, linuxerr.EINVAL 32 } 33 if opts.Length == 0 { 34 return 0, nil 35 } 36 if opts.Length > int64(kernel.MAX_RW_COUNT) { 37 opts.Length = int64(kernel.MAX_RW_COUNT) 38 } 39 40 var ( 41 n int64 42 err error 43 inCh chan struct{} 44 outCh chan struct{} 45 ) 46 47 for { 48 n, err = fs.Splice(t, outFile, inFile, opts) 49 if n != 0 || err != syserror.ErrWouldBlock { 50 break 51 } else if err == syserror.ErrWouldBlock && nonBlocking { 52 break 53 } 54 55 // Note that the blocking behavior here is a bit different than the 56 // normal pattern. Because we need to have both data to read and data 57 // to write simultaneously, we actually explicitly block on both of 58 // these cases in turn before returning to the splice operation. 59 if inFile.Readiness(EventMaskRead) == 0 { 60 if inCh == nil { 61 inCh = make(chan struct{}, 1) 62 inW, _ := waiter.NewChannelEntry(inCh) 63 inFile.EventRegister(&inW, EventMaskRead) 64 defer inFile.EventUnregister(&inW) 65 // Need to refresh readiness. 66 continue 67 } 68 if err = t.Block(inCh); err != nil { 69 break 70 } 71 } 72 // Don't bother checking readiness of the outFile, because it's not a 73 // guarantee that it won't return EWOULDBLOCK. Both pipes and eventfds 74 // can be "ready" but will reject writes of certain sizes with 75 // EWOULDBLOCK. 76 if outCh == nil { 77 outCh = make(chan struct{}, 1) 78 outW, _ := waiter.NewChannelEntry(outCh) 79 outFile.EventRegister(&outW, EventMaskWrite) 80 defer outFile.EventUnregister(&outW) 81 // We might be ready to write now. Try again before 82 // blocking. 83 continue 84 } 85 if err = t.Block(outCh); err != nil { 86 break 87 } 88 } 89 90 if n > 0 { 91 // On Linux, inotify behavior is not very consistent with splice(2). We try 92 // our best to emulate Linux for very basic calls to splice, where for some 93 // reason, events are generated for output files, but not input files. 94 outFile.Dirent.InotifyEvent(linux.IN_MODIFY, 0) 95 } 96 return n, err 97 } 98 99 // Sendfile implements linux system call sendfile(2). 100 func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 101 outFD := args[0].Int() 102 inFD := args[1].Int() 103 offsetAddr := args[2].Pointer() 104 count := int64(args[3].SizeT()) 105 106 // Get files. 107 inFile := t.GetFile(inFD) 108 if inFile == nil { 109 return 0, nil, linuxerr.EBADF 110 } 111 defer inFile.DecRef(t) 112 113 if !inFile.Flags().Read { 114 return 0, nil, linuxerr.EBADF 115 } 116 117 outFile := t.GetFile(outFD) 118 if outFile == nil { 119 return 0, nil, linuxerr.EBADF 120 } 121 defer outFile.DecRef(t) 122 123 if !outFile.Flags().Write { 124 return 0, nil, linuxerr.EBADF 125 } 126 127 // Verify that the outfile Append flag is not set. 128 if outFile.Flags().Append { 129 return 0, nil, linuxerr.EINVAL 130 } 131 132 // Verify that we have a regular infile. This is a requirement; the 133 // same check appears in Linux (fs/splice.c:splice_direct_to_actor). 134 if !fs.IsRegular(inFile.Dirent.Inode.StableAttr) { 135 return 0, nil, linuxerr.EINVAL 136 } 137 138 var ( 139 n int64 140 err error 141 ) 142 if offsetAddr != 0 { 143 // Verify that when offset address is not null, infile must be 144 // seekable. The fs.Splice routine itself validates basic read. 145 if !inFile.Flags().Pread { 146 return 0, nil, linuxerr.ESPIPE 147 } 148 149 // Copy in the offset. 150 var offset int64 151 if _, err := primitive.CopyInt64In(t, offsetAddr, &offset); err != nil { 152 return 0, nil, err 153 } 154 155 // Do the splice. 156 n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{ 157 Length: count, 158 SrcOffset: true, 159 SrcStart: int64(offset), 160 }, outFile.Flags().NonBlocking) 161 162 // Copy out the new offset. 163 if _, err := primitive.CopyInt64Out(t, offsetAddr, offset+n); err != nil { 164 return 0, nil, err 165 } 166 } else { 167 // Send data using splice. 168 n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{ 169 Length: count, 170 }, outFile.Flags().NonBlocking) 171 } 172 173 // Sendfile can't lose any data because inFD is always a regual file. 174 if n != 0 { 175 err = nil 176 } 177 178 // We can only pass a single file to handleIOError, so pick inFile 179 // arbitrarily. This is used only for debugging purposes. 180 return uintptr(n), nil, handleIOError(t, false, err, syserror.ERESTARTSYS, "sendfile", inFile) 181 } 182 183 // Splice implements splice(2). 184 func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 185 inFD := args[0].Int() 186 inOffset := args[1].Pointer() 187 outFD := args[2].Int() 188 outOffset := args[3].Pointer() 189 count := int64(args[4].SizeT()) 190 flags := args[5].Int() 191 192 // Check for invalid flags. 193 if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { 194 return 0, nil, linuxerr.EINVAL 195 } 196 197 // Get files. 198 outFile := t.GetFile(outFD) 199 if outFile == nil { 200 return 0, nil, linuxerr.EBADF 201 } 202 defer outFile.DecRef(t) 203 204 inFile := t.GetFile(inFD) 205 if inFile == nil { 206 return 0, nil, linuxerr.EBADF 207 } 208 defer inFile.DecRef(t) 209 210 // The operation is non-blocking if anything is non-blocking. 211 // 212 // N.B. This is a rather simplistic heuristic that avoids some 213 // poor edge case behavior since the exact semantics here are 214 // underspecified and vary between versions of Linux itself. 215 nonBlock := inFile.Flags().NonBlocking || outFile.Flags().NonBlocking || (flags&linux.SPLICE_F_NONBLOCK != 0) 216 217 // Construct our options. 218 // 219 // Note that exactly one of the underlying buffers must be a pipe. We 220 // don't actually have this constraint internally, but we enforce it 221 // for the semantics of the call. 222 opts := fs.SpliceOpts{ 223 Length: count, 224 } 225 inFileAttr := inFile.Dirent.Inode.StableAttr 226 outFileAttr := outFile.Dirent.Inode.StableAttr 227 switch { 228 case fs.IsPipe(inFileAttr) && !fs.IsPipe(outFileAttr): 229 if inOffset != 0 { 230 return 0, nil, linuxerr.ESPIPE 231 } 232 if outOffset != 0 { 233 if !outFile.Flags().Pwrite { 234 return 0, nil, linuxerr.EINVAL 235 } 236 237 var offset int64 238 if _, err := primitive.CopyInt64In(t, outOffset, &offset); err != nil { 239 return 0, nil, err 240 } 241 242 // Use the destination offset. 243 opts.DstOffset = true 244 opts.DstStart = offset 245 } 246 case !fs.IsPipe(inFileAttr) && fs.IsPipe(outFileAttr): 247 if outOffset != 0 { 248 return 0, nil, linuxerr.ESPIPE 249 } 250 if inOffset != 0 { 251 if !inFile.Flags().Pread { 252 return 0, nil, linuxerr.EINVAL 253 } 254 255 var offset int64 256 if _, err := primitive.CopyInt64In(t, inOffset, &offset); err != nil { 257 return 0, nil, err 258 } 259 260 // Use the source offset. 261 opts.SrcOffset = true 262 opts.SrcStart = offset 263 } 264 case fs.IsPipe(inFileAttr) && fs.IsPipe(outFileAttr): 265 if inOffset != 0 || outOffset != 0 { 266 return 0, nil, linuxerr.ESPIPE 267 } 268 269 // We may not refer to the same pipe; otherwise it's a continuous loop. 270 if inFileAttr.InodeID == outFileAttr.InodeID { 271 return 0, nil, linuxerr.EINVAL 272 } 273 default: 274 return 0, nil, linuxerr.EINVAL 275 } 276 277 // Splice data. 278 n, err := doSplice(t, outFile, inFile, opts, nonBlock) 279 280 // Special files can have additional requirements for granularity. For 281 // example, read from eventfd returns EINVAL if a size is less 8 bytes. 282 // Inotify is another example. read will return EINVAL is a buffer is 283 // too small to return the next event, but a size of an event isn't 284 // fixed, it is sizeof(struct inotify_event) + {NAME_LEN} + 1. 285 if n != 0 && err != nil && (fs.IsAnonymous(inFileAttr) || fs.IsAnonymous(outFileAttr)) { 286 err = nil 287 } 288 289 // See above; inFile is chosen arbitrarily here. 290 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "splice", inFile) 291 } 292 293 // Tee imlements tee(2). 294 func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 295 inFD := args[0].Int() 296 outFD := args[1].Int() 297 count := int64(args[2].SizeT()) 298 flags := args[3].Int() 299 300 // Check for invalid flags. 301 if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 { 302 return 0, nil, linuxerr.EINVAL 303 } 304 305 // Get files. 306 outFile := t.GetFile(outFD) 307 if outFile == nil { 308 return 0, nil, linuxerr.EBADF 309 } 310 defer outFile.DecRef(t) 311 312 inFile := t.GetFile(inFD) 313 if inFile == nil { 314 return 0, nil, linuxerr.EBADF 315 } 316 defer inFile.DecRef(t) 317 318 // All files must be pipes. 319 if !fs.IsPipe(inFile.Dirent.Inode.StableAttr) || !fs.IsPipe(outFile.Dirent.Inode.StableAttr) { 320 return 0, nil, linuxerr.EINVAL 321 } 322 323 // We may not refer to the same pipe; see above. 324 if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID { 325 return 0, nil, linuxerr.EINVAL 326 } 327 328 // The operation is non-blocking if anything is non-blocking. 329 nonBlock := inFile.Flags().NonBlocking || outFile.Flags().NonBlocking || (flags&linux.SPLICE_F_NONBLOCK != 0) 330 331 // Splice data. 332 n, err := doSplice(t, outFile, inFile, fs.SpliceOpts{ 333 Length: count, 334 Dup: true, 335 }, nonBlock) 336 337 // Tee doesn't change a state of inFD, so it can't lose any data. 338 if n != 0 { 339 err = nil 340 } 341 342 // See above; inFile is chosen arbitrarily here. 343 return uintptr(n), nil, handleIOError(t, false, err, syserror.ERESTARTSYS, "tee", inFile) 344 }