github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/sys_read.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package linux 16 17 import ( 18 "time" 19 20 "github.com/SagerNet/gvisor/pkg/abi/linux" 21 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 22 "github.com/SagerNet/gvisor/pkg/sentry/arch" 23 "github.com/SagerNet/gvisor/pkg/sentry/fs" 24 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 25 ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time" 26 "github.com/SagerNet/gvisor/pkg/sentry/socket" 27 "github.com/SagerNet/gvisor/pkg/syserror" 28 "github.com/SagerNet/gvisor/pkg/usermem" 29 "github.com/SagerNet/gvisor/pkg/waiter" 30 ) 31 32 // LINT.IfChange 33 34 const ( 35 // EventMaskRead contains events that can be triggered on reads. 36 EventMaskRead = waiter.ReadableEvents | waiter.EventHUp | waiter.EventErr 37 ) 38 39 // Read implements linux syscall read(2). Note that we try to get a buffer that 40 // is exactly the size requested because some applications like qemu expect 41 // they can do large reads all at once. Bug for bug. Same for other read 42 // calls below. 43 func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 44 fd := args[0].Int() 45 addr := args[1].Pointer() 46 size := args[2].SizeT() 47 48 file := t.GetFile(fd) 49 if file == nil { 50 return 0, nil, linuxerr.EBADF 51 } 52 defer file.DecRef(t) 53 54 // Check that the file is readable. 55 if !file.Flags().Read { 56 return 0, nil, linuxerr.EBADF 57 } 58 59 // Check that the size is legitimate. 60 si := int(size) 61 if si < 0 { 62 return 0, nil, linuxerr.EINVAL 63 } 64 65 // Get the destination of the read. 66 dst, err := t.SingleIOSequence(addr, si, usermem.IOOpts{ 67 AddressSpaceActive: true, 68 }) 69 if err != nil { 70 return 0, nil, err 71 } 72 73 n, err := readv(t, file, dst) 74 t.IOUsage().AccountReadSyscall(n) 75 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "read", file) 76 } 77 78 // Readahead implements readahead(2). 79 func Readahead(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 80 fd := args[0].Int() 81 offset := args[1].Int64() 82 size := args[2].SizeT() 83 84 file := t.GetFile(fd) 85 if file == nil { 86 return 0, nil, linuxerr.EBADF 87 } 88 defer file.DecRef(t) 89 90 // Check that the file is readable. 91 if !file.Flags().Read { 92 return 0, nil, linuxerr.EBADF 93 } 94 95 // Check that the size is valid. 96 if int(size) < 0 { 97 return 0, nil, linuxerr.EINVAL 98 } 99 100 // Check that the offset is legitimate and does not overflow. 101 if offset < 0 || offset+int64(size) < 0 { 102 return 0, nil, linuxerr.EINVAL 103 } 104 105 // Return EINVAL; if the underlying file type does not support readahead, 106 // then Linux will return EINVAL to indicate as much. In the future, we 107 // may extend this function to actually support readahead hints. 108 return 0, nil, linuxerr.EINVAL 109 } 110 111 // Pread64 implements linux syscall pread64(2). 112 func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 113 fd := args[0].Int() 114 addr := args[1].Pointer() 115 size := args[2].SizeT() 116 offset := args[3].Int64() 117 118 file := t.GetFile(fd) 119 if file == nil { 120 return 0, nil, linuxerr.EBADF 121 } 122 defer file.DecRef(t) 123 124 // Check that the offset is legitimate and does not overflow. 125 if offset < 0 || offset+int64(size) < 0 { 126 return 0, nil, linuxerr.EINVAL 127 } 128 129 // Is reading at an offset supported? 130 if !file.Flags().Pread { 131 return 0, nil, linuxerr.ESPIPE 132 } 133 134 // Check that the file is readable. 135 if !file.Flags().Read { 136 return 0, nil, linuxerr.EBADF 137 } 138 139 // Check that the size is legitimate. 140 si := int(size) 141 if si < 0 { 142 return 0, nil, linuxerr.EINVAL 143 } 144 145 // Get the destination of the read. 146 dst, err := t.SingleIOSequence(addr, si, usermem.IOOpts{ 147 AddressSpaceActive: true, 148 }) 149 if err != nil { 150 return 0, nil, err 151 } 152 153 n, err := preadv(t, file, dst, offset) 154 t.IOUsage().AccountReadSyscall(n) 155 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pread64", file) 156 } 157 158 // Readv implements linux syscall readv(2). 159 func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 160 fd := args[0].Int() 161 addr := args[1].Pointer() 162 iovcnt := int(args[2].Int()) 163 164 file := t.GetFile(fd) 165 if file == nil { 166 return 0, nil, linuxerr.EBADF 167 } 168 defer file.DecRef(t) 169 170 // Check that the file is readable. 171 if !file.Flags().Read { 172 return 0, nil, linuxerr.EBADF 173 } 174 175 // Read the iovecs that specify the destination of the read. 176 dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ 177 AddressSpaceActive: true, 178 }) 179 if err != nil { 180 return 0, nil, err 181 } 182 183 n, err := readv(t, file, dst) 184 t.IOUsage().AccountReadSyscall(n) 185 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "readv", file) 186 } 187 188 // Preadv implements linux syscall preadv(2). 189 func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 190 fd := args[0].Int() 191 addr := args[1].Pointer() 192 iovcnt := int(args[2].Int()) 193 offset := args[3].Int64() 194 195 file := t.GetFile(fd) 196 if file == nil { 197 return 0, nil, linuxerr.EBADF 198 } 199 defer file.DecRef(t) 200 201 // Check that the offset is legitimate. 202 if offset < 0 { 203 return 0, nil, linuxerr.EINVAL 204 } 205 206 // Is reading at an offset supported? 207 if !file.Flags().Pread { 208 return 0, nil, linuxerr.ESPIPE 209 } 210 211 // Check that the file is readable. 212 if !file.Flags().Read { 213 return 0, nil, linuxerr.EBADF 214 } 215 216 // Read the iovecs that specify the destination of the read. 217 dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ 218 AddressSpaceActive: true, 219 }) 220 if err != nil { 221 return 0, nil, err 222 } 223 224 n, err := preadv(t, file, dst, offset) 225 t.IOUsage().AccountReadSyscall(n) 226 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv", file) 227 } 228 229 // Preadv2 implements linux syscall preadv2(2). 230 func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 231 // While the syscall is 232 // preadv2(int fd, struct iovec* iov, int iov_cnt, off_t offset, int flags) 233 // the linux internal call 234 // (https://elixir.bootlin.com/linux/v4.18/source/fs/read_write.c#L1248) 235 // splits the offset argument into a high/low value for compatibility with 236 // 32-bit architectures. The flags argument is the 5th argument. 237 238 fd := args[0].Int() 239 addr := args[1].Pointer() 240 iovcnt := int(args[2].Int()) 241 offset := args[3].Int64() 242 flags := int(args[5].Int()) 243 244 file := t.GetFile(fd) 245 if file == nil { 246 return 0, nil, linuxerr.EBADF 247 } 248 defer file.DecRef(t) 249 250 // Check that the offset is legitimate. 251 if offset < -1 { 252 return 0, nil, linuxerr.EINVAL 253 } 254 255 // Is reading at an offset supported? 256 if offset > -1 && !file.Flags().Pread { 257 return 0, nil, linuxerr.ESPIPE 258 } 259 260 // Check that the file is readable. 261 if !file.Flags().Read { 262 return 0, nil, linuxerr.EBADF 263 } 264 265 // Check flags field. 266 // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is 267 // accepted as a valid flag argument for preadv2. 268 if flags&^linux.RWF_VALID != 0 { 269 return 0, nil, syserror.EOPNOTSUPP 270 } 271 272 // Read the iovecs that specify the destination of the read. 273 dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ 274 AddressSpaceActive: true, 275 }) 276 if err != nil { 277 return 0, nil, err 278 } 279 280 // If preadv2 is called with an offset of -1, readv is called. 281 if offset == -1 { 282 n, err := readv(t, file, dst) 283 t.IOUsage().AccountReadSyscall(n) 284 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file) 285 } 286 287 n, err := preadv(t, file, dst, offset) 288 t.IOUsage().AccountReadSyscall(n) 289 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file) 290 } 291 292 func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) { 293 n, err := f.Readv(t, dst) 294 if err != syserror.ErrWouldBlock || f.Flags().NonBlocking { 295 if n > 0 { 296 // Queue notification if we read anything. 297 f.Dirent.InotifyEvent(linux.IN_ACCESS, 0) 298 } 299 return n, err 300 } 301 302 // Sockets support read timeouts. 303 var haveDeadline bool 304 var deadline ktime.Time 305 if s, ok := f.FileOperations.(socket.Socket); ok { 306 dl := s.RecvTimeout() 307 if dl < 0 && err == syserror.ErrWouldBlock { 308 return n, err 309 } 310 if dl > 0 { 311 deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) 312 haveDeadline = true 313 } 314 } 315 316 // Register for notifications. 317 w, ch := waiter.NewChannelEntry(nil) 318 f.EventRegister(&w, EventMaskRead) 319 320 total := n 321 for { 322 // Shorten dst to reflect bytes previously read. 323 dst = dst.DropFirst64(n) 324 325 // Issue the request and break out if it completes with anything 326 // other than "would block". 327 n, err = f.Readv(t, dst) 328 total += n 329 if err != syserror.ErrWouldBlock { 330 break 331 } 332 333 // Wait for a notification that we should retry. 334 if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { 335 if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { 336 err = syserror.ErrWouldBlock 337 } 338 break 339 } 340 } 341 342 f.EventUnregister(&w) 343 344 if total > 0 { 345 // Queue notification if we read anything. 346 f.Dirent.InotifyEvent(linux.IN_ACCESS, 0) 347 } 348 349 return total, err 350 } 351 352 func preadv(t *kernel.Task, f *fs.File, dst usermem.IOSequence, offset int64) (int64, error) { 353 n, err := f.Preadv(t, dst, offset) 354 if err != syserror.ErrWouldBlock || f.Flags().NonBlocking { 355 if n > 0 { 356 // Queue notification if we read anything. 357 f.Dirent.InotifyEvent(linux.IN_ACCESS, 0) 358 } 359 return n, err 360 } 361 362 // Register for notifications. 363 w, ch := waiter.NewChannelEntry(nil) 364 f.EventRegister(&w, EventMaskRead) 365 366 total := n 367 for { 368 // Shorten dst to reflect bytes previously read. 369 dst = dst.DropFirst64(n) 370 371 // Issue the request and break out if it completes with anything 372 // other than "would block". 373 n, err = f.Preadv(t, dst, offset+total) 374 total += n 375 if err != syserror.ErrWouldBlock { 376 break 377 } 378 379 // Wait for a notification that we should retry. 380 if err = t.Block(ch); err != nil { 381 break 382 } 383 } 384 385 f.EventUnregister(&w) 386 387 if total > 0 { 388 // Queue notification if we read anything. 389 f.Dirent.InotifyEvent(linux.IN_ACCESS, 0) 390 } 391 392 return total, err 393 } 394 395 // LINT.ThenChange(vfs2/read_write.go)