github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/sys_write.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package linux 16 17 import ( 18 "time" 19 20 "github.com/SagerNet/gvisor/pkg/abi/linux" 21 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 22 "github.com/SagerNet/gvisor/pkg/sentry/arch" 23 "github.com/SagerNet/gvisor/pkg/sentry/fs" 24 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 25 ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time" 26 "github.com/SagerNet/gvisor/pkg/sentry/socket" 27 "github.com/SagerNet/gvisor/pkg/syserror" 28 "github.com/SagerNet/gvisor/pkg/usermem" 29 "github.com/SagerNet/gvisor/pkg/waiter" 30 ) 31 32 // LINT.IfChange 33 34 const ( 35 // EventMaskWrite contains events that can be triggered on writes. 36 // 37 // Note that EventHUp is not going to happen for pipes but may for 38 // implementations of poll on some sockets, see net/core/datagram.c. 39 EventMaskWrite = waiter.EventOut | waiter.EventHUp | waiter.EventErr 40 ) 41 42 // Write implements linux syscall write(2). 43 func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 44 fd := args[0].Int() 45 addr := args[1].Pointer() 46 size := args[2].SizeT() 47 48 file := t.GetFile(fd) 49 if file == nil { 50 return 0, nil, linuxerr.EBADF 51 } 52 defer file.DecRef(t) 53 54 // Check that the file is writable. 55 if !file.Flags().Write { 56 return 0, nil, linuxerr.EBADF 57 } 58 59 // Check that the size is legitimate. 60 si := int(size) 61 if si < 0 { 62 return 0, nil, linuxerr.EINVAL 63 } 64 65 // Get the source of the write. 66 src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{ 67 AddressSpaceActive: true, 68 }) 69 if err != nil { 70 return 0, nil, err 71 } 72 73 n, err := writev(t, file, src) 74 t.IOUsage().AccountWriteSyscall(n) 75 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "write", file) 76 } 77 78 // Pwrite64 implements linux syscall pwrite64(2). 79 func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 80 fd := args[0].Int() 81 addr := args[1].Pointer() 82 size := args[2].SizeT() 83 offset := args[3].Int64() 84 85 file := t.GetFile(fd) 86 if file == nil { 87 return 0, nil, linuxerr.EBADF 88 } 89 defer file.DecRef(t) 90 91 // Check that the offset is legitimate and does not overflow. 92 if offset < 0 || offset+int64(size) < 0 { 93 return 0, nil, linuxerr.EINVAL 94 } 95 96 // Is writing at an offset supported? 97 if !file.Flags().Pwrite { 98 return 0, nil, linuxerr.ESPIPE 99 } 100 101 // Check that the file is writable. 102 if !file.Flags().Write { 103 return 0, nil, linuxerr.EBADF 104 } 105 106 // Check that the size is legitimate. 107 si := int(size) 108 if si < 0 { 109 return 0, nil, linuxerr.EINVAL 110 } 111 112 // Get the source of the write. 113 src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{ 114 AddressSpaceActive: true, 115 }) 116 if err != nil { 117 return 0, nil, err 118 } 119 120 n, err := pwritev(t, file, src, offset) 121 t.IOUsage().AccountWriteSyscall(n) 122 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwrite64", file) 123 } 124 125 // Writev implements linux syscall writev(2). 126 func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 127 fd := args[0].Int() 128 addr := args[1].Pointer() 129 iovcnt := int(args[2].Int()) 130 131 file := t.GetFile(fd) 132 if file == nil { 133 return 0, nil, linuxerr.EBADF 134 } 135 defer file.DecRef(t) 136 137 // Check that the file is writable. 138 if !file.Flags().Write { 139 return 0, nil, linuxerr.EBADF 140 } 141 142 // Read the iovecs that specify the source of the write. 143 src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ 144 AddressSpaceActive: true, 145 }) 146 if err != nil { 147 return 0, nil, err 148 } 149 150 n, err := writev(t, file, src) 151 t.IOUsage().AccountWriteSyscall(n) 152 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "writev", file) 153 } 154 155 // Pwritev implements linux syscall pwritev(2). 156 func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 157 fd := args[0].Int() 158 addr := args[1].Pointer() 159 iovcnt := int(args[2].Int()) 160 offset := args[3].Int64() 161 162 file := t.GetFile(fd) 163 if file == nil { 164 return 0, nil, linuxerr.EBADF 165 } 166 defer file.DecRef(t) 167 168 // Check that the offset is legitimate. 169 if offset < 0 { 170 return 0, nil, linuxerr.EINVAL 171 } 172 173 // Is writing at an offset supported? 174 if !file.Flags().Pwrite { 175 return 0, nil, linuxerr.ESPIPE 176 } 177 178 // Check that the file is writable. 179 if !file.Flags().Write { 180 return 0, nil, linuxerr.EBADF 181 } 182 183 // Read the iovecs that specify the source of the write. 184 src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ 185 AddressSpaceActive: true, 186 }) 187 if err != nil { 188 return 0, nil, err 189 } 190 191 n, err := pwritev(t, file, src, offset) 192 t.IOUsage().AccountWriteSyscall(n) 193 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev", file) 194 } 195 196 // Pwritev2 implements linux syscall pwritev2(2). 197 func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 198 // While the syscall is 199 // pwritev2(int fd, struct iovec* iov, int iov_cnt, off_t offset, int flags) 200 // the linux internal call 201 // (https://elixir.bootlin.com/linux/v4.18/source/fs/read_write.c#L1354) 202 // splits the offset argument into a high/low value for compatibility with 203 // 32-bit architectures. The flags argument is the 5th argument. 204 205 fd := args[0].Int() 206 addr := args[1].Pointer() 207 iovcnt := int(args[2].Int()) 208 offset := args[3].Int64() 209 flags := int(args[5].Int()) 210 211 if int(args[4].Int())&0x4 == 1 { 212 return 0, nil, linuxerr.EACCES 213 } 214 215 file := t.GetFile(fd) 216 if file == nil { 217 return 0, nil, linuxerr.EBADF 218 } 219 defer file.DecRef(t) 220 221 // Check that the offset is legitimate. 222 if offset < -1 { 223 return 0, nil, linuxerr.EINVAL 224 } 225 226 // Is writing at an offset supported? 227 if offset > -1 && !file.Flags().Pwrite { 228 return 0, nil, linuxerr.ESPIPE 229 } 230 231 // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is 232 // accepted as a valid flag argument for pwritev2. 233 if flags&^linux.RWF_VALID != 0 { 234 return uintptr(flags), nil, syserror.EOPNOTSUPP 235 } 236 237 // Check that the file is writeable. 238 if !file.Flags().Write { 239 return 0, nil, linuxerr.EBADF 240 } 241 242 // Read the iovecs that specify the source of the write. 243 src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{ 244 AddressSpaceActive: true, 245 }) 246 if err != nil { 247 return 0, nil, err 248 } 249 250 // If pwritev2 is called with an offset of -1, writev is called. 251 if offset == -1 { 252 n, err := writev(t, file, src) 253 t.IOUsage().AccountWriteSyscall(n) 254 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file) 255 } 256 257 n, err := pwritev(t, file, src, offset) 258 t.IOUsage().AccountWriteSyscall(n) 259 return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file) 260 } 261 262 func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) { 263 n, err := f.Writev(t, src) 264 if err != syserror.ErrWouldBlock || f.Flags().NonBlocking { 265 if n > 0 { 266 // Queue notification if we wrote anything. 267 f.Dirent.InotifyEvent(linux.IN_MODIFY, 0) 268 } 269 return n, err 270 } 271 272 // Sockets support write timeouts. 273 var haveDeadline bool 274 var deadline ktime.Time 275 if s, ok := f.FileOperations.(socket.Socket); ok { 276 dl := s.SendTimeout() 277 if dl < 0 && err == syserror.ErrWouldBlock { 278 return n, err 279 } 280 if dl > 0 { 281 deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond) 282 haveDeadline = true 283 } 284 } 285 286 // Register for notifications. 287 w, ch := waiter.NewChannelEntry(nil) 288 f.EventRegister(&w, EventMaskWrite) 289 290 total := n 291 for { 292 // Shorten src to reflect bytes previously written. 293 src = src.DropFirst64(n) 294 295 // Issue the request and break out if it completes with 296 // anything other than "would block". 297 n, err = f.Writev(t, src) 298 total += n 299 if err != syserror.ErrWouldBlock { 300 break 301 } 302 303 // Wait for a notification that we should retry. 304 if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil { 305 if linuxerr.Equals(linuxerr.ETIMEDOUT, err) { 306 err = syserror.ErrWouldBlock 307 } 308 break 309 } 310 } 311 312 f.EventUnregister(&w) 313 314 if total > 0 { 315 // Queue notification if we wrote anything. 316 f.Dirent.InotifyEvent(linux.IN_MODIFY, 0) 317 } 318 319 return total, err 320 } 321 322 func pwritev(t *kernel.Task, f *fs.File, src usermem.IOSequence, offset int64) (int64, error) { 323 n, err := f.Pwritev(t, src, offset) 324 if err != syserror.ErrWouldBlock || f.Flags().NonBlocking { 325 if n > 0 { 326 // Queue notification if we wrote anything. 327 f.Dirent.InotifyEvent(linux.IN_MODIFY, 0) 328 } 329 return n, err 330 } 331 332 // Register for notifications. 333 w, ch := waiter.NewChannelEntry(nil) 334 f.EventRegister(&w, EventMaskWrite) 335 336 total := n 337 for { 338 // Shorten src to reflect bytes previously written. 339 src = src.DropFirst64(n) 340 341 // Issue the request and break out if it completes with 342 // anything other than "would block". 343 n, err = f.Pwritev(t, src, offset+total) 344 total += n 345 if err != syserror.ErrWouldBlock { 346 break 347 } 348 349 // Wait for a notification that we should retry. 350 if err = t.Block(ch); err != nil { 351 break 352 } 353 } 354 355 f.EventUnregister(&w) 356 357 if total > 0 { 358 // Queue notification if we wrote anything. 359 f.Dirent.InotifyEvent(linux.IN_MODIFY, 0) 360 } 361 362 return total, err 363 } 364 365 // LINT.ThenChange(vfs2/read_write.go)