github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/sys_aio.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package linux 16 17 import ( 18 "github.com/SagerNet/gvisor/pkg/abi/linux" 19 "github.com/SagerNet/gvisor/pkg/context" 20 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 21 "github.com/SagerNet/gvisor/pkg/hostarch" 22 "github.com/SagerNet/gvisor/pkg/marshal/primitive" 23 "github.com/SagerNet/gvisor/pkg/sentry/arch" 24 "github.com/SagerNet/gvisor/pkg/sentry/fs" 25 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 26 "github.com/SagerNet/gvisor/pkg/sentry/kernel/eventfd" 27 ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time" 28 "github.com/SagerNet/gvisor/pkg/sentry/mm" 29 "github.com/SagerNet/gvisor/pkg/syserror" 30 "github.com/SagerNet/gvisor/pkg/usermem" 31 ) 32 33 // IoSetup implements linux syscall io_setup(2). 34 func IoSetup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 35 nrEvents := args[0].Int() 36 idAddr := args[1].Pointer() 37 38 // Linux uses the native long as the aio ID. 39 // 40 // The context pointer _must_ be zero initially. 41 var idIn uint64 42 if _, err := primitive.CopyUint64In(t, idAddr, &idIn); err != nil { 43 return 0, nil, err 44 } 45 if idIn != 0 { 46 return 0, nil, linuxerr.EINVAL 47 } 48 49 id, err := t.MemoryManager().NewAIOContext(t, uint32(nrEvents)) 50 if err != nil { 51 return 0, nil, err 52 } 53 54 // Copy out the new ID. 55 if _, err := primitive.CopyUint64Out(t, idAddr, id); err != nil { 56 t.MemoryManager().DestroyAIOContext(t, id) 57 return 0, nil, err 58 } 59 60 return 0, nil, nil 61 } 62 63 // IoDestroy implements linux syscall io_destroy(2). 64 func IoDestroy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 65 id := args[0].Uint64() 66 67 ctx := t.MemoryManager().DestroyAIOContext(t, id) 68 if ctx == nil { 69 // Does not exist. 70 return 0, nil, linuxerr.EINVAL 71 } 72 73 // Drain completed requests amd wait for pending requests until there are no 74 // more. 75 for { 76 ctx.Drain() 77 78 ch := ctx.WaitChannel() 79 if ch == nil { 80 // No more requests, we're done. 81 return 0, nil, nil 82 } 83 // The task cannot be interrupted during the wait. Equivalent to 84 // TASK_UNINTERRUPTIBLE in Linux. 85 t.UninterruptibleSleepStart(true /* deactivate */) 86 <-ch 87 t.UninterruptibleSleepFinish(true /* activate */) 88 } 89 } 90 91 // IoGetevents implements linux syscall io_getevents(2). 92 func IoGetevents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 93 id := args[0].Uint64() 94 minEvents := args[1].Int() 95 events := args[2].Int() 96 eventsAddr := args[3].Pointer() 97 timespecAddr := args[4].Pointer() 98 99 // Sanity check arguments. 100 if minEvents < 0 || minEvents > events { 101 return 0, nil, linuxerr.EINVAL 102 } 103 104 ctx, ok := t.MemoryManager().LookupAIOContext(t, id) 105 if !ok { 106 return 0, nil, linuxerr.EINVAL 107 } 108 109 // Setup the timeout. 110 var haveDeadline bool 111 var deadline ktime.Time 112 if timespecAddr != 0 { 113 d, err := copyTimespecIn(t, timespecAddr) 114 if err != nil { 115 return 0, nil, err 116 } 117 if !d.Valid() { 118 return 0, nil, linuxerr.EINVAL 119 } 120 deadline = t.Kernel().MonotonicClock().Now().Add(d.ToDuration()) 121 haveDeadline = true 122 } 123 124 // Loop over all requests. 125 for count := int32(0); count < events; count++ { 126 // Get a request, per semantics. 127 var v interface{} 128 if count >= minEvents { 129 var ok bool 130 v, ok = ctx.PopRequest() 131 if !ok { 132 return uintptr(count), nil, nil 133 } 134 } else { 135 var err error 136 v, err = waitForRequest(ctx, t, haveDeadline, deadline) 137 if err != nil { 138 if count > 0 || linuxerr.Equals(linuxerr.ETIMEDOUT, err) { 139 return uintptr(count), nil, nil 140 } 141 return 0, nil, syserror.ConvertIntr(err, syserror.EINTR) 142 } 143 } 144 145 ev := v.(*linux.IOEvent) 146 147 // Copy out the result. 148 if _, err := ev.CopyOut(t, eventsAddr); err != nil { 149 if count > 0 { 150 return uintptr(count), nil, nil 151 } 152 // Nothing done. 153 return 0, nil, err 154 } 155 156 // Keep rolling. 157 eventsAddr += hostarch.Addr(linux.IOEventSize) 158 } 159 160 // Everything finished. 161 return uintptr(events), nil, nil 162 } 163 164 func waitForRequest(ctx *mm.AIOContext, t *kernel.Task, haveDeadline bool, deadline ktime.Time) (interface{}, error) { 165 for { 166 if v, ok := ctx.PopRequest(); ok { 167 // Request was readily available. Just return it. 168 return v, nil 169 } 170 171 // Need to wait for request completion. 172 done := ctx.WaitChannel() 173 if done == nil { 174 // Context has been destroyed. 175 return nil, linuxerr.EINVAL 176 } 177 if err := t.BlockWithDeadline(done, haveDeadline, deadline); err != nil { 178 return nil, err 179 } 180 } 181 } 182 183 // memoryFor returns appropriate memory for the given callback. 184 func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error) { 185 bytes := int(cb.Bytes) 186 if bytes < 0 { 187 // Linux also requires that this field fit in ssize_t. 188 return usermem.IOSequence{}, linuxerr.EINVAL 189 } 190 191 // Since this I/O will be asynchronous with respect to t's task goroutine, 192 // we have no guarantee that t's AddressSpace will be active during the 193 // I/O. 194 switch cb.OpCode { 195 case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PWRITE: 196 return t.SingleIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{ 197 AddressSpaceActive: false, 198 }) 199 200 case linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITEV: 201 return t.IovecsIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{ 202 AddressSpaceActive: false, 203 }) 204 205 case linux.IOCB_CMD_FSYNC, linux.IOCB_CMD_FDSYNC, linux.IOCB_CMD_NOOP: 206 return usermem.IOSequence{}, nil 207 208 default: 209 // Not a supported command. 210 return usermem.IOSequence{}, linuxerr.EINVAL 211 } 212 } 213 214 // IoCancel implements linux syscall io_cancel(2). 215 // 216 // It is not presently supported (ENOSYS indicates no support on this 217 // architecture). 218 func IoCancel(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 219 return 0, nil, syserror.ENOSYS 220 } 221 222 // LINT.IfChange 223 224 func getAIOCallback(t *kernel.Task, file *fs.File, cbAddr hostarch.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, actx *mm.AIOContext, eventFile *fs.File) kernel.AIOCallback { 225 return func(ctx context.Context) { 226 if actx.Dead() { 227 actx.CancelPendingRequest() 228 return 229 } 230 ev := &linux.IOEvent{ 231 Data: cb.Data, 232 Obj: uint64(cbAddr), 233 } 234 235 var err error 236 switch cb.OpCode { 237 case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PREADV: 238 ev.Result, err = file.Preadv(ctx, ioseq, cb.Offset) 239 case linux.IOCB_CMD_PWRITE, linux.IOCB_CMD_PWRITEV: 240 ev.Result, err = file.Pwritev(ctx, ioseq, cb.Offset) 241 case linux.IOCB_CMD_FSYNC: 242 err = file.Fsync(ctx, 0, fs.FileMaxOffset, fs.SyncAll) 243 case linux.IOCB_CMD_FDSYNC: 244 err = file.Fsync(ctx, 0, fs.FileMaxOffset, fs.SyncData) 245 } 246 247 // Update the result. 248 if err != nil { 249 err = handleIOError(t, ev.Result != 0 /* partial */, err, nil /* never interrupted */, "aio", file) 250 ev.Result = -int64(kernel.ExtractErrno(err, 0)) 251 } 252 253 file.DecRef(ctx) 254 255 // Queue the result for delivery. 256 actx.FinishRequest(ev) 257 258 // Notify the event file if one was specified. This needs to happen 259 // *after* queueing the result to avoid racing with the thread we may 260 // wake up. 261 if eventFile != nil { 262 eventFile.FileOperations.(*eventfd.EventOperations).Signal(1) 263 eventFile.DecRef(ctx) 264 } 265 } 266 } 267 268 // submitCallback processes a single callback. 269 func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr hostarch.Addr) error { 270 file := t.GetFile(cb.FD) 271 if file == nil { 272 // File not found. 273 return linuxerr.EBADF 274 } 275 defer file.DecRef(t) 276 277 // Was there an eventFD? Extract it. 278 var eventFile *fs.File 279 if cb.Flags&linux.IOCB_FLAG_RESFD != 0 { 280 eventFile = t.GetFile(cb.ResFD) 281 if eventFile == nil { 282 // Bad FD. 283 return linuxerr.EBADF 284 } 285 defer eventFile.DecRef(t) 286 287 // Check that it is an eventfd. 288 if _, ok := eventFile.FileOperations.(*eventfd.EventOperations); !ok { 289 // Not an event FD. 290 return linuxerr.EINVAL 291 } 292 } 293 294 ioseq, err := memoryFor(t, cb) 295 if err != nil { 296 return err 297 } 298 299 // Check offset for reads/writes. 300 switch cb.OpCode { 301 case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITE, linux.IOCB_CMD_PWRITEV: 302 if cb.Offset < 0 { 303 return linuxerr.EINVAL 304 } 305 } 306 307 // Prepare the request. 308 ctx, ok := t.MemoryManager().LookupAIOContext(t, id) 309 if !ok { 310 return linuxerr.EINVAL 311 } 312 if err := ctx.Prepare(); err != nil { 313 return err 314 } 315 316 if eventFile != nil { 317 // The request is set. Make sure there's a ref on the file. 318 // 319 // This is necessary when the callback executes on completion, 320 // which is also what will release this reference. 321 eventFile.IncRef() 322 } 323 324 // Perform the request asynchronously. 325 file.IncRef() 326 t.QueueAIO(getAIOCallback(t, file, cbAddr, cb, ioseq, ctx, eventFile)) 327 328 // All set. 329 return nil 330 } 331 332 // IoSubmit implements linux syscall io_submit(2). 333 func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 334 id := args[0].Uint64() 335 nrEvents := args[1].Int() 336 addr := args[2].Pointer() 337 338 if nrEvents < 0 { 339 return 0, nil, linuxerr.EINVAL 340 } 341 342 for i := int32(0); i < nrEvents; i++ { 343 // Copy in the callback address. 344 var cbAddr hostarch.Addr 345 switch t.Arch().Width() { 346 case 8: 347 var cbAddrP primitive.Uint64 348 if _, err := cbAddrP.CopyIn(t, addr); err != nil { 349 if i > 0 { 350 // Some successful. 351 return uintptr(i), nil, nil 352 } 353 // Nothing done. 354 return 0, nil, err 355 } 356 cbAddr = hostarch.Addr(cbAddrP) 357 default: 358 return 0, nil, syserror.ENOSYS 359 } 360 361 // Copy in this callback. 362 var cb linux.IOCallback 363 if _, err := cb.CopyIn(t, cbAddr); err != nil { 364 365 if i > 0 { 366 // Some have been successful. 367 return uintptr(i), nil, nil 368 } 369 // Nothing done. 370 return 0, nil, err 371 } 372 373 // Process this callback. 374 if err := submitCallback(t, id, &cb, cbAddr); err != nil { 375 if i > 0 { 376 // Partial success. 377 return uintptr(i), nil, nil 378 } 379 // Nothing done. 380 return 0, nil, err 381 } 382 383 // Advance to the next one. 384 addr += hostarch.Addr(t.Arch().Width()) 385 } 386 387 return uintptr(nrEvents), nil, nil 388 } 389 390 // LINT.ThenChange(vfs2/aio.go)