github.com/ii64/gouring@v0.4.1/queue.go

package gouring

import (
    "runtime"
    "sync/atomic"
    "syscall"
    "unsafe"
)

const LIBURING_UDATA_TIMEOUT uint64 = ^uint64(0)

/*
 * Returns true if we're not using SQ thread (thus nobody submits but us)
 * or if IORING_SQ_NEED_WAKEUP is set, so submit thread must be explicitly
 * awakened. For the latter case, we set the thread wakeup flag.
 */
func (ring *IoUring) sq_ring_needs_enter(flags *uint32) bool {
    if ring.Flags&IORING_SETUP_SQPOLL == 0 {
        return true
    }

    // FIXME: io_uring_smp_mb

    if atomic.LoadUint32(ring.Sq._Flags())&IORING_SQ_NEED_WAKEUP != 0 {
        *flags |= IORING_ENTER_SQ_WAKEUP
        return true
    }
    return false
}

func (ring *IoUring) cq_ring_needs_flush() bool {
    return atomic.LoadUint32(ring.Sq._Flags())&(IORING_SQ_CQ_OVERFLOW|IORING_SQ_TASKRUN) != 0
}

func (ring *IoUring) cq_ring_needs_enter() bool {
    return (ring.Flags&IORING_SETUP_IOPOLL != 0) || ring.cq_ring_needs_flush()
}

type get_data struct {
    submit   uint32
    waitNr   uint32
    getFlags uint32
    sz       int32
    arg      unsafe.Pointer
}

func (ring *IoUring) _io_uring_get_cqe(cqePtr **IoUringCqe, data *get_data) (err error) {
    var cqe *IoUringCqe
    var looped = false
    var ret int
    for {
        var needEnter = false
        var flags uint32 = 0
        var nrAvail uint32 = 0
        err = ring.__io_uring_peek_cqe(&cqe, &nrAvail)
        if err != nil {
            break
        }
        if cqe == nil && data.waitNr == 0 && data.submit == 0 {
            /*
             * If we already looped once, we already entered
             * the kernel. Since there's nothing to submit or
             * wait for, don't keep retrying.
             */
            if looped || !ring.cq_ring_needs_enter() {
                err = syscall.EAGAIN
                break
            }
            needEnter = true
        }
        if data.waitNr > nrAvail || needEnter {
            flags = IORING_ENTER_GETEVENTS | data.getFlags
            needEnter = true
        }
        if data.submit > 0 && ring.sq_ring_needs_enter(&flags) {
            needEnter = true
        }
        if !needEnter {
            break
        }

        if ring.IntFlags&INT_FLAG_REG_RING != 0 {
            flags |= IORING_ENTER_REGISTERED_RING
        }
        ret, err = io_uring_enter2(ring.EnterRingFd, data.submit, data.waitNr, flags, (*Sigset_t)(data.arg), data.sz)
        if err != nil {
            break
        }
        data.submit -= uint32(ret)
        if cqe != nil {
            break
        }
        looped = true
    }

    *cqePtr = cqe
    return
}

func (ring *IoUring) __io_uring_get_cqe(cqePtr **IoUringCqe, submit uint32, waitNr uint32, sigmask *Sigset_t) error {
    data := &get_data{
        submit:   submit,
        waitNr:   waitNr,
        getFlags: 0,
        sz:       NSIG / 8,
        arg:      unsafe.Pointer(sigmask),
    }
    return ring._io_uring_get_cqe(cqePtr, data)
}
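// peekOrWaitSketch is an illustrative helper, not part of the original
// library: it sketches how the get_data-driven loop above is typically used.
// With waitNr == 0 this behaves like a non-blocking peek and may return
// syscall.EAGAIN; with waitNr > 0 it enters the kernel with
// IORING_ENTER_GETEVENTS until that many completions are available. The
// helper name is hypothetical.
func (ring *IoUring) peekOrWaitSketch(waitNr uint32) (*IoUringCqe, error) {
    var cqe *IoUringCqe
    // submit == 0: nothing new is pushed to the SQ ring by this call.
    err := ring.__io_uring_get_cqe(&cqe, 0, waitNr, nil)
    return cqe, err
}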
/*
 * Fill in an array of IO completions up to count, if any are available.
 * Returns the amount of IO completions filled.
 */
func (ring *IoUring) io_uring_peek_batch_cqe(cqes []*IoUringCqe, count uint32) uint32 {
    var ready uint32
    var overflowChecked = false
    var shift = 0
    if ring.Flags&IORING_SETUP_CQE32 != 0 {
        shift = 1
    }

again:
    ready = ring.io_uring_cq_ready()
    if ready > 0 {
        var head = *ring.Cq._Head()
        var mask = *ring.Cq._RingMask()
        var last uint32
        if count > ready {
            count = ready
        }
        last = head + count
        var i uintptr = 0
        for head != last {
            cqes[i] = ioUringCqeArray_Index(ring.Cq.Cqes, uintptr((head&mask)<<uint32(shift)))
            i++
            head++
        }
        return count
    }

    if overflowChecked {
        goto done
    }

    if ring.cq_ring_needs_flush() {
        var flags uint32 = IORING_ENTER_GETEVENTS
        if ring.IntFlags&INT_FLAG_REG_RING != 0 {
            flags |= IORING_ENTER_REGISTERED_RING
        }
        io_uring_enter(ring.EnterRingFd, 0, 0, flags, nil)
        overflowChecked = true
        goto again
    }

done:
    return 0
}

/*
 * Sync internal state with kernel ring state on the SQ side. Returns the
 * number of pending items in the SQ ring, for the shared ring.
 */
func (ring *IoUring) __io_uring_flush_sq() uint32 {
    sq := &ring.Sq
    var mask = *sq._RingMask()
    var ktail = *sq._Tail()
    var toSubmit = sq.SqeTail - sq.SqeHead

    if toSubmit < 1 {
        goto out
    }

    /*
     * Fill in sqes that we have queued up, adding them to the kernel ring.
     */
    for ; toSubmit > 0; toSubmit-- {
        *uint32Array_Index(sq.Array, uintptr(ktail&mask)) = sq.SqeHead & mask
        ktail++
        sq.SqeHead++
    }

    /*
     * Ensure that the kernel sees the SQE updates before it sees the tail
     * update.
     */
    atomic.StoreUint32(sq._Tail(), ktail)

out:
    /*
     * This _may_ look problematic, as we're not supposed to be reading
     * SQ->head without acquire semantics. When we're in SQPOLL mode, the
     * kernel submitter could be updating this right now. For non-SQPOLL,
     * the task itself does it, and there's no potential race. But even for
     * SQPOLL, the load is going to be potentially out-of-date the very
     * instant it's done, regardless of whether or not it's done
     * atomically. Worst case, we're going to be over-estimating what
     * we can submit. The point is, we need to be able to deal with this
     * situation regardless of any perceived atomicity.
     */
    return ktail - *sq._Head()
}

/*
 * If we have kernel support for IORING_ENTER_EXT_ARG, then we can use that
 * more efficiently than queueing an internal timeout command.
 */
func (ring *IoUring) io_uring_wait_cqes_new(cqePtr **IoUringCqe, waitNtr uint32, ts *syscall.Timespec, sigmask *Sigset_t) error {
    arg := &IoUringGeteventsArg{
        Sigmask:   uint64(uintptr(unsafe.Pointer(sigmask))),
        SigmaskSz: NSIG / 8,
        Ts:        uint64(uintptr(unsafe.Pointer(ts))),
    }
    data := &get_data{
        waitNr:   waitNtr,
        getFlags: IORING_ENTER_EXT_ARG,
        // Pass the extended-argument struct itself (and its size) down to
        // io_uring_enter2, not the size of the pointer to it.
        sz:  int32(unsafe.Sizeof(*arg)),
        arg: unsafe.Pointer(arg),
    }
    return ring._io_uring_get_cqe(cqePtr, data)
}
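// reapBatchSketch is an illustrative helper, not part of the original
// library: it drains up to len(buf) ready completions with
// io_uring_peek_batch_cqe (which flushes CQ overflow if needed) and then
// advances the CQ head once for the whole batch instead of calling
// io_uring_cqe_seen per CQE. The handle callback is hypothetical.
func (ring *IoUring) reapBatchSketch(buf []*IoUringCqe, handle func(*IoUringCqe)) uint32 {
    n := ring.io_uring_peek_batch_cqe(buf, uint32(len(buf)))
    for i := uint32(0); i < n; i++ {
        handle(buf[i])
    }
    // Mark all consumed entries seen in one CQ head update.
    ring.io_uring_cq_advance(n)
    return n
}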
/*
 * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note
 * that an sqe is used internally to handle the timeout. For kernels that
 * don't support IORING_FEAT_EXT_ARG, applications using this function must
 * never set sqe->user_data to LIBURING_UDATA_TIMEOUT!
 *
 * For kernels without IORING_FEAT_EXT_ARG (5.10 and older), if 'ts' is
 * specified, the application need not call io_uring_submit() before
 * calling this function, as we will do that on its behalf. From this it also
 * follows that this function isn't safe to use for applications that split SQ
 * and CQ handling between two threads and expect that to work without
 * synchronization, as this function manipulates both the SQ and CQ side.
 *
 * For kernels with IORING_FEAT_EXT_ARG, no implicit submission is done and
 * hence this function is safe to use for applications that split SQ and CQ
 * handling between two threads.
 */
func (ring *IoUring) __io_uring_submit_timeout(waitNr uint32, ts *syscall.Timespec) (ret int, err error) {
    sqe := ring.io_uring_get_sqe()
    if sqe == nil {
        ret, err = ring.io_uringn_submit()
        if err != nil {
            return
        }
        sqe = ring.io_uring_get_sqe()
        if sqe == nil {
            err = syscall.EAGAIN
            return
        }
    }

    PrepTimeout(sqe, ts, waitNr, 0)
    sqe.UserData.SetUint64(LIBURING_UDATA_TIMEOUT)
    ret = int(ring.__io_uring_flush_sq())
    return
}

func (ring *IoUring) io_uring_wait_cqes(cqePtr **IoUringCqe, waitNtr uint32, ts *syscall.Timespec, sigmask *Sigset_t) (err error) {
    var toSubmit = 0
    if ts != nil {
        if ring.Features&IORING_FEAT_EXT_ARG != 0 {
            err = ring.io_uring_wait_cqes_new(cqePtr, waitNtr, ts, sigmask)
            return
        }
        toSubmit, err = ring.__io_uring_submit_timeout(waitNtr, ts)
        if err != nil {
            return
        }
    }
    err = ring.__io_uring_get_cqe(cqePtr, uint32(toSubmit), waitNtr, sigmask)
    return
}

func (ring *IoUring) io_uring_submit_and_wait_timeout(cqePtr **IoUringCqe, waitNtr uint32, ts *syscall.Timespec, sigmask *Sigset_t) (err error) {
    var toSubmit int
    if ts != nil {
        if ring.Features&IORING_FEAT_EXT_ARG != 0 {
            arg := IoUringGeteventsArg{
                Sigmask:   uint64(uintptr(unsafe.Pointer(sigmask))),
                SigmaskSz: NSIG / 8,
                Ts:        uint64(uintptr(unsafe.Pointer(ts))),
            }
            data := &get_data{
                submit:   ring.__io_uring_flush_sq(),
                waitNr:   waitNtr,
                getFlags: IORING_ENTER_EXT_ARG,
                sz:       int32(unsafe.Sizeof(arg)),
                arg:      unsafe.Pointer(&arg),
            }
            return ring._io_uring_get_cqe(cqePtr, data)
        }
        toSubmit, err = ring.__io_uring_submit_timeout(waitNtr, ts)
        if err != nil {
            return
        }
    } else {
        toSubmit = int(ring.__io_uring_flush_sq())
    }
    err = ring.__io_uring_get_cqe(cqePtr, uint32(toSubmit), waitNtr, sigmask)
    return
}

/*
 * See io_uring_wait_cqes() - this function is the same, it just always uses
 * '1' as the wait_nr.
 */
func (ring *IoUring) io_uring_wait_cqe_timeout(cqePtr **IoUringCqe, ts *syscall.Timespec) error {
    return ring.io_uring_wait_cqes(cqePtr, 1, ts, nil)
}

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns number of sqes submitted.
 */
func (ring *IoUring) io_uringn_submit() (int, error) {
    return ring.__io_uring_submit_and_wait(0)
}
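// waitWithTimeoutSketch is an illustrative helper, not part of the original
// library: it waits up to the given number of seconds for waitNr completions
// using io_uring_wait_cqes above. On kernels with IORING_FEAT_EXT_ARG no
// implicit submission happens; on older kernels an internal timeout SQE is
// queued and flushed on the caller's behalf, as described in the comment
// above. The helper name is hypothetical.
func (ring *IoUring) waitWithTimeoutSketch(waitNr uint32, sec int64) (*IoUringCqe, error) {
    ts := syscall.NsecToTimespec(sec * 1_000_000_000)
    var cqe *IoUringCqe
    err := ring.io_uring_wait_cqes(&cqe, waitNr, &ts, nil)
    return cqe, err
}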
/*
 * Like io_uring_submit(), but allows waiting for events as well.
 *
 * Returns number of sqes submitted.
 */
func (ring *IoUring) io_uring_submit_and_wait(waitNtr uint32) (int, error) {
    return ring.__io_uring_submit_and_wait(waitNtr)
}

func (ring *IoUring) __io_uring_submit_and_wait(waitNr uint32) (int, error) {
    return ring.__io_uring_submit(ring.__io_uring_flush_sq(), waitNr)
}

func (ring *IoUring) __io_uring_submit(submitted uint32, waitNr uint32) (ret int, err error) {
    var flags uint32 = 0

    if ring.sq_ring_needs_enter(&flags) || waitNr != 0 {
        if waitNr != 0 || ring.Flags&IORING_SETUP_IOPOLL != 0 {
            flags |= IORING_ENTER_GETEVENTS
        }
        if ring.IntFlags&INT_FLAG_REG_RING != 0 {
            flags |= IORING_ENTER_REGISTERED_RING
        }
        ret, err = io_uring_enter(ring.EnterRingFd, submitted, waitNr, flags, nil)
    } else {
        ret = int(submitted)
    }
    return
}

func (ring *IoUring) io_uring_get_sqe() *IoUringSqe {
    return ring._io_uring_get_sqe()
}

/*
 * Return an sqe to fill. Application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or nil if we're full.
 */
func (ring *IoUring) _io_uring_get_sqe() (sqe *IoUringSqe) {
    sq := &ring.Sq
    var head = atomic.LoadUint32(sq._Head())
    var next = sq.SqeTail + 1
    var shift uint32 = 0

    if ring.Flags&IORING_SETUP_SQE128 != 0 {
        shift = 1
    }

    if next-head <= *sq._RingEntries() {
        sqe = ioUringSqeArray_Index(sq.Sqes, uintptr((sq.SqeTail&*sq._RingMask())<<shift))
        sq.SqeTail = next
        return
    }

    sqe = nil
    return
}

func (ring *IoUring) io_uring_cq_ready() uint32 {
    return atomic.LoadUint32(ring.Cq._Tail()) - *ring.Cq._Head()
}

func (ring *IoUring) __io_uring_peek_cqe(cqePtr **IoUringCqe, nrAvail *uint32) error {
    var cqe *IoUringCqe
    var err int32 = 0
    var avail int

    var mask = *ring.Cq._RingMask()
    var shift uint32 = 0

    if ring.Flags&IORING_SETUP_CQE32 != 0 {
        shift = 1
    }

    for {
        var tail = atomic.LoadUint32(ring.Cq._Tail())
        var head = *ring.Cq._Head()

        cqe = nil
        avail = int(tail - head)
        if avail < 1 {
            break
        }

        cqe = ioUringCqeArray_Index(ring.Cq.Cqes, uintptr((head&mask)<<shift))
        if ring.Features&IORING_FEAT_EXT_ARG == 0 &&
            cqe.UserData.GetUint64() == LIBURING_UDATA_TIMEOUT {
            if cqe.Res < 0 {
                err = cqe.Res
            }
            ring.io_uring_cq_advance(1)
            if err == 0 {
                // Internal timeout CQE consumed; yield the goroutine
                // before re-checking the ring.
                runtime.Gosched()
                continue
            }
            cqe = nil
        }

        break
    }

    *cqePtr = cqe
    if nrAvail != nil {
        *nrAvail = uint32(avail)
    }
    if err == 0 {
        return nil
    }
    return syscall.Errno(-err)
}

func (ring *IoUring) io_uring_cq_advance(nr uint32) {
    if nr > 0 {
        atomic.StoreUint32(ring.Cq._Head(), *ring.Cq._Head()+nr)
    }
}

/*
 * Return an IO completion, waiting for 'wait_nr' completions if one isn't
 * readily available. Returns nil with cqePtr filled in on success, or an
 * errno-derived error on failure.
 */
func (ring *IoUring) io_uring_wait_cqe_nr(cqePtr **IoUringCqe, waitNr uint32) error {
    return ring.__io_uring_get_cqe(cqePtr, 0, waitNr, nil)
}
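// submitTimeoutSketch is an illustrative helper, not part of the original
// library: it walks the SQ-side flow above: grab a vacant SQE with
// io_uring_get_sqe, prepare it (PrepTimeout is used because it already
// appears in this file), tag it with user data, then submit and wait for one
// completion in a single io_uring_enter call. The helper name and userData
// parameter are hypothetical.
func (ring *IoUring) submitTimeoutSketch(ts *syscall.Timespec, userData uint64) (int, error) {
    sqe := ring.io_uring_get_sqe()
    if sqe == nil {
        // SQ ring is full; the caller has to submit or reap first.
        return 0, syscall.EAGAIN
    }
    PrepTimeout(sqe, ts, 0, 0)
    sqe.UserData.SetUint64(userData)
    // Flush the queued SQE to the kernel and wait for one CQE.
    return ring.io_uring_submit_and_wait(1)
}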
/*
 * Return an IO completion, if one is readily available. Returns nil with
 * cqePtr filled in on success, or an errno-derived error on failure.
 */
func (ring *IoUring) io_uring_peek_cqe(cqePtr **IoUringCqe) error {
    err := ring.__io_uring_peek_cqe(cqePtr, nil)
    if err == nil && *cqePtr != nil {
        return nil
    }
    return ring.io_uring_wait_cqe_nr(cqePtr, 0)
}

/*
 * Return an IO completion, waiting for it if necessary. Returns nil with
 * cqePtr filled in on success, or an errno-derived error on failure.
 */
func (ring *IoUring) io_uring_wait_cqe(cqePtr **IoUringCqe) error {
    err := ring.__io_uring_peek_cqe(cqePtr, nil)
    if err == nil && *cqePtr != nil {
        return nil
    }
    return ring.io_uring_wait_cqe_nr(cqePtr, 1)
}

/*
 * Must be called once the cqe returned by io_uring_{peek,wait}_cqe() has
 * been processed by the application.
 */
func (ring *IoUring) io_uring_cqe_seen(cqe *IoUringCqe) {
    if cqe != nil {
        ring.io_uring_cq_advance(1)
    }
}
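// completionLoopSketch is an illustrative helper, not part of the original
// library: the canonical consume loop built from the three calls above: wait
// for a CQE, hand it to the (hypothetical) handle callback, then mark it seen
// so the kernel can reuse the CQ slot. The loop stops when handle returns
// false or waiting fails.
func (ring *IoUring) completionLoopSketch(handle func(*IoUringCqe) bool) error {
    for {
        var cqe *IoUringCqe
        if err := ring.io_uring_wait_cqe(&cqe); err != nil {
            return err
        }
        keepGoing := handle(cqe)
        ring.io_uring_cqe_seen(cqe)
        if !keepGoing {
            return nil
        }
    }
}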