github.com/ii64/gouring@v0.4.1/queue.go

     1  package gouring
     2  
     3  import (
     4  	"runtime"
     5  	"sync/atomic"
     6  	"syscall"
     7  	"unsafe"
     8  )
     9  
    10  const LIBURING_UDATA_TIMEOUT uint64 = ^uint64(0)
    11  
    12  /*
    13   * Returns true if we're not using SQ thread (thus nobody submits but us)
    14   * or if IORING_SQ_NEED_WAKEUP is set, so submit thread must be explicitly
    15   * awakened. For the latter case, we set the thread wakeup flag.
    16   */
    17  func (ring *IoUring) sq_ring_needs_enter(flags *uint32) bool {
    18  	if ring.Flags&IORING_SETUP_SQPOLL == 0 {
    19  		return true
    20  	}
    21  
    22  	// FIXME: io_uring_smp_mb (full barrier so the kernel sees the SQ tail store before we read the flags)
    23  
    24  	if atomic.LoadUint32(ring.Sq._Flags())&IORING_SQ_NEED_WAKEUP != 0 {
    25  		*flags |= IORING_ENTER_SQ_WAKEUP
    26  		return true
    27  	}
    28  	return false
    29  }
    30  
    31  func (ring *IoUring) cq_ring_needs_flush() bool {
    32  	return atomic.LoadUint32(ring.Sq._Flags())&(IORING_SQ_CQ_OVERFLOW|IORING_SQ_TASKRUN) != 0 // the overflow/taskrun bits are published via the SQ ring flags, as in liburing
    33  }
    34  
    35  func (ring *IoUring) cq_ring_needs_enter() bool {
    36  	return (ring.Flags&IORING_SETUP_IOPOLL != 0) || ring.cq_ring_needs_flush()
    37  }
    38  
    39  type get_data struct {
    40  	submit   uint32
    41  	waitNr   uint32
    42  	getFlags uint32
    43  	sz       int32
    44  	arg      unsafe.Pointer
    45  }
    46  
    47  func (ring *IoUring) _io_uring_get_cqe(cqePtr **IoUringCqe, data *get_data) (err error) {
    48  	var cqe *IoUringCqe
    49  	var looped = false
    50  	var ret int
    51  	for {
    52  		var needEnter = false
    53  		var flags uint32 = 0
    54  		var nrAvail uint32 = 0
    55  		err = ring.__io_uring_peek_cqe(&cqe, &nrAvail)
    56  		if err != nil {
    57  			break
    58  		}
    59  		if cqe == nil && data.waitNr == 0 && data.submit == 0 {
    60  			if looped || !ring.cq_ring_needs_enter() {
    61  				err = syscall.EAGAIN
    62  				break
    63  			}
    64  			needEnter = true
    65  		}
    66  		if data.waitNr > nrAvail || needEnter {
    67  			flags = IORING_ENTER_GETEVENTS | data.getFlags
    68  			needEnter = true
    69  		}
    70  		if data.submit > 0 && ring.sq_ring_needs_enter(&flags) {
    71  			needEnter = true
    72  		}
    73  		if !needEnter {
    74  			break
    75  		}
    76  
    77  		if ring.IntFlags&INT_FLAG_REG_RING != 0 {
    78  			flags |= IORING_ENTER_REGISTERED_RING
    79  		}
    80  		ret, err = io_uring_enter2(ring.EnterRingFd, data.submit, data.waitNr, flags, (*Sigset_t)(data.arg), data.sz) // data.arg is a *Sigset_t or, with IORING_ENTER_EXT_ARG, an *IoUringGeteventsArg
    81  		if err != nil {
    82  			break
    83  		}
    84  		data.submit -= uint32(ret)
    85  		if cqe != nil {
    86  			break
    87  		}
    88  		looped = true
    89  	}
    90  
    91  	*cqePtr = cqe
    92  	return
    93  }
    94  
    95  func (ring *IoUring) __io_uring_get_cqe(cqePtr **IoUringCqe, submit uint32, waitNr uint32, sigmask *Sigset_t) error {
    96  	data := &get_data{
    97  		submit:   submit,
    98  		waitNr:   waitNr,
    99  		getFlags: 0,
   100  		sz:       NSIG / 8,
   101  		arg:      unsafe.Pointer(sigmask),
   102  	}
   103  	return ring._io_uring_get_cqe(cqePtr, data)
   104  }
   105  
   106  /*
   107   * Fill in an array of IO completions up to count, if any are available.
   108   * Returns the number of IO completions filled.
   109   */
   110  func (ring *IoUring) io_uring_peek_batch_cqe(cqes []*IoUringCqe, count uint32) uint32 {
   111  	var ready uint32
   112  	var overflowChecked = false
   113  	var shift = 0
   114  	if ring.Flags&IORING_SETUP_CQE32 != 0 {
   115  		shift = 1
   116  	}
   117  
   118  again:
   119  	ready = ring.io_uring_cq_ready()
   120  	if ready > 0 {
   121  		var head = *ring.Cq._Head()
   122  		var mask = *ring.Cq._RingMask()
   123  		var last uint32
   124  		if count > ready {
   125  			count = ready
   126  		}
   127  		last = head + count
   128  		var i uintptr = 0
   129  		for head != last {
   130  			cqes[i] = ioUringCqeArray_Index(ring.Cq.Cqes, uintptr((head&mask)<<uint32(shift)))
   131  			i++
   132  			head++
   133  		}
   134  		return count
   135  	}
   136  
   137  	if overflowChecked {
   138  		goto done
   139  	}
   140  
   141  	if ring.cq_ring_needs_flush() {
   142  		var flags uint32 = IORING_ENTER_GETEVENTS
   143  		if ring.IntFlags&INT_FLAG_REG_RING != 0 {
   144  			flags |= IORING_ENTER_REGISTERED_RING
   145  		}
   146  		io_uring_enter(ring.EnterRingFd, 0, 0, flags, nil)
   147  		overflowChecked = true
   148  		goto again
   149  	}
   150  
   151  done:
   152  	return 0
   153  }
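
/*
 * Usage sketch (not part of the original source): one hedged way to drain
 * whatever completions are already available without blocking, pairing
 * io_uring_peek_batch_cqe with io_uring_cq_advance. The name drainReadyCqes
 * and the handle callback are illustrative only.
 */
func (ring *IoUring) drainReadyCqes(handle func(*IoUringCqe)) uint32 {
	var cqes [64]*IoUringCqe
	// Fill the local batch with pointers into the CQ ring, if any are ready.
	n := ring.io_uring_peek_batch_cqe(cqes[:], uint32(len(cqes)))
	for i := uint32(0); i < n; i++ {
		handle(cqes[i])
	}
	// Release all consumed entries back to the kernel in one head update.
	ring.io_uring_cq_advance(n)
	return n
}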
   154  
   155  /*
   156   * Sync internal state with kernel ring state on the SQ side. Returns the
   157   * number of pending items in the SQ ring, for the shared ring.
   158   */
   159  func (ring *IoUring) __io_uring_flush_sq() uint32 {
   160  	sq := &ring.Sq
   161  	var mask = *sq._RingMask()
   162  	var ktail = *sq._Tail()
   163  	var toSubmit = sq.SqeTail - sq.SqeHead
   164  
   165  	if toSubmit < 1 {
   166  		goto out
   167  	}
   168  
   169  	/*
   170  	 * Fill in sqes that we have queued up, adding them to the kernel ring
   171  	 */
   172  	for ; toSubmit > 0; toSubmit-- {
   173  		*uint32Array_Index(sq.Array, uintptr(ktail&mask)) = sq.SqeHead & mask
   174  		ktail++
   175  		sq.SqeHead++
   176  	}
   177  
   178  	/*
   179  	 * Ensure that the kernel sees the SQE updates before it sees the tail
   180  	 * update.
   181  	 */
   182  	atomic.StoreUint32(sq._Tail(), ktail)
   183  
   184  out:
   185  	/*
   186  	 * This _may_ look problematic, as we're not supposed to be reading
   187  	 * SQ->head without acquire semantics. When we're in SQPOLL mode, the
   188  	 * kernel submitter could be updating this right now. For non-SQPOLL,
   189  	 * task itself does it, and there's no potential race. But even for
   190  	 * SQPOLL, the load is going to be potentially out-of-date the very
   191  	 * instant it's done, regardless of whether or not it's done
   192  	 * atomically. Worst case, we're going to be over-estimating what
   193  	 * we can submit. The point is, we need to be able to deal with this
   194  	 * situation regardless of any perceived atomicity.
   195  	 */
   196  	return ktail - *sq._Head()
   197  }
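
/*
 * Illustration (not part of the original source): the pending count returned
 * by __io_uring_flush_sq is what becomes the to_submit argument of
 * io_uring_enter. A minimal manual submit path, roughly what
 * __io_uring_submit_and_wait(0) does further below, might look like this
 * sketch (manualSubmitSketch is an illustrative name).
 */
func (ring *IoUring) manualSubmitSketch() (int, error) {
	// Publish locally queued SQEs to the shared ring and get the pending count.
	pending := ring.__io_uring_flush_sq()

	var flags uint32
	if !ring.sq_ring_needs_enter(&flags) {
		// SQPOLL thread is awake; the kernel will pick the SQEs up on its own.
		return int(pending), nil
	}
	if ring.Flags&IORING_SETUP_IOPOLL != 0 {
		// For IOPOLL rings the submit call is also used to reap completions.
		flags |= IORING_ENTER_GETEVENTS
	}
	if ring.IntFlags&INT_FLAG_REG_RING != 0 {
		flags |= IORING_ENTER_REGISTERED_RING
	}
	return io_uring_enter(ring.EnterRingFd, pending, 0, flags, nil)
}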
   198  
   199  /*
   200   * If we have kernel support for IORING_ENTER_EXT_ARG, then we can use that
   201   * more efficiently than queueing an internal timeout command.
   202   */
   203  func (ring *IoUring) io_uring_wait_cqes_new(cqePtr **IoUringCqe, waitNr uint32, ts *syscall.Timespec, sigmask *Sigset_t) error {
   204  	arg := &IoUringGeteventsArg{
   205  		Sigmask:   uint64(uintptr(unsafe.Pointer(sigmask))),
   206  		SigmaskSz: NSIG / 8,
   207  		Ts:        uint64(uintptr(unsafe.Pointer(ts))),
   208  	}
   209  	data := &get_data{
   210  		waitNr: waitNr, getFlags: IORING_ENTER_EXT_ARG,
   211  		sz:     int32(unsafe.Sizeof(*arg)), // size of the struct, not of the pointer
   212  		arg:    unsafe.Pointer(arg),
   213  	}
   214  	return ring._io_uring_get_cqe(cqePtr, data)
   215  }
   216  
   217  /*
   218   * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note
   219   * that an sqe is used internally to handle the timeout. For kernels that don't
   220   * support IORING_FEAT_EXT_ARG, applications using this function must never
   221   * set sqe->user_data to LIBURING_UDATA_TIMEOUT!
   222   *
   223   * For kernels without IORING_FEAT_EXT_ARG (5.10 and older), if 'ts' is
   224   * specified, the application need not call io_uring_submit() before
   225   * calling this function, as we will do that on its behalf. From this it also
   226   * follows that this function isn't safe to use for applications that split SQ
   227   * and CQ handling between two threads and expect that to work without
   228   * synchronization, as this function manipulates both the SQ and CQ side.
   229   *
   230   * For kernels with IORING_FEAT_EXT_ARG, no implicit submission is done and
   231   * hence this function is safe to use for applications that split SQ and CQ
   232   * handling between two threads.
   233   */
   234  func (ring *IoUring) __io_uring_submit_timeout(waitNr uint32, ts *syscall.Timespec) (ret int, err error) {
   235  	sqe := ring.io_uring_get_sqe()
   236  	if sqe == nil {
   237  		ret, err = ring.io_uringn_submit()
   238  		if err != nil {
   239  			return
   240  		}
   241  		sqe = ring.io_uring_get_sqe()
   242  		if sqe == nil {
   243  			err = syscall.EAGAIN
   244  			return
   245  		}
   246  	}
   247  
   248  	PrepTimeout(sqe, ts, waitNr, 0)
   249  	sqe.UserData.SetUint64(LIBURING_UDATA_TIMEOUT)
   250  	ret = int(ring.__io_uring_flush_sq())
   251  	return
   252  }
   253  
   254  func (ring *IoUring) io_uring_wait_cqes(cqePtr **IoUringCqe, waitNr uint32, ts *syscall.Timespec, sigmask *Sigset_t) (err error) {
   255  	var toSubmit = 0
   256  	if ts != nil {
   257  		if ring.Features&IORING_FEAT_EXT_ARG != 0 {
   258  			err = ring.io_uring_wait_cqes_new(cqePtr, waitNr, ts, sigmask)
   259  			return
   260  		}
   261  		toSubmit, err = ring.__io_uring_submit_timeout(waitNr, ts)
   262  		if err != nil {
   263  			return
   264  		}
   265  	}
   266  	err = ring.__io_uring_get_cqe(cqePtr, uint32(toSubmit), waitNr, sigmask)
   267  	return
   268  }
   269  
   270  func (ring *IoUring) io_uring_submit_and_wait_timeout(cqePtr **IoUringCqe, waitNr uint32, ts *syscall.Timespec, sigmask *Sigset_t) (err error) {
   271  	var toSubmit int
   272  	if ts != nil {
   273  		if ring.Features&IORING_FEAT_EXT_ARG != 0 {
   274  			arg := IoUringGeteventsArg{
   275  				Sigmask:   uint64(uintptr(unsafe.Pointer(sigmask))),
   276  				SigmaskSz: NSIG / 8,
   277  				Ts:        uint64(uintptr(unsafe.Pointer(ts))),
   278  			}
   279  			data := &get_data{
   280  				submit:   ring.__io_uring_flush_sq(),
   281  				waitNr:   waitNr,
   282  				getFlags: IORING_ENTER_EXT_ARG,
   283  				sz:       int32(unsafe.Sizeof(arg)),
   284  				arg:      unsafe.Pointer(&arg),
   285  			}
   286  			return ring._io_uring_get_cqe(cqePtr, data)
   287  		}
   288  		toSubmit, err = ring.__io_uring_submit_timeout(waitNr, ts)
   289  		if err != nil {
   290  			return
   291  		}
   292  	} else {
   293  		toSubmit = int(ring.__io_uring_flush_sq())
   294  	}
   295  	err = ring.__io_uring_get_cqe(cqePtr, uint32(toSubmit), waitNr, sigmask)
   296  	return
   297  }
   298  
   299  /*
   300   * See io_uring_wait_cqes() - this function is the same, it just always uses
   301   * '1' as the wait_nr.
   302   */
   303  func (ring *IoUring) io_uring_wait_cqe_timeout(cqePtr **IoUringCqe, ts *syscall.Timespec) error {
   304  	return ring.io_uring_wait_cqes(cqePtr, 1, ts, nil)
   305  }
   306  
   307  /*
   308   * Submit sqes acquired from io_uring_get_sqe() to the kernel.
   309   *
   310   * Returns number of sqes submitted
   311   */
   312  func (ring *IoUring) io_uringn_submit() (int, error) {
   313  	return ring.__io_uring_submit_and_wait(0)
   314  }
   315  
   316  /*
   317   * Like io_uring_submit(), but allows waiting for events as well.
   318   *
   319   * Returns number of sqes submitted
   320   */
   321  func (ring *IoUring) io_uring_submit_and_wait(waitNr uint32) (int, error) {
   322  	return ring.__io_uring_submit_and_wait(waitNr)
   323  }
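
/*
 * Usage sketch (not part of the original source): queue one SQE, then flush
 * it and wait for at least one completion in a single io_uring_enter call via
 * io_uring_submit_and_wait. PrepTimeout is used only because it is the prep
 * helper visible in this file; any other Prep* helper would follow the same
 * pattern. The completion itself still has to be reaped with
 * io_uring_wait_cqe/io_uring_peek_cqe and released with io_uring_cqe_seen
 * (see the sketches further below). submitAndWaitOneSketch is an
 * illustrative name.
 */
func (ring *IoUring) submitAndWaitOneSketch(ts *syscall.Timespec, userData uint64) error {
	sqe := ring.io_uring_get_sqe()
	if sqe == nil {
		// SQ ring full; the recovery pattern is sketched after _io_uring_get_sqe below.
		return syscall.EAGAIN
	}
	PrepTimeout(sqe, ts, 0, 0)
	sqe.UserData.SetUint64(userData)

	// Flush the SQ and wait for one completion in one syscall.
	_, err := ring.io_uring_submit_and_wait(1)
	return err
}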
   324  
   325  func (ring *IoUring) __io_uring_submit_and_wait(waitNr uint32) (int, error) {
   326  	return ring.__io_uring_submit(ring.__io_uring_flush_sq(), waitNr)
   327  }
   328  
   329  func (ring *IoUring) __io_uring_submit(submitted uint32, waitNr uint32) (ret int, err error) {
   330  	var flags uint32 = 0
   331  
   332  	if ring.sq_ring_needs_enter(&flags) || waitNr != 0 {
   333  		if waitNr != 0 || ring.Flags&IORING_SETUP_IOPOLL != 0 {
   334  			flags |= IORING_ENTER_GETEVENTS
   335  		}
   336  		if ring.IntFlags&INT_FLAG_REG_RING != 0 {
   337  			flags |= IORING_ENTER_REGISTERED_RING
   338  		}
   339  		ret, err = io_uring_enter(ring.EnterRingFd, submitted, waitNr, flags, nil)
   340  	} else {
   341  		ret = int(submitted)
   342  	}
   343  	return
   344  }
   345  
   346  func (ring *IoUring) io_uring_get_sqe() *IoUringSqe {
   347  	return ring._io_uring_get_sqe()
   348  }
   349  
   350  /*
   351   * Return an sqe to fill. Application must later call io_uring_submit()
   352   * when it's ready to tell the kernel about it. The caller may call this
   353   * function multiple times before calling io_uring_submit().
   354   *
   355   * Returns a vacant sqe, or nil if we're full.
   356   */
   357  func (ring *IoUring) _io_uring_get_sqe() (sqe *IoUringSqe) {
   358  	sq := &ring.Sq
   359  	var head = atomic.LoadUint32(sq._Head())
   360  	var next = sq.SqeTail + 1
   361  	var shift uint32 = 0
   362  
   363  	if ring.Flags&IORING_SETUP_SQE128 != 0 {
   364  		shift = 1
   365  	}
   366  
   367  	if next-head <= *sq._RingEntries() {
   368  		sqe = ioUringSqeArray_Index(sq.Sqes, uintptr((sq.SqeTail&*sq._RingMask())<<shift))
   369  		sq.SqeTail = next
   370  		return
   371  	}
   372  
   373  	sqe = nil
   374  	return
   375  }
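
/*
 * Usage sketch (not part of the original source): io_uring_get_sqe returns
 * nil once SqeTail has run a full ring ahead of the kernel's head. A common
 * pattern, also used by __io_uring_submit_timeout above, is to submit the
 * backlog and retry once. getSqeOrFlushSketch is an illustrative name.
 */
func (ring *IoUring) getSqeOrFlushSketch() (*IoUringSqe, error) {
	if sqe := ring.io_uring_get_sqe(); sqe != nil {
		return sqe, nil
	}
	// Ring is full: push the queued SQEs to the kernel to free up slots.
	if _, err := ring.io_uringn_submit(); err != nil {
		return nil, err
	}
	if sqe := ring.io_uring_get_sqe(); sqe != nil {
		return sqe, nil
	}
	return nil, syscall.EAGAIN
}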
   376  
   377  func (ring *IoUring) io_uring_cq_ready() uint32 {
   378  	return atomic.LoadUint32(ring.Cq._Tail()) - *ring.Cq._Head()
   379  }
   380  
   381  func (ring *IoUring) __io_uring_peek_cqe(cqePtr **IoUringCqe, nrAvail *uint32) error {
   382  	var cqe *IoUringCqe
   383  	var err int32 = 0
   384  	var avail int
   385  
   386  	var mask = *ring.Cq._RingMask()
   387  	var shift uint32 = 0
   388  
   389  	if ring.Flags&IORING_SETUP_CQE32 != 0 {
   390  		shift = 1
   391  	}
   392  
   393  	for {
   394  		var tail = atomic.LoadUint32(ring.Cq._Tail())
   395  		var head = *ring.Cq._Head()
   396  
   397  		cqe = nil
   398  		avail = int(tail - head)
   399  		if avail < 1 {
   400  			break
   401  		}
   402  
   403  		cqe = ioUringCqeArray_Index(ring.Cq.Cqes, uintptr((head&mask)<<shift))
   404  		if ring.Features&IORING_FEAT_EXT_ARG == 0 &&
   405  			cqe.UserData.GetUint64() == LIBURING_UDATA_TIMEOUT {
   406  			if cqe.Res < 0 {
   407  				err = cqe.Res
   408  			}
   409  			ring.io_uring_cq_advance(1)
   410  			if err == 0 {
   411  				// yields G
   412  				runtime.Gosched()
   413  				continue
   414  			}
   415  			cqe = nil
   416  		}
   417  
   418  		break
   419  	}
   420  
   421  	*cqePtr = cqe
   422  	if nrAvail != nil {
   423  		*nrAvail = uint32(avail)
   424  	}
   425  	if err == 0 {
   426  		return nil
   427  	}
   428  	return syscall.Errno(-err)
   429  }
   430  
   431  func (ring *IoUring) io_uring_cq_advance(nr uint32) {
   432  	if nr > 0 {
   433  		atomic.StoreUint32(ring.Cq._Head(), *ring.Cq._Head()+nr)
   434  	}
   435  }
   436  
   437  /*
   438   * Return an IO completion, waiting for 'waitNr' completions if one isn't
   439   * readily available. Returns nil with cqePtr filled in on success, or an
   440   * errno-valued error on failure.
   441   */
   442  func (ring *IoUring) io_uring_wait_cqe_nr(cqePtr **IoUringCqe, waitNr uint32) error {
   443  	return ring.__io_uring_get_cqe(cqePtr, 0, waitNr, nil)
   444  }
   445  
   446  /*
   447   * Return an IO completion, if one is readily available. Returns nil with
   448   * cqePtr filled in on success, or an errno-valued error on failure.
   449   */
   450  func (ring *IoUring) io_uring_peek_cqe(cqePtr **IoUringCqe) error {
   451  	err := ring.__io_uring_peek_cqe(cqePtr, nil)
   452  	if err == nil && *cqePtr != nil {
   453  		return nil
   454  	}
   455  	return ring.io_uring_wait_cqe_nr(cqePtr, 0)
   456  }
   457  
   458  /*
   459   * Return an IO completion, waiting for it if necessary. Returns nil with
   460   * cqePtr filled in on success, or an errno-valued error on failure.
   461   */
   462  func (ring *IoUring) io_uring_wait_cqe(cqePtr **IoUringCqe) error {
   463  	err := ring.__io_uring_peek_cqe(cqePtr, nil)
   464  	if err == nil && *cqePtr != nil {
   465  		return nil
   466  	}
   467  	return ring.io_uring_wait_cqe_nr(cqePtr, 1)
   468  }
   469  
   470  /*
   471   * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
   472   * been processed by the application.
   473   */
   474  func (ring *IoUring) io_uring_cqe_seen(cqe *IoUringCqe) {
   475  	if cqe != nil {
   476  		ring.io_uring_cq_advance(1)
   477  	}
   478  }
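
/*
 * Usage sketch (not part of the original source): the canonical blocking
 * reap loop built from io_uring_wait_cqe and io_uring_cqe_seen. Each CQE
 * points into the shared ring, so it must be fully processed before
 * io_uring_cqe_seen advances the head and lets the kernel reuse the slot.
 * A negative Res mirrors the kernel's -errno convention for the operation.
 * reapSketch is an illustrative name.
 */
func (ring *IoUring) reapSketch(n int, handle func(*IoUringCqe) error) error {
	for i := 0; i < n; i++ {
		var cqe *IoUringCqe
		if err := ring.io_uring_wait_cqe(&cqe); err != nil {
			return err
		}
		err := handle(cqe)
		ring.io_uring_cqe_seen(cqe) // release the slot even if handle failed
		if err != nil {
			return err
		}
	}
	return nil
}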