github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/sys/linux/io_uring.txt (about)

     1  # Copyright 2019 syzkaller project authors. All rights reserved.
     2  # Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  # See http://kernel.dk/io_uring.pdf
     5  
     6  include <uapi/linux/io_uring.h>
     7  # For EPOLL_CTL_ADD, EPOLL_CTL_MOD, EPOLL_CTL_DEL
     8  include <uapi/linux/eventpoll.h>
     9  
    10  resource fd_io_uring[fd]
    11  resource ring_ptr[int64]
    12  resource sqes_ptr[int64]
    13  resource ioring_personality_id[int16]
    14  
    15  # fs/io_uring.c
    16  define IORING_MAX_ENTRIES	32768
    17  define IORING_MAX_CQ_ENTRIES	(2 * IORING_MAX_ENTRIES)
    18  
     19  # First does the setup by calling io_uring_setup, then calls mmap to map the ring and
    20  # the sqes. It is hard for the fuzzer to generate correct programs using mmap calls
    21  # with fuzzer-provided mmap length. This wrapper ensures correct length computation.
    22  syz_io_uring_setup(entries int32[1:IORING_MAX_ENTRIES], params ptr[inout, io_uring_params], ring_ptr ptr[out, ring_ptr], sqes_ptr ptr[out, sqes_ptr]) fd_io_uring
    23  
    24  io_uring_setup(entries int32[1:IORING_MAX_ENTRIES], params ptr[inout, io_uring_params]) fd_io_uring
    25  io_uring_enter(fd fd_io_uring, to_submit int32[0:IORING_MAX_ENTRIES], min_complete int32[0:IORING_MAX_CQ_ENTRIES], flags flags[io_uring_enter_flags], sigmask ptr[in, sigset_t], size len[sigmask])
    26  io_uring_register$IORING_REGISTER_BUFFERS(fd fd_io_uring, opcode const[IORING_REGISTER_BUFFERS], arg ptr[in, array[iovec_out]], nr_args len[arg])
    27  io_uring_register$IORING_UNREGISTER_BUFFERS(fd fd_io_uring, opcode const[IORING_UNREGISTER_BUFFERS], arg const[0], nr_args const[0])
    28  io_uring_register$IORING_REGISTER_FILES(fd fd_io_uring, opcode const[IORING_REGISTER_FILES], arg ptr[in, array[fd]], nr_args len[arg])
    29  io_uring_register$IORING_UNREGISTER_FILES(fd fd_io_uring, opcode const[IORING_UNREGISTER_FILES], arg const[0], nr_args const[0])
    30  io_uring_register$IORING_REGISTER_EVENTFD(fd fd_io_uring, opcode const[IORING_REGISTER_EVENTFD], arg ptr[in, fd_event], nr_args const[1])
    31  io_uring_register$IORING_UNREGISTER_EVENTFD(fd fd_io_uring, opcode const[IORING_UNREGISTER_EVENTFD], arg const[0], nr_args const[0])
    32  io_uring_register$IORING_REGISTER_FILES_UPDATE(fd fd_io_uring, opcode const[IORING_REGISTER_FILES_UPDATE], arg ptr[in, io_uring_files_update], nr_args len[arg:fds])
    33  io_uring_register$IORING_REGISTER_EVENTFD_ASYNC(fd fd_io_uring, opcode const[IORING_REGISTER_EVENTFD_ASYNC], arg ptr[in, fd_event], nr_args const[1])
    34  io_uring_register$IORING_REGISTER_PROBE(fd fd_io_uring, opcode const[IORING_REGISTER_PROBE], arg ptr[inout, io_uring_probe], nr_args len[arg:ops])
    35  io_uring_register$IORING_REGISTER_PERSONALITY(fd fd_io_uring, opcode const[IORING_REGISTER_PERSONALITY], arg const[0], nr_args const[0]) ioring_personality_id
    36  io_uring_register$IORING_UNREGISTER_PERSONALITY(fd fd_io_uring, opcode const[IORING_UNREGISTER_PERSONALITY], arg const[0], nr_args ioring_personality_id)
    37  # IORING_REGISTER_EVENTFD, IORING_UNREGISTER_EVENTFD >= 5.2
    38  # IORING_REGISTER_FILES_UPDATE >= 5.5
    39  # IORING_REGISTER_EVENTFD_ASYNC, IORING_REGISTER_PROBE, IORING_REGISTER_PERSONALITY, IORING_UNREGISTER_PERSONALITY>= 5.6
    40  
    41  io_uring_register$IORING_REGISTER_ENABLE_RINGS(fd fd_io_uring, opcode const[IORING_REGISTER_ENABLE_RINGS], arg const[0], nr_args const[0])
    42  io_uring_register$IORING_REGISTER_RESTRICTIONS(fd fd_io_uring, opcode const[IORING_REGISTER_RESTRICTIONS], arg ptr[in, array[io_uring_restriction_st]], nr_args len[arg])
    43  # IORING_REGISTER_ENABLE_RINGS, IORING_REGISTER_RESTRICTIONS >= 5.10
    44  io_uring_register$IORING_REGISTER_BUFFERS2(fd fd_io_uring, opcode const[IORING_REGISTER_BUFFERS2], arg ptr[in, io_uring_rsrc_register], size bytesize[arg])
    45  io_uring_register$IORING_REGISTER_BUFFERS_UPDATE(fd fd_io_uring, opcode const[IORING_REGISTER_BUFFERS_UPDATE], arg ptr[in, io_uring_rsrc_update2], size bytesize[arg])
    46  io_uring_register$IORING_REGISTER_FILES2(fd fd_io_uring, opcode const[IORING_REGISTER_FILES2], arg ptr[in, io_uring_rsrc_register], size bytesize[arg])
    47  io_uring_register$IORING_REGISTER_FILES_UPDATE2(fd fd_io_uring, opcode const[IORING_REGISTER_FILES_UPDATE2], arg ptr[in, io_uring_rsrc_update2], size bytesize[arg])
    48  # IORING_REGISTER_BUFFERS2, IORING_REGISTER_BUFFERS_UPDATE, IORING_REGISTER_FILES2, IORING_REGISTER_FILES_UPDATE2 >= 5.13
    49  io_uring_register$IORING_REGISTER_IOWQ_AFF(fd fd_io_uring, opcode const[IORING_REGISTER_IOWQ_AFF], arg ptr[in, array[int8]], size bytesize[arg])
    50  io_uring_register$IORING_UNREGISTER_IOWQ_AFF(fd fd_io_uring, opcode const[IORING_UNREGISTER_IOWQ_AFF], arg const[0], nr_args const[0])
    51  # IORING_REGISTER_IOWQ_AFF, IORING_UNREGISTER_IOWQ_AFF >= 5.14
    52  io_uring_register$IORING_REGISTER_IOWQ_MAX_WORKERS(fd fd_io_uring, opcode const[IORING_REGISTER_IOWQ_MAX_WORKERS], arg ptr[in, array[int32, 2]], nr_args const[2])
    53  # IORING_REGISTER_IOWQ_MAX_WORKERS >= 5.15
    54  io_uring_register$IORING_REGISTER_RING_FDS(fd fd_io_uring, opcode const[IORING_REGISTER_RING_FDS], arg ptr[in, array[io_uring_rsrc_register]], nr_args len[arg])
    55  io_uring_register$IORING_UNREGISTER_RING_FDS(fd fd_io_uring, opcode const[IORING_UNREGISTER_RING_FDS], arg ptr[in, array[io_uring_rsrc_register]], nr_args len[arg])
    56  # IORING_REGISTER_RING_FDS, IORING_UNREGISTER_RING_FDS >= 5.18
    57  io_uring_register$IORING_REGISTER_PBUF_RING(fd fd_io_uring, opcode const[IORING_REGISTER_PBUF_RING], arg ptr[in, io_uring_buf_reg], nr_args const[1])
    58  io_uring_register$IORING_UNREGISTER_PBUF_RING(fd fd_io_uring, opcode const[IORING_UNREGISTER_PBUF_RING], arg ptr[in, io_uring_buf_reg], nr_args const[1])
    59  # IORING_REGISTER_PBUF_RING, IORING_UNREGISTER_PBUF_RING >= 5.19
    60  
    61  io_uring_register_opcodes = IORING_REGISTER_BUFFERS, IORING_UNREGISTER_BUFFERS, IORING_REGISTER_FILES, IORING_UNREGISTER_FILES, IORING_REGISTER_EVENTFD, IORING_UNREGISTER_EVENTFD, IORING_REGISTER_FILES_UPDATE, IORING_REGISTER_EVENTFD_ASYNC, IORING_REGISTER_PROBE, IORING_REGISTER_PERSONALITY, IORING_UNREGISTER_PERSONALITY, IORING_REGISTER_RESTRICTIONS, IORING_REGISTER_ENABLE_RINGS, IORING_REGISTER_FILES2, IORING_REGISTER_FILES_UPDATE2, IORING_REGISTER_BUFFERS2, IORING_REGISTER_BUFFERS_UPDATE, IORING_REGISTER_IOWQ_AFF, IORING_UNREGISTER_IOWQ_AFF, IORING_REGISTER_IOWQ_MAX_WORKERS, IORING_REGISTER_RING_FDS, IORING_UNREGISTER_RING_FDS, IORING_REGISTER_PBUF_RING, IORING_UNREGISTER_PBUF_RING, IORING_REGISTER_SYNC_CANCEL, IORING_REGISTER_FILE_ALLOC_RANGE
    62  
     63  # The mmap'ed areas for the SQ and CQ rings are really the same -- the difference is
    64  # accounted for with the usage of offsets.
    65  mmap$IORING_OFF_SQ_RING(addr vma, len len[addr], prot flags[mmap_prot], flags flags[mmap_flags], fd fd_io_uring, offset const[IORING_OFF_SQ_RING]) ring_ptr
    66  mmap$IORING_OFF_CQ_RING(addr vma, len len[addr], prot flags[mmap_prot], flags flags[mmap_flags], fd fd_io_uring, offset const[IORING_OFF_CQ_RING]) ring_ptr
    67  mmap$IORING_OFF_SQES(addr vma, len len[addr], prot flags[mmap_prot], flags flags[mmap_flags], fd fd_io_uring, offset const[IORING_OFF_SQES]) sqes_ptr
    68  
    69  # If no flags are specified(0), the io_uring instance is setup for interrupt driven IO.
    70  io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP_SQ_AFF, IORING_SETUP_CQSIZE, IORING_SETUP_CLAMP, IORING_SETUP_ATTACH_WQ, IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE, IORING_FEAT_RW_CUR_POS, IORING_FEAT_FAST_POLL, IORING_FEAT_POLL_32BITS, IORING_SETUP_R_DISABLED, IORING_FEAT_SQPOLL_NONFIXED, IORING_FEAT_NATIVE_WORKERS, IORING_FEAT_RSRC_TAGS, IORING_FEAT_CQE_SKIP, IORING_SETUP_SUBMIT_ALL, IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG, IORING_SETUP_SQE128, IORING_SETUP_CQE32, IORING_SETUP_SINGLE_ISSUER, IORING_SETUP_DEFER_TASKRUN
     71  # Watch out for the version of the kernel being tested:
    72  # IORING_FEAT_SINGLE_MMAP >= 5.4
    73  # IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE >= 5.5
    74  # IORING_FEAT_RW_CUR_POS >= 5.6
    75  # IORING_FEAT_FAST_POLL >= 5.7
    76  # IORING_FEAT_POLL_32BITS >= 5.9
     77  # IORING_SETUP_R_DISABLED >= 5.10 (this should be used with IORING_REGISTER_ENABLE_RINGS)
    78  # IORING_FEAT_SQPOLL_NONFIXED >= 5.11
    79  # IORING_FEAT_NATIVE_WORKERS >= 5.12
    80  # IORING_FEAT_RSRC_TAGS >= 5.13
    81  # IORING_FEAT_CQE_SKIP >= 5.17
    82  # IORING_SETUP_SUBMIT_ALL >= 5.18
    83  # IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG, IORING_SETUP_SQE128, IORING_SETUP_CQE32 >= 5.19
    84  # IORING_SETUP_SINGLE_ISSUER >= 6.0
    85  # IORING_SETUP_DEFER_TASKRUN >= 6.1
    86  
    87  io_uring_enter_flags = IORING_ENTER_GETEVENTS, IORING_ENTER_SQ_WAKEUP, IORING_ENTER_SQ_WAIT, IORING_ENTER_EXT_ARG, IORING_ENTER_REGISTERED_RING
    88  # IORING_ENTER_EXT_ARG >= 5.11
    89  _ = __NR_mmap2
    90  
    91  # Once an io_uring is set up by calling io_uring_setup, the offsets to the member fields
    92  # to be used on the mmap'ed area are set in structs io_sqring_offsets and io_cqring_offsets.
    93  # Except io_sqring_offsets.array, the offsets are static while all depend on how struct io_rings
    94  # is organized in code. The offsets can be marked as resources in syzkaller descriptions but
    95  # this makes it difficult to generate correct programs by the fuzzer. Thus, the offsets are
    96  # hard-coded here (and in the executor).
    97  define SQ_HEAD_OFFSET	0
    98  define SQ_TAIL_OFFSET	64
    99  define SQ_RING_MASK_OFFSET	256
   100  define SQ_RING_ENTRIES_OFFSET	264
   101  define SQ_FLAGS_OFFSET	276
   102  define SQ_DROPPED_OFFSET	272
   103  define CQ_HEAD_OFFSET	128
   104  define CQ_TAIL_OFFSET	192
   105  define CQ_RING_MASK_OFFSET	260
   106  define CQ_RING_ENTRIES_OFFSET	268
   107  define CQ_RING_OVERFLOW_OFFSET	284
   108  define CQ_FLAGS_OFFSET	280
   109  
    110  # Notice all offsets are pointing to uint32 values. This is assumed for the syz_memcpy_off calls below.
   111  io_uring_offsets = SQ_HEAD_OFFSET, SQ_TAIL_OFFSET, SQ_RING_MASK_OFFSET, SQ_RING_ENTRIES_OFFSET, SQ_DROPPED_OFFSET, CQ_HEAD_OFFSET, CQ_TAIL_OFFSET, CQ_RING_MASK_OFFSET, CQ_RING_ENTRIES_OFFSET, CQ_RING_OVERFLOW_OFFSET, io_uring_flags_offsets
   112  
   113  # Also, all values are int32, thus, set nbytes to 4.
   114  syz_memcpy_off$IO_URING_METADATA_GENERIC(ring_ptr ring_ptr, off flags[io_uring_offsets], src ptr[in, int32], src_off const[0], nbytes const[4])
   115  
   116  # The flags available are: IORING_SQ_NEED_WAKEUP (1) for sq, IORING_CQ_EVENTFD_DISABLED (1) for cq. Use int32[0:1] to represent possible values.
   117  io_uring_flags_offsets = SQ_FLAGS_OFFSET, CQ_FLAGS_OFFSET
   118  syz_memcpy_off$IO_URING_METADATA_FLAGS(ring_ptr ring_ptr, flag_off flags[io_uring_flags_offsets], src ptr[in, int32[0:1]], src_off const[0], nbytes const[4])
   119  
         # Argument of io_uring_register$IORING_REGISTER_PROBE (passed as ptr[inout]).
         # All header fields are pinned to zero; only the number of ops entries varies
         # (0 to IORING_OP_LAST), and the call's nr_args is tied to that count.
    120  io_uring_probe {
    121  	last_op	const[0, int8]
    122  	ops_len	const[0, int8]
    123  	resv	const[0, int16]
    124  	resv2	array[const[0, int32], 3]
    125  	ops	array[io_uring_probe_op, 0:IORING_OP_LAST]
    126  }
   127  
         # One element of io_uring_probe.ops. Every field is a constant zero.
         # NOTE(review): presumably the kernel fills these in, since the enclosing
         # argument is passed inout -- confirm.
    128  io_uring_probe_op {
    129  	op	const[0, int8]
    130  	resv	const[0, int8]
    131  	flags	const[0, int16]
    132  	resv2	const[0, int32]
    133  }
   134  
         # Argument of io_uring_register$IORING_REGISTER_FILES_UPDATE. The call's nr_args
         # is the number of elements in fds.
         # offset is unconstrained; presumably the first registered-file index to
         # update -- TODO confirm.
    135  io_uring_files_update {
    136  	offset	int32
    137  	resv	const[0, int32]
    138  	fds	ptr64[in, array[fd]]
    139  }
   140  
   141  #
   142  # type template for io_uring_restriction
   143  #
   144  
         # Template for a single restriction entry.
         #   OPCODE - constant stored in the 16-bit op field (the restriction kind).
         #   OPARG  - type of the oparg field that follows it.
         # The reserved fields are always zero. Instantiated by io_uring_restriction_st.
    145  type io_uring_restriction[OPCODE, OPARG] {
    146  	op	const[OPCODE, int16]
    147  	oparg	OPARG
    148  	resv	const[0, int8]
    149  	resv2	array[const[0, int32], 3]
    150  }
   151  
   152  io_uring_restriction_st [
   153  	ioring_restriction_register_op		io_uring_restriction[IORING_RESTRICTION_REGISTER_OP, flags[io_uring_register_opcodes, int8]]
   154  	ioring_restriction_sqe_op		io_uring_restriction[IORING_RESTRICTION_SQE_OP, flags[io_uring_register_opcodes, int8]]
   155  	ioring_restriction_sqe_flags_allowed	io_uring_restriction[IORING_RESTRICTION_SQE_FLAGS_ALLOWED, flags[io_uring_register_opcodes, int8]]
   156  	ioring_restriction_sqe_flags_required	io_uring_restriction[IORING_RESTRICTION_SQE_FLAGS_REQUIRED, flags[io_uring_register_opcodes, int8]]
   157  ]
   158  
   159  io_uring_rsrc_flags = IORING_RSRC_REGISTER_SPARSE
   160  
         # Argument of io_uring_register$IORING_REGISTER_BUFFERS2 and
         # io_uring_register$IORING_REGISTER_FILES2 (both pass its byte size as the last argument).
         # nr is the element count of data; flags come from io_uring_rsrc_flags.
         # NOTE(review): data is described as an iovec array for every user, including the
         # FILES2 variant -- confirm whether an fd array is wanted there.
    161  io_uring_rsrc_register {
    162  	nr	len[data, int32]
    163  	flags	flags[io_uring_rsrc_flags, int32]
    164  	resv2	const[0, int64]
    165  	data	ptr64[in, array[iovec_out]]
    166  	tags	ptr64[in, array[int64]]
    167  }
   168  
         # Argument of io_uring_register$IORING_REGISTER_BUFFERS_UPDATE and
         # io_uring_register$IORING_REGISTER_FILES_UPDATE2 (both pass its byte size as the last argument).
         # nr is the element count of data; offset is unconstrained.
    169  io_uring_rsrc_update2 {
    170  	offset	int32
    171  	resv	const[0, int32]
    172  	data	ptr64[in, array[iovec_out]]
    173  	tags	ptr64[in, array[int64]]
    174  	nr	len[data, int32]
    175  	resv2	const[0, int32]
    176  }
   177  
         # One entry of io_uring_buf_array.data: a byte buffer, its length (tied to addr)
         # and a buffer id restricted to the io_uring_bid range.
    178  io_uring_buf {
    179  	addr	ptr64[in, array[int8]]
    180  	len	len[addr, int32]
    181  	bid	io_uring_bid[int16]
    182  	resv	const[0, int16]
    183  }
   184  
         # Array of io_uring_buf entries, aligned to 4096 bytes. Pointed to by
         # io_uring_buf_reg.ring_addr.
    185  io_uring_buf_array {
    186  	data	array[io_uring_buf]
    187  } [align[4096]]
   188  
         # Argument of io_uring_register$IORING_REGISTER_PBUF_RING and
         # io_uring_register$IORING_UNREGISTER_PBUF_RING.
         # ring_entries is the number of io_uring_buf elements behind ring_addr; bgid is a
         # buffer group id restricted to the io_uring_bgid range.
    189  io_uring_buf_reg {
    190  	ring_addr	ptr64[in, io_uring_buf_array]
    191  	ring_entries	len[ring_addr:data, int32]
    192  	bgid		io_uring_bgid[int16]	(in)
    193  # IOU_PBUF_RING_MMAP not supported yet
    194  	flags		const[0, int16]
    195  	resv		array[const[0, int64], 3]
    196  }
   197  
         # Parameter block passed (as ptr[inout]) to io_uring_setup and syz_io_uring_setup.
         # Per-field directions mark which fields the caller provides (in) and which are
         # only written back (out); cq_entries is both.
         # sq_thread_cpu and sq_thread_idle are kept in small ranges (0..3 and 0..1000).
    198  io_uring_params {
    199  	sq_entries	int32	(out)
    200  	cq_entries	int32[0:IORING_MAX_CQ_ENTRIES]	(inout)
    201  	flags		flags[io_uring_setup_flags, int32]	(in)
    202  	sq_thread_cpu	int32[0:3]	(in)
    203  	sq_thread_idle	int32[0:1000]	(in)
    204  	features	int32	(out)
    205  	wq_fd		fd_io_uring[opt]	(in)
    206  	resv		array[const[0, int32], 3]
    207  # We don't really use them (they are hard-coded). Thus, just pass some memory region of their size.
    208  # TODO: Now that per-field directions are supported, can we avoid using hard-coded values for offsets?
    209  	sq_off		array[int32, 10]	(out)
    210  	cq_off		array[int32, 10]	(out)
    211  }
   212  
   213  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
   214  # Descriptions for sq_ring and cq_ring manipulation # # # # # # # # # # # # # #
   215  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
   216  
    217  # Retrieve the cqe at the head of the cq_ring and advance the head. The only meaningful
    218  # resource a cqe can carry is the fd produced by the completion of an openat or openat2
    219  # call. If the cqe belongs to one of those, returns that fd. Otherwise, for other
    220  # operations, returns an invalid fd (-1).
   221  syz_io_uring_complete(ring_ptr ring_ptr) fd
   222  
   223  # Submit sqe into the sq_ring
   224  syz_io_uring_submit(ring_ptr ring_ptr, sqes_ptr sqes_ptr, sqe ptr[in, io_uring_sqe_u])
   225  
   226  io_uring_sqe_u [
   227  	IORING_OP_NOP			io_uring_sqe$nop
   228  	IORING_OP_READV			io_uring_sqe_readv
   229  	IORING_OP_WRITEV		io_uring_sqe$writev
   230  	IORING_OP_FSYNC			io_uring_sqe$fsync
   231  	IORING_OP_READ_FIXED		io_uring_sqe$read_fixed
   232  	IORING_OP_WRITE_FIXED		io_uring_sqe$write_fixed
   233  	IORING_OP_POLL_ADD		io_uring_sqe$poll_add
   234  	IORING_OP_POLL_REMOVE		io_uring_sqe$poll_remove
   235  	IORING_OP_SYNC_FILE_RANGE	io_uring_sqe$sync_file_range
   236  	IORING_OP_SENDMSG		io_uring_sqe$sendmsg
   237  	IORING_OP_RECVMSG		io_uring_sqe$recvmsg
   238  	IORING_OP_TIMEOUT		io_uring_sqe$timeout
   239  	IORING_OP_TIMEOUT_REMOVE	io_uring_sqe$timeout_remove
   240  	IORING_OP_ACCEPT		io_uring_sqe$accept
   241  	IORING_OP_ASYNC_CANCEL		io_uring_sqe$async_cancel
   242  	IORING_OP_LINK_TIMEOUT		io_uring_sqe$link_timeout
   243  	IORING_OP_CONNECT		io_uring_sqe$connect
   244  	IORING_OP_FALLOCATE		io_uring_sqe$fallocate
   245  	IORING_OP_OPENAT		io_uring_sqe$openat
   246  	IORING_OP_CLOSE			io_uring_sqe$close
   247  	IORING_OP_FILES_UPDATE		io_uring_sqe$files_update
   248  	IORING_OP_STATX			io_uring_sqe$statx
   249  	IORING_OP_READ			io_uring_sqe_read
   250  	IORING_OP_WRITE			io_uring_sqe$write
   251  	IORING_OP_FADVISE		io_uring_sqe$fadvise
   252  	IORING_OP_MADVISE		io_uring_sqe$madvise
   253  	IORING_OP_SEND			io_uring_sqe$send
   254  	IORING_OP_RECV			io_uring_sqe_recv
   255  	IORING_OP_OPENAT2		io_uring_sqe$openat2
   256  	IORING_OP_EPOLL_CTL		io_uring_sqe_epoll_ctl
   257  	IORING_OP_SPLICE		io_uring_sqe$splice
   258  	IORING_OP_PROVIDE_BUFFERS	io_uring_sqe$provide_buffers
   259  	IORING_OP_REMOVE_BUFFERS	io_uring_sqe$remove_buffers
   260  	IORING_OP_TEE			io_uring_sqe$tee
   261  	IORING_OP_SHUTDOWN		io_uring_sqe$shutdown
   262  	IORING_OP_RENAMEAT		io_uring_sqe$renameat
   263  	IORING_OP_UNLINKAT		io_uring_sqe$unlinkat
   264  	IORING_OP_MKDIRAT		io_uring_sqe$mkdirat
   265  	IORING_OP_SYMLINKAT		io_uring_sqe$symlinkat
   266  	IORING_OP_LINKAT		io_uring_sqe$linkat
   267  	IORING_OP_MSG_RING		io_uring_sqe$msg_ring
    268  # 	TODO: ops not yet described here (as of 6.0)
    269  #	IORING_OP_FSETXATTR		io_uring_sqe$fsetxattr
    270  #	IORING_OP_SETXATTR		io_uring_sqe$setxattr
    271  #	IORING_OP_FGETXATTR		io_uring_sqe$fgetxattr
    272  #	IORING_OP_GETXATTR		io_uring_sqe$getxattr
   273  #	IORING_OP_SOCKET		io_uring_sqe$socket
   274  #	IORING_OP_URING_CMD		io_uring_sqe$uring_cmd
   275  #	IORING_OP_SEND_ZC		io_uring_sqe$send_zc
   276  ]
   277  
   278  # io_uring_enter_opcodes = IORING_OP_NOP, IORING_OP_READV, IORING_OP_WRITEV, IORING_OP_FSYNC, IORING_OP_READ_FIXED, IORING_OP_WRITE_FIXED, IORING_OP_POLL_ADD, IORING_OP_POLL_REMOVE, IORING_OP_SYNC_FILE_RANGE, IORING_OP_SENDMSG, IORING_OP_RECVMSG, IORING_OP_TIMEOUT, IORING_OP_TIMEOUT_REMOVE, IORING_OP_ACCEPT, IORING_OP_ASYNC_CANCEL, IORING_OP_LINK_TIMEOUT, IORING_OP_CONNECT, IORING_OP_FALLOCATE, IORING_OP_OPENAT, IORING_OP_CLOSE, IORING_OP_FILES_UPDATE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE, IORING_OP_FADVISE, IORING_OP_MADVISE, IORING_OP_SEND, IORING_OP_RECV, IORING_OP_OPENAT2, IORING_OP_EPOLL_CTL, IORING_OP_SPLICE, IORING_OP_PROVIDE_BUFFERS, IORING_OP_REMOVE_BUFFERS, IORING_OP_TEE, IORING_OP_SHUTDOWN, IORING_OP_RENAMEAT, IORING_OP_UNLINKAT, IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT, IORING_OP_MSG_RING, IORING_OP_FSETXATTR, IORING_OP_SETXATTR, IORING_OP_FGETXATTR, IORING_OP_GETXATTR, IORING_OP_SOCKET, IORING_OP_URING_CMD, IORING_OP_SEND_ZC
   279  
   280  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
   281  # io_uring submission queue entry (io_uring_sqe) descriptions # # # # # # # # #
   282  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
   283  
   284  #
   285  # sqe type template
   286  #
   287  
         # Generic submission queue entry. Each operation instantiates this template with:
         #   OP         - opcode constant stored in the first byte
         #   IOPRIO     - type of the ioprio field
         #   FD         - type of the fd field
         #   OFF        - type of the off field
         #   ADDR       - type of the addr field
         #   LEN        - type of the len field
         #   MISC_FLAGS - type of the per-operation flags field
         #   USER_DATA  - name of the flags set that user_data (int64) is drawn from
         #   MISC       - type of the trailing area
         # The sqe flags byte always comes from iosqe_flags. The size attribute fixes the
         # total size of the struct to SIZEOF_IO_URING_SQE.
    288  type io_uring_sqe[OP, IOPRIO, FD, OFF, ADDR, LEN, MISC_FLAGS, USER_DATA, MISC] {
    289  	opcode		const[OP, int8]
    290  	flags		flags[iosqe_flags, int8]
    291  	ioprio		IOPRIO
    292  	fd		FD
    293  	off		OFF
    294  	addr		ADDR
    295  	len		LEN
    296  	misc_flags	MISC_FLAGS
    297  	user_data	flags[USER_DATA, int64]
    298  # This is a union of different possibilities with a padding at the end.
    299  	misc		MISC
    300  } [size[SIZEOF_IO_URING_SQE]]
   301  
   302  define SIZEOF_IO_URING_SQE	64
   303  
   304  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
   305  
   306  #
   307  # Instantiation of sqes for each op
   308  #
   309  
   310  type io_uring_sqe$nop io_uring_sqe[IORING_OP_NOP, const[0, int16], const[0, int32], const[0, int64], const[0, int64], const[0, int32], const[0, int32], zero_flag, array[const[0, int64], 3]]
   311  
   312  io_uring_sqe_readv [
   313  	pass_iovec		io_uring_sqe[IORING_OP_READV, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], ptr[in, array[iovec_out]], len[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   314  	use_registered_buffer	io_uring_sqe[IORING_OP_READV, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], const[0, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
   315  ]
   316  
   317  type io_uring_sqe$writev io_uring_sqe[IORING_OP_WRITEV, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], ptr[in, array[iovec_in]], len[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
   318  type io_uring_sqe$fsync io_uring_sqe[IORING_OP_FSYNC, const[0, int16], fd_or_fixed_fd_index, const[0, int64], const[0, int64], const[0, int32], flags[io_uring_fsync_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   319  type io_uring_sqe$read_fixed io_uring_sqe[IORING_OP_READ_FIXED, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], int64, int32, flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
   320  type io_uring_sqe$write_fixed io_uring_sqe[IORING_OP_WRITE_FIXED, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], int64, int32, flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
   321  type io_uring_sqe$poll_add io_uring_sqe[IORING_OP_POLL_ADD, const[0, int16], fd_or_fixed_fd_index, const[0, int64], const[0, int64], const[0, int32], io_uring_sqe_poll_add_misc_flags, sqe_user_data_not_openat, personality_only_misc]
   322  type io_uring_sqe$poll_remove io_uring_sqe[IORING_OP_POLL_REMOVE, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   323  type io_uring_sqe$sync_file_range io_uring_sqe[IORING_OP_SYNC_FILE_RANGE, const[0, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], int32, flags[sync_file_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   324  # IORING_OP_SYNC_FILE_RANGE >= 5.2
   325  type io_uring_sqe$sendmsg io_uring_sqe[IORING_OP_SENDMSG, const[0, int16], sock, const[0, int64], ptr[in, send_msghdr], const[0, int32], flags[send_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   326  type io_uring_sqe$recvmsg io_uring_sqe[IORING_OP_RECVMSG, flags[iouring_recv_ioprio, int16], sock, const[0, int64], ptr[inout, recv_msghdr], const[0, int32], flags[recv_flags, int32], sqe_user_data_not_openat, buf_group_personality_misc]
   327  # IORING_OP_SENDMSG, IORING_OP_RECVMSG >= 5.3
   328  type io_uring_sqe$timeout io_uring_sqe[IORING_OP_TIMEOUT, const[0, int16], const[0, int32], io_uring_timeout_completion_event_count, ptr[in, timespec], const[1, int32], flags[io_uring_timeout_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   329  # IORING_OP_TIMEOUT >= 5.4
   330  type io_uring_sqe$timeout_remove io_uring_sqe[IORING_OP_TIMEOUT_REMOVE, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   331  type io_uring_sqe$accept io_uring_sqe[IORING_OP_ACCEPT, const[0, int16], sock, ptr[inout, len[addr, int32]], ptr[out, sockaddr_storage, opt], const[0, int32], flags[accept_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   332  type io_uring_sqe$async_cancel io_uring_sqe[IORING_OP_ASYNC_CANCEL, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   333  type io_uring_sqe$link_timeout io_uring_sqe[IORING_OP_LINK_TIMEOUT, const[0, int16], const[0, int32], const[0, int64], ptr[in, timespec], const[1, int32], flags[io_uring_timeout_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   334  type io_uring_sqe$connect io_uring_sqe[IORING_OP_CONNECT, const[0, int16], sock, len[addr, int32], ptr[in, sockaddr_storage], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   335  # IORING_OP_TIMEOUT_REMOVE, IORING_OP_ACCEPT, IORING_OP_ASYNC_CANCEL, IORING_OP_LINK_TIMEOUT, IORING_OP_CONNECT >= 5.5
   336  type io_uring_sqe$fallocate io_uring_sqe[IORING_OP_FALLOCATE, const[0, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], int32, const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   337  type io_uring_sqe$openat io_uring_sqe[IORING_OP_OPENAT, const[0, int16], fd_dir[opt], const[0, int64], ptr64[in, filename], flags[open_mode, int32], flags[open_flags, int32], sqe_user_data_openat, personality_only_misc]
   338  type io_uring_sqe$close io_uring_sqe[IORING_OP_CLOSE, const[0, int16], fd, const[0, int64], const[0, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   339  type io_uring_sqe$files_update io_uring_sqe[IORING_OP_FILES_UPDATE, const[0, int16], const[0, int32], fileoff[int64], ptr[in, array[fd]], len[addr, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   340  type io_uring_sqe$statx io_uring_sqe[IORING_OP_STATX, const[0, int16], fd_dir[opt], ptr[out, statx], ptr64[in, filename], flags[statx_mask, int32], flags[statx_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   341  
   342  io_uring_sqe_read [
   343  	pass_buffer		io_uring_sqe[IORING_OP_READ, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], buffer[out], bytesize[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   344  	use_registered_buffer	io_uring_sqe[IORING_OP_READ, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], const[0, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
   345  ]
   346  
   347  type io_uring_sqe$write io_uring_sqe[IORING_OP_WRITE, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], buffer[in], bytesize[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   348  type io_uring_sqe$fadvise io_uring_sqe[IORING_OP_FADVISE, const[0, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], int32, flags[fadvise_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   349  type io_uring_sqe$madvise io_uring_sqe[IORING_OP_MADVISE, const[0, int16], const[0, int32], const[0, int64], vma, len[addr, int32], flags[madvise_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   350  type io_uring_sqe$send io_uring_sqe[IORING_OP_SEND, const[0, int16], sock, const[0, int64], buffer[in], len[addr, int32], flags[send_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   351  
   352  iouring_recv_ioprio = IORING_RECVSEND_POLL_FIRST, IORING_RECV_MULTISHOT, IORING_RECVSEND_FIXED_BUF
   353  
   354  io_uring_sqe_recv [
   355  	pass_buffer		io_uring_sqe[IORING_OP_RECV, const[0, int16], sock, const[0, int64], buffer[inout], len[addr, int32], flags[recv_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   356  	use_registered_buffer	io_uring_sqe[IORING_OP_RECV, flags[iouring_recv_ioprio, int16], sock, const[0, int64], const[0, int64], const[0, int32], flags[recv_flags, int32], sqe_user_data_not_openat, buf_group_personality_misc]
   357  ]
   358  
   359  type io_uring_sqe$openat2 io_uring_sqe[IORING_OP_OPENAT2, const[0, int16], fd_dir[opt], ptr[in, open_how], ptr64[in, filename], bytesize[off, int32], const[0, int32], sqe_user_data_openat, personality_only_misc]
   360  type io_uring_sqe$epoll_ctl_t[EPOLL_OP, EPOLL_EVENTS] io_uring_sqe[IORING_OP_EPOLL_CTL, const[0, int16], fd_epoll, EPOLL_EVENTS, fd, const[EPOLL_OP, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   361  
   362  io_uring_sqe_epoll_ctl [
   363  	add	io_uring_sqe$epoll_ctl_t[EPOLL_CTL_ADD, ptr[in, epoll_event]]
   364  	del	io_uring_sqe$epoll_ctl_t[EPOLL_CTL_DEL, const[0, int64]]
   365  	mod	io_uring_sqe$epoll_ctl_t[EPOLL_CTL_MOD, ptr[in, epoll_event]]
   366  ]
   367  
   368  # IORING_OP_EPOLL_CTL, IORING_OP_SEND, IORING_OP_FALLOCATE, IORING_OP_MADVISE, IORING_OP_FADVISE, IORING_OP_RECV
   369  # IORING_OP_OPENAT, IORING_OP_OPENAT2, IORING_OP_CLOSE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE >= 5.6
   370  
   371  type io_uring_sqe$splice io_uring_sqe[IORING_OP_SPLICE, const[0, int16], fd_or_fixed_fd_index, fileoff[int64], io_uring_sqe_splice_off_in, int32, flags[splice_flags, int32], sqe_user_data_not_openat, io_uring_sqe_splice_misc]
   372  type io_uring_sqe$provide_buffers io_uring_sqe[IORING_OP_PROVIDE_BUFFERS, const[0, int16], int32, io_uring_bid[int64], buffer[in], int32, const[0, int32], sqe_user_data_not_openat, buf_group_personality_misc]
   373  type io_uring_sqe$remove_buffers io_uring_sqe[IORING_OP_PROVIDE_BUFFERS, const[0, int16], int32, const[0, int64], const[0, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, buf_group_personality_misc]
   374  # IORING_OP_SPLICE, IORING_OP_PROVIDE_BUFFERS >= 5.7
   375  type io_uring_sqe$tee io_uring_sqe[IORING_OP_TEE, const[0, int16], fd_or_fixed_fd_index, const[0, int64], const[0, int64], int32, flags[splice_flags, int32], sqe_user_data_not_openat, io_uring_sqe_tee_misc]
   376  # IORING_OP_TEE >= 5.8
   377  
   378  type io_uring_sqe$shutdown io_uring_sqe[IORING_OP_SHUTDOWN, const[0, int16], sock, const[0, int64], const[0, int64], flags[shutdown_flags, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   379  type io_uring_sqe$renameat io_uring_sqe[IORING_OP_RENAMEAT, const[0, int16], fd_dir, ptr64[in, filename], ptr64[in, filename], fd_dir, const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   380  type io_uring_sqe$unlinkat io_uring_sqe[IORING_OP_UNLINKAT, const[0, int16], fd_dir, const[0, int64], ptr64[in, filename], const[0, int32], flags[unlinkat_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   381  # IORING_OP_SHUTDOWN, IORING_OP_RENAMEAT, IORING_OP_UNLINKAT >= 5.11
   382  
   383  type io_uring_sqe$mkdirat io_uring_sqe[IORING_OP_MKDIRAT, const[0, int16], fd_dir, const[0, int64], ptr64[in, filename], flags[open_mode, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   384  type io_uring_sqe$symlinkat io_uring_sqe[IORING_OP_SYMLINKAT, const[0, int16], fd_dir, ptr64[in, filename], ptr64[in, filename], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   385  type io_uring_sqe$linkat io_uring_sqe[IORING_OP_LINKAT, const[0, int16], fd_dir, ptr64[in, filename], ptr64[in, filename], fd_dir, flags[linkat_flags, int32], sqe_user_data_not_openat, personality_only_misc]
   386  # IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT >= 5.15
   387  
   388  type io_uring_sqe$msg_ring io_uring_sqe[IORING_OP_MSG_RING, const[0, int16], fd_io_uring, int64, buffer[in], len[addr, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
   389  # IORING_OP_MSG_RING >= 5.18
   390  
   391  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
   392  
   393  #
   394  # Flags, enumerations, and misc fields of sqe ops
   395  #
   396  
   397  iosqe_flags = IOSQE_IO_DRAIN, IOSQE_IO_LINK, IOSQE_IO_HARDLINK, IOSQE_ASYNC, IOSQE_BUFFER_SELECT, IOSQE_FIXED_FILE, IOSQE_CQE_SKIP_SUCCESS
    398  # Do not use the IOSQE_*_BIT constants as flags: they are bit positions, not bit masks.
   399  # IOSQE_IO_DRAIN >= 5.2
   400  # IOSQE_IO_LINK >= 5.3
   401  # IOSQE_IO_HARDLINK >= 5.5
   402  # IOSQE_ASYNC >= 5.6
   403  # IOSQE_BUFFER_SELECT >= 5.7
   404  # IOSQE_FIXED_FILE >= 5.1
   405  # IOSQE_CQE_SKIP_SUCCESS >= 5.17
   406  
         # Value placed in the fd slot of an sqe: either a regular file descriptor or a small
         # index (0..10) meant to select a registered file.
    407  fd_or_fixed_fd_index [
    408  	fd		fd
    409  # Use the registered files (io_uring_register$IORING_REGISTER_FILES) when the IOSQE_FIXED_FILE flag is set in sqe.
    410  # To ease collisions, limit the indices.
    411  	fd_index	int32[0:10]
    412  ]
   413  
   414  # 0 for normal file integrity sync, IORING_FSYNC_DATASYNC to provide data sync only semantics
   415  io_uring_fsync_flags = 0, IORING_FSYNC_DATASYNC
   416  
   417  # 0 for relative, IORING_TIMEOUT_ABS for absolute timeout value
   418  io_uring_timeout_flags = 0, IORING_TIMEOUT_ABS
   419  
    420  # The timeout condition is met when either the specified timeout expires, or the
   421  # specified number of events have completed. If not set, defaults to 1. Use a
   422  # limited range to allow utilization of this value to meet timeout condition besides
   423  # the timeout expiration.
   424  type io_uring_timeout_completion_event_count int64[0:10]
   425  
    426  # An already issued request can be attempted to be cancelled using ASYNC_CANCEL
    427  # operation. This operation identifies the target operations by the value that was
    428  # passed as user_data in their sqe. To ease collisions of ASYNC_CANCEL operation with
    429  # already submitted ones, use a limited range of values for user_data field.
    430  # Among all operations that can be achieved by submitting to the io_uring, only
    431  # openat and openat2 return a useful resource (fd) that we can use for other
    432  # system calls. The resulting fds are returned within io_uring_cqe.res. The only way
    433  # to identify cqes for those operations is to keep track of their user data. Thus,
    434  # use a separate set of sqe_user_data values for openat and openat2.
   435  sqe_user_data_not_openat = 0, 1
   436  sqe_user_data_openat = 0x12345, 0x23456
   437  sqe_user_data = 0, 1, 0x12345, 0x23456
   438  
   439  # The buffer id (bid) and the buffer group id (bgid) are registered using
   440  # IORING_OP_PROVIDE_BUFFERS. Use the ids in a limited range to ease collisions
   441  # with other operations.
   442  type io_uring_bid[T] T[0:3]
   443  type io_uring_bgid[T] T[0:3]
   444  
   445  zero_flag = 0
   446  
         # MISC_FLAGS slot of io_uring_sqe$poll_add: a 16-bit poll event mask taken from
         # pollfd_events, followed by a zero int16 so the slot is 4 bytes in total.
    447  io_uring_sqe_poll_add_misc_flags {
    448  	misc_flags		flags[pollfd_events, int16]
    449  # 2 bytes of padding to fill what is left from the union of flags
    450  	fill_flags_union	const[0, int16]
    451  }
   452  
         # Fills the ADDR slot of io_uring_sqe$splice: a zero int32 followed by an fd.
         # NOTE(review): the field name suggests an input offset, yet it is typed as fd and
         # sits in the second half of the slot -- confirm this is intended.
    453  io_uring_sqe_splice_off_in {
    454  	splice_off_in_unused	const[0, int32]
    455  	splice_off_in		fd
    456  }
   457  
   458  # Descriptions for MISC field of io_uring_sqe. The content for most are common
   459  # while there are a few specials.
   460  
         # MISC area for operations that use neither a buffer index nor a buffer group:
         # the first int16 is zero, followed by an optional personality id and 20 zero
         # bytes of padding.
    461  personality_only_misc {
    462  	buf_index_unused	const[0, int16]
    463  	ioring_personality_id	ioring_personality_id[opt]
    464  	pad_unused		array[const[0, int8], 20]
    465  }
   466  
         # MISC area whose first int16 is a buffer index (restricted to the io_uring_bid
         # range), followed by an optional personality id and 20 zero bytes of padding.
    467  buf_index_personality_misc {
    468  	buf_index		io_uring_bid[int16]
    469  	ioring_personality_id	ioring_personality_id[opt]
    470  	pad_unused		array[const[0, int8], 20]
    471  }
   472  
         # MISC area whose first int16 is a buffer group id (restricted to the io_uring_bgid
         # range), followed by an optional personality id and 20 zero bytes of padding.
    473  buf_group_personality_misc {
    474  	buf_group		io_uring_bgid[int16]
    475  	ioring_personality_id	ioring_personality_id[opt]
    476  	pad_unused		array[const[0, int8], 20]
    477  }
   478  
         # MISC area of io_uring_sqe$splice: a zero int16, an optional personality id, the
         # input file descriptor (splice_fd_in) and two zero int64 words of padding.
    479  io_uring_sqe_splice_misc {
    480  	buf_unused		const[0, int16]
    481  	ioring_personality_id	ioring_personality_id[opt]
    482  	splice_fd_in		fd
    483  	pad_unused		array[const[0, int64], 2]
    484  }
   485  
         # MISC area of io_uring_sqe$tee. Same field layout as io_uring_sqe_splice_misc:
         # a zero int16, an optional personality id, the input file descriptor and two zero
         # int64 words of padding.
    486  io_uring_sqe_tee_misc {
    487  	buf_unused		const[0, int16]
    488  	ioring_personality_id	ioring_personality_id[opt]
    489  	splice_fd_in		fd
    490  	pad_unused		array[const[0, int64], 2]
    491  }
   491  }