# Copyright 2019 syzkaller project authors. All rights reserved.
# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

# Origin: github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/sys/linux/io_uring.txt
# See http://kernel.dk/io_uring.pdf

include <uapi/linux/io_uring.h>
# For EPOLL_CTL_ADD, EPOLL_CTL_MOD, EPOLL_CTL_DEL
include <uapi/linux/eventpoll.h>

resource fd_io_uring[fd]
resource ring_ptr[int64]
resource sqes_ptr[int64]
resource ioring_personality_id[int16]

# fs/io_uring.c
define IORING_MAX_ENTRIES	32768
define IORING_MAX_CQ_ENTRIES	(2 * IORING_MAX_ENTRIES)

# Wrapper that first performs the setup by calling io_uring_setup, then calls mmap
# to map the ring and the sqes. It is hard for the fuzzer to generate correct
# programs using mmap calls with a fuzzer-provided mmap length, so this wrapper
# ensures the length is computed correctly.
syz_io_uring_setup(entries int32[1:IORING_MAX_ENTRIES], params ptr[inout, io_uring_params], ring_ptr ptr[out, ring_ptr], sqes_ptr ptr[out, sqes_ptr]) fd_io_uring

# Raw syscalls. Entry counts are bounded by the IORING_MAX_* defines above.
io_uring_setup(entries int32[1:IORING_MAX_ENTRIES], params ptr[inout, io_uring_params]) fd_io_uring
io_uring_enter(fd fd_io_uring, to_submit int32[0:IORING_MAX_ENTRIES], min_complete int32[0:IORING_MAX_CQ_ENTRIES], flags flags[io_uring_enter_flags], sigmask ptr[in, sigset_t], size len[sigmask])

# Register/unregister pairs for fixed buffers and fixed files.
io_uring_register$IORING_REGISTER_BUFFERS(fd fd_io_uring, opcode const[IORING_REGISTER_BUFFERS], arg ptr[in, array[iovec_out]], nr_args len[arg])
io_uring_register$IORING_UNREGISTER_BUFFERS(fd fd_io_uring, opcode const[IORING_UNREGISTER_BUFFERS], arg const[0], nr_args const[0])
io_uring_register$IORING_REGISTER_FILES(fd fd_io_uring, opcode const[IORING_REGISTER_FILES], arg ptr[in, array[fd]], nr_args len[arg])
io_uring_register$IORING_UNREGISTER_FILES(fd fd_io_uring, opcode const[IORING_UNREGISTER_FILES], arg const[0], nr_args const[0])
io_uring_register$IORING_REGISTER_EVENTFD(fd fd_io_uring, opcode const[IORING_REGISTER_EVENTFD], arg ptr[in, fd_event], nr_args const[1])
io_uring_register$IORING_UNREGISTER_EVENTFD(fd fd_io_uring, opcode const[IORING_UNREGISTER_EVENTFD], arg const[0], nr_args const[0])
io_uring_register$IORING_REGISTER_FILES_UPDATE(fd fd_io_uring, opcode const[IORING_REGISTER_FILES_UPDATE], arg ptr[in, io_uring_files_update], nr_args len[arg:fds])
io_uring_register$IORING_REGISTER_EVENTFD_ASYNC(fd fd_io_uring, opcode const[IORING_REGISTER_EVENTFD_ASYNC], arg ptr[in, fd_event], nr_args const[1])
io_uring_register$IORING_REGISTER_PROBE(fd fd_io_uring, opcode const[IORING_REGISTER_PROBE], arg ptr[inout, io_uring_probe], nr_args len[arg:ops])
io_uring_register$IORING_REGISTER_PERSONALITY(fd fd_io_uring, opcode const[IORING_REGISTER_PERSONALITY], arg const[0], nr_args const[0]) ioring_personality_id
io_uring_register$IORING_UNREGISTER_PERSONALITY(fd fd_io_uring, opcode const[IORING_UNREGISTER_PERSONALITY], arg const[0], nr_args ioring_personality_id)
# IORING_REGISTER_EVENTFD, IORING_UNREGISTER_EVENTFD >= 5.2
# IORING_REGISTER_FILES_UPDATE >= 5.5
# IORING_REGISTER_EVENTFD_ASYNC, IORING_REGISTER_PROBE, IORING_REGISTER_PERSONALITY, IORING_UNREGISTER_PERSONALITY >= 5.6

io_uring_register$IORING_REGISTER_ENABLE_RINGS(fd fd_io_uring, opcode const[IORING_REGISTER_ENABLE_RINGS], arg const[0], nr_args const[0])
io_uring_register$IORING_REGISTER_RESTRICTIONS(fd fd_io_uring, opcode const[IORING_REGISTER_RESTRICTIONS], arg ptr[in, array[io_uring_restriction_st]], nr_args len[arg])
# IORING_REGISTER_ENABLE_RINGS, IORING_REGISTER_RESTRICTIONS >= 5.10
io_uring_register$IORING_REGISTER_BUFFERS2(fd fd_io_uring, opcode const[IORING_REGISTER_BUFFERS2], arg ptr[in, io_uring_rsrc_register], size bytesize[arg])
io_uring_register$IORING_REGISTER_BUFFERS_UPDATE(fd fd_io_uring, opcode const[IORING_REGISTER_BUFFERS_UPDATE], arg ptr[in, io_uring_rsrc_update2], size bytesize[arg])
io_uring_register$IORING_REGISTER_FILES2(fd fd_io_uring, opcode const[IORING_REGISTER_FILES2], arg ptr[in, io_uring_rsrc_register], size bytesize[arg])
io_uring_register$IORING_REGISTER_FILES_UPDATE2(fd fd_io_uring, opcode const[IORING_REGISTER_FILES_UPDATE2], arg ptr[in, io_uring_rsrc_update2], size bytesize[arg])
# IORING_REGISTER_BUFFERS2, IORING_REGISTER_BUFFERS_UPDATE, IORING_REGISTER_FILES2, IORING_REGISTER_FILES_UPDATE2 >= 5.13
io_uring_register$IORING_REGISTER_IOWQ_AFF(fd fd_io_uring, opcode const[IORING_REGISTER_IOWQ_AFF], arg ptr[in, array[int8]], size bytesize[arg])
io_uring_register$IORING_UNREGISTER_IOWQ_AFF(fd fd_io_uring, opcode const[IORING_UNREGISTER_IOWQ_AFF], arg const[0], nr_args const[0])
# IORING_REGISTER_IOWQ_AFF, IORING_UNREGISTER_IOWQ_AFF >= 5.14
io_uring_register$IORING_REGISTER_IOWQ_MAX_WORKERS(fd fd_io_uring, opcode const[IORING_REGISTER_IOWQ_MAX_WORKERS], arg ptr[in, array[int32, 2]], nr_args const[2])
# IORING_REGISTER_IOWQ_MAX_WORKERS >= 5.15

# Both ring-fd opcodes take an array of struct io_uring_rsrc_update (16 bytes per
# entry), not struct io_uring_rsrc_register. The register variant is inout because
# the kernel writes the chosen slot back into the offset field.
io_uring_register$IORING_REGISTER_RING_FDS(fd fd_io_uring, opcode const[IORING_REGISTER_RING_FDS], arg ptr[inout, array[io_uring_ring_fd_register]], nr_args len[arg])
io_uring_register$IORING_UNREGISTER_RING_FDS(fd fd_io_uring, opcode const[IORING_UNREGISTER_RING_FDS], arg ptr[in, array[io_uring_ring_fd_unregister]], nr_args len[arg])
# IORING_REGISTER_RING_FDS, IORING_UNREGISTER_RING_FDS >= 5.18

# struct io_uring_rsrc_update as used by IORING_REGISTER_RING_FDS: the 64-bit data
# member carries the ring fd. An offset of -1 (0xffffffff) asks the kernel to pick
# a free slot.
# NOTE(review): fd followed by a zero upper half matches the 64-bit data member on
# little-endian targets only; confirm whether big-endian targets need handling.
io_uring_ring_fd_register {
	offset	int32
	resv	const[0, int32]
	fd	fd_io_uring
	fd_hi	const[0, int32]
}

# struct io_uring_rsrc_update as used by IORING_UNREGISTER_RING_FDS: data must be
# zero and offset is the registered slot index (the kernel allows 16 slots).
io_uring_ring_fd_unregister {
	offset	int32[0:15]
	resv	const[0, int32]
	data	const[0, int64]
}

io_uring_register$IORING_REGISTER_PBUF_RING(fd fd_io_uring, opcode const[IORING_REGISTER_PBUF_RING], arg ptr[in, io_uring_buf_reg], nr_args const[1])
io_uring_register$IORING_UNREGISTER_PBUF_RING(fd fd_io_uring, opcode const[IORING_UNREGISTER_PBUF_RING], arg ptr[in, io_uring_buf_reg], nr_args const[1])
# IORING_REGISTER_PBUF_RING, IORING_UNREGISTER_PBUF_RING >= 5.19

io_uring_register_opcodes = IORING_REGISTER_BUFFERS, IORING_UNREGISTER_BUFFERS, IORING_REGISTER_FILES, IORING_UNREGISTER_FILES, IORING_REGISTER_EVENTFD, IORING_UNREGISTER_EVENTFD, IORING_REGISTER_FILES_UPDATE, IORING_REGISTER_EVENTFD_ASYNC, IORING_REGISTER_PROBE, IORING_REGISTER_PERSONALITY, IORING_UNREGISTER_PERSONALITY, IORING_REGISTER_RESTRICTIONS, IORING_REGISTER_ENABLE_RINGS, IORING_REGISTER_FILES2, IORING_REGISTER_FILES_UPDATE2, IORING_REGISTER_BUFFERS2, IORING_REGISTER_BUFFERS_UPDATE, IORING_REGISTER_IOWQ_AFF, IORING_UNREGISTER_IOWQ_AFF, IORING_REGISTER_IOWQ_MAX_WORKERS, IORING_REGISTER_RING_FDS, IORING_UNREGISTER_RING_FDS, IORING_REGISTER_PBUF_RING, IORING_UNREGISTER_PBUF_RING, IORING_REGISTER_SYNC_CANCEL, IORING_REGISTER_FILE_ALLOC_RANGE

# The mmap'ed areas for the SQ and CQ rings are really the same -- the difference is
# accounted for with the usage of offsets.
mmap$IORING_OFF_SQ_RING(addr vma, len len[addr], prot flags[mmap_prot], flags flags[mmap_flags], fd fd_io_uring, offset const[IORING_OFF_SQ_RING]) ring_ptr
mmap$IORING_OFF_CQ_RING(addr vma, len len[addr], prot flags[mmap_prot], flags flags[mmap_flags], fd fd_io_uring, offset const[IORING_OFF_CQ_RING]) ring_ptr
mmap$IORING_OFF_SQES(addr vma, len len[addr], prot flags[mmap_prot], flags flags[mmap_flags], fd fd_io_uring, offset const[IORING_OFF_SQES]) sqes_ptr

# If no flags are specified (0), the io_uring instance is set up for interrupt-driven IO.
io_uring_setup_flags = 0, IORING_SETUP_IOPOLL, IORING_SETUP_SQPOLL, IORING_SETUP_SQ_AFF, IORING_SETUP_CQSIZE, IORING_SETUP_CLAMP, IORING_SETUP_ATTACH_WQ, IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE, IORING_FEAT_RW_CUR_POS, IORING_FEAT_FAST_POLL, IORING_FEAT_POLL_32BITS, IORING_SETUP_R_DISABLED, IORING_FEAT_SQPOLL_NONFIXED, IORING_FEAT_NATIVE_WORKERS, IORING_FEAT_RSRC_TAGS, IORING_FEAT_CQE_SKIP, IORING_SETUP_SUBMIT_ALL, IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG, IORING_SETUP_SQE128, IORING_SETUP_CQE32, IORING_SETUP_SINGLE_ISSUER, IORING_SETUP_DEFER_TASKRUN
# NOTE(review): the IORING_FEAT_* names above look like kernel-reported feature
# bits rather than setup flags; confirm whether they belong in this list.
# Mind the kernel version under test:
# IORING_FEAT_SINGLE_MMAP >= 5.4
# IORING_FEAT_NODROP, IORING_FEAT_SUBMIT_STABLE >= 5.5
# IORING_FEAT_RW_CUR_POS >= 5.6
# IORING_FEAT_FAST_POLL >= 5.7
# IORING_FEAT_POLL_32BITS >= 5.9
# IORING_SETUP_R_DISABLED >= 5.10 (this should be used with IORING_REGISTER_ENABLE_RINGS)
# IORING_FEAT_SQPOLL_NONFIXED >= 5.11
# IORING_FEAT_NATIVE_WORKERS >= 5.12
# IORING_FEAT_RSRC_TAGS >= 5.13
# IORING_FEAT_CQE_SKIP >= 5.17
# IORING_SETUP_SUBMIT_ALL >= 5.18
# IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG, IORING_SETUP_SQE128, IORING_SETUP_CQE32 >= 5.19
# IORING_SETUP_SINGLE_ISSUER >= 6.0
# IORING_SETUP_DEFER_TASKRUN >= 6.1

io_uring_enter_flags = IORING_ENTER_GETEVENTS, IORING_ENTER_SQ_WAKEUP, IORING_ENTER_SQ_WAIT, IORING_ENTER_EXT_ARG, IORING_ENTER_REGISTERED_RING
# IORING_ENTER_EXT_ARG >= 5.11
_ = __NR_mmap2

# Once an io_uring is set up by calling io_uring_setup, the offsets to the member fields
# to be used on the mmap'ed area are set in structs io_sqring_offsets and io_cqring_offsets.
# Except io_sqring_offsets.array, the offsets are static while all depend on how struct io_rings
# is organized in code. The offsets can be marked as resources in syzkaller descriptions but
# this makes it difficult to generate correct programs by the fuzzer. Thus, the offsets are
# hard-coded here (and in the executor).
# The defines below are listed in increasing offset order.
define SQ_HEAD_OFFSET	0
define SQ_TAIL_OFFSET	64
define CQ_HEAD_OFFSET	128
define CQ_TAIL_OFFSET	192
define SQ_RING_MASK_OFFSET	256
define CQ_RING_MASK_OFFSET	260
define SQ_RING_ENTRIES_OFFSET	264
define CQ_RING_ENTRIES_OFFSET	268
define SQ_DROPPED_OFFSET	272
define SQ_FLAGS_OFFSET	276
define CQ_FLAGS_OFFSET	280
define CQ_RING_OVERFLOW_OFFSET	284

# Notice all offsets are pointing to uint32 values. The generic metadata copy
# below relies on this.
io_uring_offsets = SQ_HEAD_OFFSET, SQ_TAIL_OFFSET, SQ_RING_MASK_OFFSET, SQ_RING_ENTRIES_OFFSET, SQ_DROPPED_OFFSET, CQ_HEAD_OFFSET, CQ_TAIL_OFFSET, CQ_RING_MASK_OFFSET, CQ_RING_ENTRIES_OFFSET, CQ_RING_OVERFLOW_OFFSET, io_uring_flags_offsets

# Also, all values are int32, thus, set nbytes to 4.
syz_memcpy_off$IO_URING_METADATA_GENERIC(ring_ptr ring_ptr, off flags[io_uring_offsets], src ptr[in, int32], src_off const[0], nbytes const[4])

# The flags available are: IORING_SQ_NEED_WAKEUP (1) for sq, IORING_CQ_EVENTFD_DISABLED (1) for cq. Use int32[0:1] to represent possible values.
io_uring_flags_offsets = SQ_FLAGS_OFFSET, CQ_FLAGS_OFFSET
syz_memcpy_off$IO_URING_METADATA_FLAGS(ring_ptr ring_ptr, flag_off flags[io_uring_flags_offsets], src ptr[in, int32[0:1]], src_off const[0], nbytes const[4])

# Argument of IORING_REGISTER_PROBE. All header fields are passed as zero; ops is
# the variable-length tail whose element count is passed as nr_args.
io_uring_probe {
	last_op	const[0, int8]
	ops_len	const[0, int8]
	resv	const[0, int16]
	resv2	array[const[0, int32], 3]
	ops	array[io_uring_probe_op, 0:IORING_OP_LAST]
}

io_uring_probe_op {
	op	const[0, int8]
	resv	const[0, int8]
	flags	const[0, int16]
	resv2	const[0, int32]
}

# Argument of IORING_REGISTER_FILES_UPDATE.
io_uring_files_update {
	offset	int32
	resv	const[0, int32]
	fds	ptr64[in, array[fd]]
}

#
# type template for io_uring_restriction
#

type io_uring_restriction[OPCODE, OPARG] {
	op	const[OPCODE, int16]
	oparg	OPARG
	resv	const[0, int8]
	resv2	array[const[0, int32], 3]
}

# Values for the sqe_op member of a restriction: the IORING_OP_* opcodes that this
# file provides sqe descriptions for.
io_uring_restriction_sqe_opcodes = IORING_OP_NOP, IORING_OP_READV, IORING_OP_WRITEV, IORING_OP_FSYNC, IORING_OP_READ_FIXED, IORING_OP_WRITE_FIXED, IORING_OP_POLL_ADD, IORING_OP_POLL_REMOVE, IORING_OP_SYNC_FILE_RANGE, IORING_OP_SENDMSG, IORING_OP_RECVMSG, IORING_OP_TIMEOUT, IORING_OP_TIMEOUT_REMOVE, IORING_OP_ACCEPT, IORING_OP_ASYNC_CANCEL, IORING_OP_LINK_TIMEOUT, IORING_OP_CONNECT, IORING_OP_FALLOCATE, IORING_OP_OPENAT, IORING_OP_CLOSE, IORING_OP_FILES_UPDATE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE, IORING_OP_FADVISE, IORING_OP_MADVISE, IORING_OP_SEND, IORING_OP_RECV, IORING_OP_OPENAT2, IORING_OP_EPOLL_CTL, IORING_OP_SPLICE, IORING_OP_PROVIDE_BUFFERS, IORING_OP_REMOVE_BUFFERS, IORING_OP_TEE, IORING_OP_SHUTDOWN, IORING_OP_RENAMEAT, IORING_OP_UNLINKAT, IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT, IORING_OP_MSG_RING

# One entry of the IORING_REGISTER_RESTRICTIONS array. The one-byte argument is
# interpreted according to the restriction opcode: a register opcode for
# REGISTER_OP, an sqe opcode for SQE_OP, and IOSQE_* bits for the two SQE_FLAGS_*
# restrictions.
io_uring_restriction_st [
	ioring_restriction_register_op		io_uring_restriction[IORING_RESTRICTION_REGISTER_OP, flags[io_uring_register_opcodes, int8]]
	ioring_restriction_sqe_op		io_uring_restriction[IORING_RESTRICTION_SQE_OP, flags[io_uring_restriction_sqe_opcodes, int8]]
	ioring_restriction_sqe_flags_allowed	io_uring_restriction[IORING_RESTRICTION_SQE_FLAGS_ALLOWED, flags[iosqe_flags, int8]]
	ioring_restriction_sqe_flags_required	io_uring_restriction[IORING_RESTRICTION_SQE_FLAGS_REQUIRED, flags[iosqe_flags, int8]]
]

io_uring_rsrc_flags = IORING_RSRC_REGISTER_SPARSE

# Argument of IORING_REGISTER_BUFFERS2 / IORING_REGISTER_FILES2.
io_uring_rsrc_register {
	nr	len[data, int32]
	flags	flags[io_uring_rsrc_flags, int32]
	resv2	const[0, int64]
	data	ptr64[in, array[iovec_out]]
	tags	ptr64[in, array[int64]]
}

# Argument of IORING_REGISTER_BUFFERS_UPDATE / IORING_REGISTER_FILES_UPDATE2.
io_uring_rsrc_update2 {
	offset	int32
	resv	const[0, int32]
	data	ptr64[in, array[iovec_out]]
	tags	ptr64[in, array[int64]]
	nr	len[data, int32]
	resv2	const[0, int32]
}

io_uring_buf {
	addr	ptr64[in, array[int8]]
	len	len[addr, int32]
	bid	io_uring_bid[int16]
	resv	const[0, int16]
}

io_uring_buf_array {
	data	array[io_uring_buf]
} [align[4096]]

# Argument of IORING_REGISTER_PBUF_RING / IORING_UNREGISTER_PBUF_RING.
io_uring_buf_reg {
	ring_addr	ptr64[in, io_uring_buf_array]
	ring_entries	len[ring_addr:data, int32]
	bgid		io_uring_bgid[int16]	(in)
# IOU_PBUF_RING_MMAP not supported yet
	flags		const[0, int16]
	resv		array[const[0, int64], 3]
}

io_uring_params {
	sq_entries	int32	(out)
	cq_entries	int32[0:IORING_MAX_CQ_ENTRIES]	(inout)
	flags		flags[io_uring_setup_flags, int32]	(in)
	sq_thread_cpu	int32[0:3]	(in)
	sq_thread_idle	int32[0:1000]	(in)
	features	int32	(out)
	wq_fd		fd_io_uring[opt]	(in)
	resv		array[const[0, int32], 3]
# We don't really use them (they are hard-coded). Thus, just pass some memory region of their size.
# TODO: Now that per-field directions are supported, can we avoid using hard-coded values for offsets?
	sq_off		array[int32, 10]	(out)
	cq_off		array[int32, 10]	(out)
}

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Descriptions for sq_ring and cq_ring manipulation # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

# Retrieve the cqe at the head of the cq_ring and advance the head. The only meaningful
# resource contained within a cqe is produced by the completion of openat or openat2 calls,
# which produce an fd. If that is the case, returns the return value of those. Otherwise,
# for other operations, returns an invalid fd (-1).
syz_io_uring_complete(ring_ptr ring_ptr) fd

# Submit sqe into the sq_ring
syz_io_uring_submit(ring_ptr ring_ptr, sqes_ptr sqes_ptr, sqe ptr[in, io_uring_sqe_u])

# One member per supported opcode; each member is a fully laid out 64-byte sqe.
io_uring_sqe_u [
	IORING_OP_NOP			io_uring_sqe$nop
	IORING_OP_READV			io_uring_sqe_readv
	IORING_OP_WRITEV		io_uring_sqe$writev
	IORING_OP_FSYNC			io_uring_sqe$fsync
	IORING_OP_READ_FIXED		io_uring_sqe$read_fixed
	IORING_OP_WRITE_FIXED		io_uring_sqe$write_fixed
	IORING_OP_POLL_ADD		io_uring_sqe$poll_add
	IORING_OP_POLL_REMOVE		io_uring_sqe$poll_remove
	IORING_OP_SYNC_FILE_RANGE	io_uring_sqe$sync_file_range
	IORING_OP_SENDMSG		io_uring_sqe$sendmsg
	IORING_OP_RECVMSG		io_uring_sqe$recvmsg
	IORING_OP_TIMEOUT		io_uring_sqe$timeout
	IORING_OP_TIMEOUT_REMOVE	io_uring_sqe$timeout_remove
	IORING_OP_ACCEPT		io_uring_sqe$accept
	IORING_OP_ASYNC_CANCEL		io_uring_sqe$async_cancel
	IORING_OP_LINK_TIMEOUT		io_uring_sqe$link_timeout
	IORING_OP_CONNECT		io_uring_sqe$connect
	IORING_OP_FALLOCATE		io_uring_sqe$fallocate
	IORING_OP_OPENAT		io_uring_sqe$openat
	IORING_OP_CLOSE			io_uring_sqe$close
	IORING_OP_FILES_UPDATE		io_uring_sqe$files_update
	IORING_OP_STATX			io_uring_sqe$statx
	IORING_OP_READ			io_uring_sqe_read
	IORING_OP_WRITE			io_uring_sqe$write
	IORING_OP_FADVISE		io_uring_sqe$fadvise
	IORING_OP_MADVISE		io_uring_sqe$madvise
	IORING_OP_SEND			io_uring_sqe$send
	IORING_OP_RECV			io_uring_sqe_recv
	IORING_OP_OPENAT2		io_uring_sqe$openat2
	IORING_OP_EPOLL_CTL		io_uring_sqe_epoll_ctl
	IORING_OP_SPLICE		io_uring_sqe$splice
	IORING_OP_PROVIDE_BUFFERS	io_uring_sqe$provide_buffers
	IORING_OP_REMOVE_BUFFERS	io_uring_sqe$remove_buffers
	IORING_OP_TEE			io_uring_sqe$tee
	IORING_OP_SHUTDOWN		io_uring_sqe$shutdown
	IORING_OP_RENAMEAT		io_uring_sqe$renameat
	IORING_OP_UNLINKAT		io_uring_sqe$unlinkat
	IORING_OP_MKDIRAT		io_uring_sqe$mkdirat
	IORING_OP_SYMLINKAT		io_uring_sqe$symlinkat
	IORING_OP_LINKAT		io_uring_sqe$linkat
	IORING_OP_MSG_RING		io_uring_sqe$msg_ring
# TODO undocumented 6.0
# IORING_OP_FSETXATTR io_uring_sqe$fsetxattr
# IORING_OP_SETXATTR io_uring_sqe$setxattr
# IORING_OP_FGETXATTR io_uring_sqe$fgetxattr
# IORING_OP_GETXATTR io_uring_sqe$getxattr
# IORING_OP_SOCKET io_uring_sqe$socket
# IORING_OP_URING_CMD io_uring_sqe$uring_cmd
# IORING_OP_SEND_ZC io_uring_sqe$send_zc
]

# io_uring_enter_opcodes = IORING_OP_NOP, IORING_OP_READV, IORING_OP_WRITEV, IORING_OP_FSYNC, IORING_OP_READ_FIXED, IORING_OP_WRITE_FIXED, IORING_OP_POLL_ADD, IORING_OP_POLL_REMOVE, IORING_OP_SYNC_FILE_RANGE, IORING_OP_SENDMSG, IORING_OP_RECVMSG, IORING_OP_TIMEOUT, IORING_OP_TIMEOUT_REMOVE, IORING_OP_ACCEPT, IORING_OP_ASYNC_CANCEL, IORING_OP_LINK_TIMEOUT, IORING_OP_CONNECT, IORING_OP_FALLOCATE, IORING_OP_OPENAT, IORING_OP_CLOSE, IORING_OP_FILES_UPDATE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE, IORING_OP_FADVISE, IORING_OP_MADVISE, IORING_OP_SEND, IORING_OP_RECV, IORING_OP_OPENAT2, IORING_OP_EPOLL_CTL, IORING_OP_SPLICE, IORING_OP_PROVIDE_BUFFERS, IORING_OP_REMOVE_BUFFERS, IORING_OP_TEE, IORING_OP_SHUTDOWN, IORING_OP_RENAMEAT, IORING_OP_UNLINKAT, IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT, IORING_OP_MSG_RING, IORING_OP_FSETXATTR, IORING_OP_SETXATTR, IORING_OP_FGETXATTR, IORING_OP_GETXATTR, IORING_OP_SOCKET, IORING_OP_URING_CMD, IORING_OP_SEND_ZC

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# io_uring submission queue entry (io_uring_sqe) descriptions # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

#
# sqe type template
#

# Template arguments map one-to-one onto the sqe fields, in declaration order.
# The size attribute pins every instantiation to SIZEOF_IO_URING_SQE bytes.
type io_uring_sqe[OP, IOPRIO, FD, OFF, ADDR, LEN, MISC_FLAGS, USER_DATA, MISC] {
	opcode		const[OP, int8]
	flags		flags[iosqe_flags, int8]
	ioprio		IOPRIO
	fd		FD
	off		OFF
	addr		ADDR
	len		LEN
	misc_flags	MISC_FLAGS
	user_data	flags[USER_DATA, int64]
# This is a union of different possibilities with a padding at the end.
	misc		MISC
} [size[SIZEOF_IO_URING_SQE]]

define SIZEOF_IO_URING_SQE	64

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

#
# Instantiation of sqes for each op
#

type io_uring_sqe$nop io_uring_sqe[IORING_OP_NOP, const[0, int16], const[0, int32], const[0, int64], const[0, int64], const[0, int32], const[0, int32], zero_flag, array[const[0, int64], 3]]

io_uring_sqe_readv [
	pass_iovec		io_uring_sqe[IORING_OP_READV, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], ptr[in, array[iovec_out]], len[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, personality_only_misc]
	use_registered_buffer	io_uring_sqe[IORING_OP_READV, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], const[0, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
]

type io_uring_sqe$writev io_uring_sqe[IORING_OP_WRITEV, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], ptr[in, array[iovec_in]], len[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
type io_uring_sqe$fsync io_uring_sqe[IORING_OP_FSYNC, const[0, int16], fd_or_fixed_fd_index, const[0, int64], const[0, int64], const[0, int32], flags[io_uring_fsync_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$read_fixed io_uring_sqe[IORING_OP_READ_FIXED, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], int64, int32, flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
type io_uring_sqe$write_fixed io_uring_sqe[IORING_OP_WRITE_FIXED, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], int64, int32, flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
type io_uring_sqe$poll_add io_uring_sqe[IORING_OP_POLL_ADD, const[0, int16], fd_or_fixed_fd_index, const[0, int64], const[0, int64], const[0, int32], io_uring_sqe_poll_add_misc_flags, sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$poll_remove io_uring_sqe[IORING_OP_POLL_REMOVE, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$sync_file_range io_uring_sqe[IORING_OP_SYNC_FILE_RANGE, const[0, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], int32, flags[sync_file_flags, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_SYNC_FILE_RANGE >= 5.2
type io_uring_sqe$sendmsg io_uring_sqe[IORING_OP_SENDMSG, const[0, int16], sock, const[0, int64], ptr[in, send_msghdr], const[0, int32], flags[send_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$recvmsg io_uring_sqe[IORING_OP_RECVMSG, flags[iouring_recv_ioprio, int16], sock, const[0, int64], ptr[inout, recv_msghdr], const[0, int32], flags[recv_flags, int32], sqe_user_data_not_openat, buf_group_personality_misc]
# IORING_OP_SENDMSG, IORING_OP_RECVMSG >= 5.3
type io_uring_sqe$timeout io_uring_sqe[IORING_OP_TIMEOUT, const[0, int16], const[0, int32], io_uring_timeout_completion_event_count, ptr[in, timespec], const[1, int32], flags[io_uring_timeout_flags, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_TIMEOUT >= 5.4
type io_uring_sqe$timeout_remove io_uring_sqe[IORING_OP_TIMEOUT_REMOVE, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$accept io_uring_sqe[IORING_OP_ACCEPT, const[0, int16], sock, ptr[inout, len[addr, int32]], ptr[out, sockaddr_storage, opt], const[0, int32], flags[accept_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$async_cancel io_uring_sqe[IORING_OP_ASYNC_CANCEL, const[0, int16], const[0, int32], const[0, int64], flags[sqe_user_data, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$link_timeout io_uring_sqe[IORING_OP_LINK_TIMEOUT, const[0, int16], const[0, int32], const[0, int64], ptr[in, timespec], const[1, int32], flags[io_uring_timeout_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$connect io_uring_sqe[IORING_OP_CONNECT, const[0, int16], sock, len[addr, int32], ptr[in, sockaddr_storage], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_TIMEOUT_REMOVE, IORING_OP_ACCEPT, IORING_OP_ASYNC_CANCEL, IORING_OP_LINK_TIMEOUT, IORING_OP_CONNECT >= 5.5
type io_uring_sqe$fallocate io_uring_sqe[IORING_OP_FALLOCATE, const[0, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], int32, const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$openat io_uring_sqe[IORING_OP_OPENAT, const[0, int16], fd_dir[opt], const[0, int64], ptr64[in, filename], flags[open_mode, int32], flags[open_flags, int32], sqe_user_data_openat, personality_only_misc]
type io_uring_sqe$close io_uring_sqe[IORING_OP_CLOSE, const[0, int16], fd, const[0, int64], const[0, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$files_update io_uring_sqe[IORING_OP_FILES_UPDATE, const[0, int16], const[0, int32], fileoff[int64], ptr[in, array[fd]], len[addr, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$statx io_uring_sqe[IORING_OP_STATX, const[0, int16], fd_dir[opt], ptr[out, statx], ptr64[in, filename], flags[statx_mask, int32], flags[statx_flags, int32], sqe_user_data_not_openat, personality_only_misc]

io_uring_sqe_read [
	pass_buffer		io_uring_sqe[IORING_OP_READ, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], buffer[out], bytesize[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, personality_only_misc]
	use_registered_buffer	io_uring_sqe[IORING_OP_READ, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], const[0, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, buf_index_personality_misc]
]

type io_uring_sqe$write io_uring_sqe[IORING_OP_WRITE, flags[ioprio_priorities, int16], fd_or_fixed_fd_index, fileoff[int64], buffer[in], bytesize[addr, int32], flags[rwf_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$fadvise io_uring_sqe[IORING_OP_FADVISE, const[0, int16], fd_or_fixed_fd_index, fileoff[int64], const[0, int64], int32, flags[fadvise_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$madvise io_uring_sqe[IORING_OP_MADVISE, const[0, int16], const[0, int32], const[0, int64], vma, len[addr, int32], flags[madvise_flags, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$send io_uring_sqe[IORING_OP_SEND, const[0, int16], sock, const[0, int64], buffer[in], len[addr, int32], flags[send_flags, int32], sqe_user_data_not_openat, personality_only_misc]

iouring_recv_ioprio = IORING_RECVSEND_POLL_FIRST, IORING_RECV_MULTISHOT, IORING_RECVSEND_FIXED_BUF

io_uring_sqe_recv [
	pass_buffer		io_uring_sqe[IORING_OP_RECV, const[0, int16], sock, const[0, int64], buffer[inout], len[addr, int32], flags[recv_flags, int32], sqe_user_data_not_openat, personality_only_misc]
	use_registered_buffer	io_uring_sqe[IORING_OP_RECV, flags[iouring_recv_ioprio, int16], sock, const[0, int64], const[0, int64], const[0, int32], flags[recv_flags, int32], sqe_user_data_not_openat, buf_group_personality_misc]
]

type io_uring_sqe$openat2 io_uring_sqe[IORING_OP_OPENAT2, const[0, int16], fd_dir[opt], ptr[in, open_how], ptr64[in, filename], bytesize[off, int32], const[0, int32], sqe_user_data_openat, personality_only_misc]

# For IORING_OP_EPOLL_CTL the sqe carries the epoll fd in fd, the target fd in off,
# the epoll_event pointer in addr and the EPOLL_CTL_* op in len. The target fd is
# wrapped with explicit padding so that it fills the whole 64-bit off slot and the
# following fields stay at their fixed offsets.
io_uring_sqe_epoll_ctl_off {
	fd	fd
	pad	const[0, int32]
}

type io_uring_sqe$epoll_ctl_t[EPOLL_OP, EPOLL_EVENTS] io_uring_sqe[IORING_OP_EPOLL_CTL, const[0, int16], fd_epoll, io_uring_sqe_epoll_ctl_off, EPOLL_EVENTS, const[EPOLL_OP, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]

# EPOLL_EVENTS occupies the 64-bit addr slot, hence ptr64 / int64.
io_uring_sqe_epoll_ctl [
	add	io_uring_sqe$epoll_ctl_t[EPOLL_CTL_ADD, ptr64[in, epoll_event]]
	del	io_uring_sqe$epoll_ctl_t[EPOLL_CTL_DEL, const[0, int64]]
	mod	io_uring_sqe$epoll_ctl_t[EPOLL_CTL_MOD, ptr64[in, epoll_event]]
]

# IORING_OP_EPOLL_CTL, IORING_OP_SEND, IORING_OP_FALLOCATE, IORING_OP_MADVISE, IORING_OP_FADVISE, IORING_OP_RECV
# IORING_OP_OPENAT, IORING_OP_OPENAT2, IORING_OP_CLOSE, IORING_OP_STATX, IORING_OP_READ, IORING_OP_WRITE >= 5.6

type io_uring_sqe$splice io_uring_sqe[IORING_OP_SPLICE, const[0, int16], fd_or_fixed_fd_index, fileoff[int64], io_uring_sqe_splice_off_in, int32, flags[splice_flags, int32], sqe_user_data_not_openat, io_uring_sqe_splice_misc]
type io_uring_sqe$provide_buffers io_uring_sqe[IORING_OP_PROVIDE_BUFFERS, const[0, int16], int32, io_uring_bid[int64], buffer[in], int32, const[0, int32], sqe_user_data_not_openat, buf_group_personality_misc]
# remove_buffers must carry its own opcode; with IORING_OP_PROVIDE_BUFFERS here the
# remove operation would never be submitted.
type io_uring_sqe$remove_buffers io_uring_sqe[IORING_OP_REMOVE_BUFFERS, const[0, int16], int32, const[0, int64], const[0, int64], const[0, int32], const[0, int32], sqe_user_data_not_openat, buf_group_personality_misc]
# IORING_OP_SPLICE, IORING_OP_PROVIDE_BUFFERS, IORING_OP_REMOVE_BUFFERS >= 5.7
type io_uring_sqe$tee io_uring_sqe[IORING_OP_TEE, const[0, int16], fd_or_fixed_fd_index, const[0, int64], const[0, int64], int32, flags[splice_flags, int32], sqe_user_data_not_openat, io_uring_sqe_tee_misc]
# IORING_OP_TEE >= 5.8

type io_uring_sqe$shutdown io_uring_sqe[IORING_OP_SHUTDOWN, const[0, int16], sock, const[0, int64], const[0, int64], flags[shutdown_flags, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$renameat io_uring_sqe[IORING_OP_RENAMEAT, const[0, int16], fd_dir, ptr64[in, filename], ptr64[in, filename], fd_dir, const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$unlinkat io_uring_sqe[IORING_OP_UNLINKAT, const[0, int16], fd_dir, const[0, int64], ptr64[in, filename], const[0, int32], flags[unlinkat_flags, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_SHUTDOWN, IORING_OP_RENAMEAT, IORING_OP_UNLINKAT >= 5.11

type io_uring_sqe$mkdirat io_uring_sqe[IORING_OP_MKDIRAT, const[0, int16], fd_dir, const[0, int64], ptr64[in, filename], flags[open_mode, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$symlinkat io_uring_sqe[IORING_OP_SYMLINKAT, const[0, int16], fd_dir, ptr64[in, filename], ptr64[in, filename], const[0, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
type io_uring_sqe$linkat io_uring_sqe[IORING_OP_LINKAT, const[0, int16], fd_dir, ptr64[in, filename], ptr64[in, filename], fd_dir, flags[linkat_flags, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_MKDIRAT, IORING_OP_SYMLINKAT, IORING_OP_LINKAT >= 5.15

type io_uring_sqe$msg_ring io_uring_sqe[IORING_OP_MSG_RING, const[0, int16], fd_io_uring, int64, buffer[in], len[addr, int32], const[0, int32], sqe_user_data_not_openat, personality_only_misc]
# IORING_OP_MSG_RING >= 5.18

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

#
# Flags, enumerations, and misc fields of sqe ops
#

iosqe_flags = IOSQE_IO_DRAIN, IOSQE_IO_LINK, IOSQE_IO_HARDLINK, IOSQE_ASYNC, IOSQE_BUFFER_SELECT, IOSQE_FIXED_FILE, IOSQE_CQE_SKIP_SUCCESS
# The *_BIT names must not be used as flags.
# IOSQE_IO_DRAIN >= 5.2
# IOSQE_IO_LINK >= 5.3
# IOSQE_IO_HARDLINK >= 5.5
# IOSQE_ASYNC >= 5.6
# IOSQE_BUFFER_SELECT >= 5.7
# IOSQE_FIXED_FILE >= 5.1
# IOSQE_CQE_SKIP_SUCCESS >= 5.17

fd_or_fixed_fd_index [
	fd		fd
# Use the registered files (io_uring_register$IORING_REGISTER_FILES) when IOSQE_FIXED_FILE_BIT is set in sqe.
# To ease collisions, limit the indices.
	fd_index	int32[0:10]
]

# 0 for normal file integrity sync, IORING_FSYNC_DATASYNC to provide data sync only semantics
io_uring_fsync_flags = 0, IORING_FSYNC_DATASYNC

# 0 for relative, IORING_TIMEOUT_ABS for absolute timeout value
io_uring_timeout_flags = 0, IORING_TIMEOUT_ABS

# The timeout condition is met when either the specified timeout expires, or the
# specified number of events have completed. If not set, defaults to 1. Use a
# limited range so that this value can actually be reached and meet the timeout
# condition, besides the timeout expiration.
type io_uring_timeout_completion_event_count int64[0:10]

# An already issued request can be attempted to be cancelled using the ASYNC_CANCEL
# operation. This operation identifies the target operations by what was passed
# as user_data in their sqe. To ease collisions of the ASYNC_CANCEL operation with
# already submitted ones, use a limited range of values for the user_data field.
# Among all operations that can be achieved by submitting to the io_uring, only
# openat and openat2 return a useful resource (fd) that we can use for other
# system calls. The resulting fds are returned within io_uring_cqe.res. The only way
# to identify cqes for those operations is to keep track of their user data. Thus,
# use a separate set of sqe_user_data values for openat and openat2.
sqe_user_data_not_openat = 0, 1
sqe_user_data_openat = 0x12345, 0x23456
sqe_user_data = 0, 1, 0x12345, 0x23456

# The buffer id (bid) and the buffer group id (bgid) are registered using
# IORING_OP_PROVIDE_BUFFERS. Use the ids in a limited range to ease collisions
# with other operations.
type io_uring_bid[T] T[0:3]
type io_uring_bgid[T] T[0:3]

# Single-value flag set, used as the user_data argument of the nop sqe.
zero_flag = 0

# Fills the 32-bit misc_flags slot of the sqe for poll_add.
io_uring_sqe_poll_add_misc_flags {
	misc_flags		flags[pollfd_events, int16]
# 2 bytes of padding to fill what is left from the union of flags
	fill_flags_union	const[0, int16]
}

# Fills the 64-bit addr slot of the sqe for splice.
io_uring_sqe_splice_off_in {
	splice_off_in_unused	const[0, int32]
	splice_off_in		fd
}

# Descriptions for the MISC field of io_uring_sqe. The content is common for
# most ops, with a few special cases. Every variant below is 24 bytes long.

# No buffer selection: only the personality id is meaningful.
personality_only_misc {
	buf_index_unused	const[0, int16]
	ioring_personality_id	ioring_personality_id[opt]
	pad_unused		array[const[0, int8], 20]
}

# Variant carrying a buffer index in its first field.
buf_index_personality_misc {
	buf_index		io_uring_bid[int16]
	ioring_personality_id	ioring_personality_id[opt]
	pad_unused		array[const[0, int8], 20]
}

# Variant carrying a buffer group id in its first field.
buf_group_personality_misc {
	buf_group		io_uring_bgid[int16]
	ioring_personality_id	ioring_personality_id[opt]
	pad_unused		array[const[0, int8], 20]
}

# MISC layout for splice: carries the input fd after the personality id.
io_uring_sqe_splice_misc {
	buf_unused		const[0, int16]
	ioring_personality_id	ioring_personality_id[opt]
	splice_fd_in		fd
	pad_unused		array[const[0, int64], 2]
}

# MISC layout for tee: same fields as the splice variant.
io_uring_sqe_tee_misc {
	buf_unused		const[0, int16]
	ioring_personality_id	ioring_personality_id[opt]
	splice_fd_in		fd
	pad_unused		array[const[0, int64], 2]
}