golang.org/x/arch@v0.17.0/x86/x86spec/cleanup.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "fmt" 9 "os" 10 "sort" 11 "strings" 12 ) 13 14 // Clean up the data from the Intel manual for correctness 15 // and to annotate details relevant to decoding or encoding, 16 // such as whether an instruction is valid only in certain 17 // operand size modes. 18 19 // encodeReplace maps (argument, encoding) pairs to the corrected argument. 20 // We use a suffix 1 for the register and 2 for the r/m in the modrm byte. 21 // We use a suffix V for a register number specified in the VEX.vvvv bits. 22 var encodeReplace = map[[2]string]string{ 23 {"mm", "ModRM:reg"}: "mm1", 24 {"mm", "ModRM:r/m"}: "mm2", 25 {"mm1", "ModRM:r/m"}: "mm2", 26 {"mm2", "ModRM:reg"}: "mm1", 27 {"mm/m32", "ModRM:r/m"}: "mm2/m32", 28 {"mm/m64", "ModRM:r/m"}: "mm2/m64", 29 {"xmm", "ModRM:reg"}: "xmm1", 30 {"xmm", "ModRM:r/m"}: "xmm2", 31 {"xmm/m64", "ModRM:r/m"}: "xmm2/m64", 32 {"xmm0", "ModRM:reg"}: "xmm1", 33 {"xmm1", "ModRM:r/m"}: "xmm2", 34 {"xmm1/m16", "ModRM:r/m"}: "xmm2/m16", 35 {"xmm1/m32", "ModRM:r/m"}: "xmm2/m32", 36 {"xmm1/m64", "ModRM:r/m"}: "xmm2/m64", 37 {"xmm1/m128", "ModRM:r/m"}: "xmm2/m128", 38 {"xmm1/m256", "ModRM:r/m"}: "xmm2/m256", 39 {"xmm/m16", "ModRM:r/m"}: "xmm2/m16", 40 {"xmm/m32", "ModRM:r/m"}: "xmm2/m32", 41 {"xmm/m64", "ModRM:r/m"}: "xmm2/m64", 42 {"xmm/m128", "ModRM:r/m"}: "xmm2/m128", 43 {"xmm/m256", "ModRM:r/m"}: "xmm2/m256", 44 {"xmm3", "ModRM:reg"}: "xmm1", 45 {"xmm3", "ModRM:r/m"}: "xmm2", 46 {"xmm3/m16", "ModRM:r/m"}: "xmm2/m16", 47 {"xmm3/m32", "ModRM:r/m"}: "xmm2/m32", 48 {"xmm3/m64", "ModRM:r/m"}: "xmm2/m64", 49 {"xmm3/m128", "ModRM:r/m"}: "xmm2/m128", 50 {"xmm3/m256", "ModRM:r/m"}: "xmm2/m256", 51 {"xmm2", "ModRM:reg"}: "xmm1", 52 {"xmm2/m16", "ModRM:reg"}: "xmm1/m16", 53 {"xmm2/m32", "ModRM:reg"}: "xmm1/m32", 54 
{"xmm2/m64", "ModRM:reg"}: "xmm1/m64", 55 {"xmm2/m128", "ModRM:reg"}: "xmm1/m128", 56 {"xmm2/m256", "ModRM:reg"}: "xmm1/m256", 57 {"ymm", "ModRM:reg"}: "ymm1", 58 {"ymm", "ModRM:r/m"}: "ymm2", 59 {"ymm0", "ModRM:reg"}: "ymm1", 60 {"ymm1", "ModRM:r/m"}: "ymm2", 61 {"ymm1/m16", "ModRM:r/m"}: "ymm2/m16", 62 {"ymm1/m32", "ModRM:r/m"}: "ymm2/m32", 63 {"ymm1/m64", "ModRM:r/m"}: "ymm2/m64", 64 {"ymm1/m128", "ModRM:r/m"}: "ymm2/m128", 65 {"ymm1/m256", "ModRM:r/m"}: "ymm2/m256", 66 {"ymm3", "ModRM:reg"}: "ymm1", 67 {"ymm3", "ModRM:r/m"}: "ymm2", 68 {"ymm3/m16", "ModRM:r/m"}: "ymm2/m16", 69 {"ymm3/m32", "ModRM:r/m"}: "ymm2/m32", 70 {"ymm3/m64", "ModRM:r/m"}: "ymm2/m64", 71 {"ymm3/m128", "ModRM:r/m"}: "ymm2/m128", 72 {"ymm3/m256", "ModRM:r/m"}: "ymm2/m256", 73 {"ymm2", "ModRM:reg"}: "ymm1", 74 {"ymm2/m16", "ModRM:reg"}: "ymm1/m16", 75 {"ymm2/m32", "ModRM:reg"}: "ymm1/m32", 76 {"ymm2/m64", "ModRM:reg"}: "ymm1/m64", 77 {"ymm2/m128", "ModRM:reg"}: "ymm1/m128", 78 {"ymm2/m256", "ModRM:reg"}: "ymm1/m256", 79 {"xmm1", "VEX.vvvv"}: "xmmV", 80 {"xmm2", "VEX.vvvv"}: "xmmV", 81 {"ymm1", "VEX.vvvv"}: "ymmV", 82 {"ymm2", "VEX.vvvv"}: "ymmV", 83 {"xmm4", "imm8[7:4]"}: "xmmIH", 84 {"ymm4", "imm8[7:4]"}: "ymmIH", 85 {"r8", "opcode + rd"}: "r8op", 86 {"r16", "opcode + rd"}: "r16op", 87 {"r32", "opcode + rd"}: "r32op", 88 {"r64", "opcode + rd"}: "r64op", 89 {"reg/m32", "ModRM:r/m"}: "r/m32", 90 {"reg/m16", "ModRM:r/m"}: "r32/m16", 91 {"bnd", "ModRM:reg"}: "bnd1", 92 {"bnd2", "ModRM:reg"}: "bnd1", 93 {"bnd1/m64", "ModRM:r/m"}: "bnd2/m64", 94 {"bnd1/m128", "ModRM:r/m"}: "bnd2/m128", 95 {"r32a", "ModRM:reg"}: "r32", 96 {"r64a", "ModRM:reg"}: "r64", 97 {"r32", "VEX.vvvv"}: "r32V", 98 {"r64", "VEX.vvvv"}: "r64V", 99 {"r32b", "VEX.vvvv"}: "r32V", 100 {"r64b", "VEX.vvvv"}: "r64V", 101 {"r64", "VEX.vvvv"}: "r64V", 102 {"ST", "ST(0)"}: "ST(0)", 103 } 104 105 // A few instructions do not have the usual encoding descriptions. 106 // Supply them. 
// Keys are complete instruction syntaxes; each value holds one encoding
// description per operand, in operand order.
//
// Every entry deliberately has its own slice literal: cleanup installs the
// slice found here as inst.args and then rewrites its elements in place,
// so entries must never share backing storage.
var encodings = map[string][]string{
	// x87 add and BCD load/store.
	"FADD m32fp":          {"ModRM:r/m (r)"},
	"FADD m64fp":          {"ModRM:r/m (r)"},
	"FADD ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
	"FADD ST(i), ST(0)":   {"ST(i) (r, w)", "ST(0) (r)"},
	"FADDP ST(i), ST(0)":  {"ST(i) (r, w)", "ST(0) (r)"},
	"FIADD m32int":        {"ModRM:r/m (r)"},
	"FIADD m16int":        {"ModRM:r/m (r)"},
	"FBLD m80dec":         {"ModRM:r/m (r)"},
	"FBSTP m80bcd":        {"ModRM:r/m (w)"},

	// x87 conditional moves.
	"FCMOVB ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVE ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVBE ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVU ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNB ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNE ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNBE ST(0), ST(i)": {"ST(0) (r, w)", "ST(i) (r)"},
	"FCMOVNU ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},

	// x87 compares.
	"FCOM m32fp":        {"ModRM:r/m (r)"},
	"FCOM m64fp":        {"ModRM:r/m (r)"},
	"FCOM ST(i)":        {"ST(i) (r)"},
	"FCOMP m32fp":       {"ModRM:r/m (r)"},
	"FCOMP m64fp":       {"ModRM:r/m (r)"},
	"FCOMP ST(i)":       {"ST(i) (r)"},
	"FCOMI ST, ST(i)":   {"ST(0) (r)", "ST(i) (r)"},
	"FCOMIP ST, ST(i)":  {"ST(0) (r)", "ST(i) (r)"},
	"FUCOMI ST, ST(i)":  {"ST(0) (r)", "ST(i) (r)"},
	"FUCOMIP ST, ST(i)": {"ST(0) (r)", "ST(i) (r)"},

	// x87 divide and reverse divide.
	"FDIV m32fp":          {"ModRM:r/m (r)"},
	"FDIV m64fp":          {"ModRM:r/m (r)"},
	"FDIV ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
	"FDIV ST(i), ST(0)":   {"ST(i) (r, w)", "ST(0) (r)"},
	"FDIVP ST(i), ST(0)":  {"ST(i) (r, w)", "ST(0) (r)"},
	"FIDIV m16int":        {"ModRM:r/m (r)"},
	"FIDIV m32int":        {"ModRM:r/m (r)"},
	"FIDIV m64int":        {"ModRM:r/m (r)"},
	"FDIVR m32fp":         {"ModRM:r/m (r)"},
	"FDIVR m64fp":         {"ModRM:r/m (r)"},
	"FDIVR ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
	"FDIVR ST(i), ST(0)":  {"ST(i) (r, w)", "ST(0) (r)"},
	"FDIVRP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FIDIVR m16int":       {"ModRM:r/m (r)"},
	"FIDIVR m32int":       {"ModRM:r/m (r)"},
	"FIDIVR m64int":       {"ModRM:r/m (r)"},

	// x87 integer compare/load/store, loads, control word and environment.
	"FFREE ST(i)":       {"ST(i) (w)"},
	"FICOM m16int":      {"ModRM:r/m (r)"},
	"FICOM m32int":      {"ModRM:r/m (r)"},
	"FICOMP m16int":     {"ModRM:r/m (r)"},
	"FICOMP m32int":     {"ModRM:r/m (r)"},
	"FILD m16int":       {"ModRM:r/m (r)"},
	"FILD m32int":       {"ModRM:r/m (r)"},
	"FILD m64int":       {"ModRM:r/m (r)"},
	"FIST m16int":       {"ModRM:r/m (w)"},
	"FIST m32int":       {"ModRM:r/m (w)"},
	"FISTP m16int":      {"ModRM:r/m (w)"},
	"FISTP m32int":      {"ModRM:r/m (w)"},
	"FISTP m64int":      {"ModRM:r/m (w)"},
	"FISTTP m16int":     {"ModRM:r/m (w)"},
	"FISTTP m32int":     {"ModRM:r/m (w)"},
	"FISTTP m64int":     {"ModRM:r/m (w)"},
	"FLD m32fp":         {"ModRM:r/m (r)"},
	"FLD m64fp":         {"ModRM:r/m (r)"},
	"FLD m80fp":         {"ModRM:r/m (r)"},
	"FLD ST(i)":         {"ST(i) (r)"},
	"FLDCW m2byte":      {"ModRM:r/m (r)"},
	"FLDENV m14/28byte": {"ModRM:r/m (r)"},

	// x87 multiply.
	"FMUL m32fp":         {"ModRM:r/m (r)"},
	"FMUL m64fp":         {"ModRM:r/m (r)"},
	"FMUL ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
	"FMUL ST(i), ST(0)":  {"ST(i) (r, w)", "ST(0) (r)"},
	"FMULP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FIMUL m16int":       {"ModRM:r/m (r)"},
	"FIMUL m32int":       {"ModRM:r/m (r)"},

	// x87 state save/restore and stores.
	"FRSTOR m94/108byte": {"ModRM:r/m (r)"},
	"FSAVE m94/108byte":  {"ModRM:r/m (w)"},
	"FNSAVE m94/108byte": {"ModRM:r/m (w)"},
	"FST m32fp":          {"ModRM:r/m (w)"},
	"FST m64fp":          {"ModRM:r/m (w)"},
	"FST m80fp":          {"ModRM:r/m (w)"},
	"FST ST(i)":          {"ST(i) (w)"},
	"FSTP m32fp":         {"ModRM:r/m (w)"},
	"FSTP m64fp":         {"ModRM:r/m (w)"},
	"FSTP m80fp":         {"ModRM:r/m (w)"},
	"FSTP ST(i)":         {"ST(i) (w)"},
	"FSTCW m2byte":       {"ModRM:r/m (w)"},
	"FNSTCW m2byte":      {"ModRM:r/m (w)"},
	"FSTENV m14/28byte":  {"ModRM:r/m (w)"},
	"FNSTENV m14/28byte": {"ModRM:r/m (w)"},
	"FSTSW m2byte":       {"ModRM:r/m (w)"},
	"FSTSW AX":           {"AX (w)"},
	"FNSTSW m2byte":      {"ModRM:r/m (w)"},
	"FNSTSW AX":          {"AX (w)"},

	// x87 subtract, reverse subtract, unordered compare, exchange.
	"FSUB m32fp":          {"ModRM:r/m (r)"},
	"FSUB m64fp":          {"ModRM:r/m (r)"},
	"FSUB ST(0), ST(i)":   {"ST(0) (r, w)", "ST(i) (r)"},
	"FSUB ST(i), ST(0)":   {"ST(i) (r, w)", "ST(0) (r)"},
	"FSUBP ST(i), ST(0)":  {"ST(i) (r, w)", "ST(0) (r)"},
	"FISUB m16int":        {"ModRM:r/m (r)"},
	"FISUB m32int":        {"ModRM:r/m (r)"},
	"FSUBR m32fp":         {"ModRM:r/m (r)"},
	"FSUBR m64fp":         {"ModRM:r/m (r)"},
	"FSUBR ST(0), ST(i)":  {"ST(0) (r, w)", "ST(i) (r)"},
	"FSUBR ST(i), ST(0)":  {"ST(i) (r, w)", "ST(0) (r)"},
	"FSUBRP ST(i), ST(0)": {"ST(i) (r, w)", "ST(0) (r)"},
	"FISUBR m16int":       {"ModRM:r/m (r)"},
	"FISUBR m32int":       {"ModRM:r/m (r)"},
	"FISUBR m64int":       {"ModRM:r/m (r)"},
	"FUCOM ST(i)":         {"ST(i) (r)"},
	"FUCOMP ST(i)":        {"ST(i) (r)"},
	"FXCH ST(i)":          {"ST(i) (r, w)"},

	// Segment register push/pop and INT 3.
	"POP DS":  {"DS (w)"},
	"POP ES":  {"ES (w)"},
	"POP FS":  {"FS (w)"},
	"POP GS":  {"GS (w)"},
	"POP SS":  {"SS (w)"},
	"POP CS":  {"CS (w)"},
	"PUSH CS": {"CS (r)"},
	"PUSH DS": {"DS (r)"},
	"PUSH ES": {"ES (r)"},
	"PUSH FS": {"FS (r)"},
	"PUSH GS": {"GS (r)"},
	"PUSH SS": {"SS (r)"},
	"INT 3":   {"3 (r)"},

	// In manual but hard to parse
	"BNDLDX bnd, mib": {"ModRM:reg (w)", "ModRM:r/m (r)"},
	"BNDSTX mib, bnd": {"ModRM:r/m (r)", "ModRM:reg (r)"},

	// In manual but wrong
	"CALL rel16":    {"Offset"},
	"CALL rel32":    {"Offset"},
	"IN AL, imm8":   {"AL (w)", "imm8 (r)"},
	"IN AX, imm8":   {"AX (w)", "imm8 (r)"},
	"IN EAX, imm8":  {"EAX (w)", "imm8 (r)"},
	"IN AL, DX":     {"AL (w)", "DX (r)"},
	"IN AX, DX":     {"AX (w)", "DX (r)"},
	"IN EAX, DX":    {"EAX (w)", "DX (r)"},
	"OUT DX, AL":    {"DX (r)", "AL (r)"},
	"OUT DX, AX":    {"DX (r)", "AX (r)"},
	"OUT DX, EAX":   {"DX (r)", "EAX (r)"},
	"OUT imm8, AL":  {"imm8 (r)", "AL (r)"},
	"OUT imm8, AX":  {"imm8 (r)", "AX (r)"},
	"OUT imm8, EAX": {"imm8 (r)", "EAX (r)"},
	"XCHG AX, r16":  {"AX (r, w)", "opcode + rd (r, w)"},
	"XCHG EAX, r32": {"EAX (r, w)", "opcode + rd (r, w)"},
	"XCHG RAX, r64": {"RAX (r, w)", "opcode + rd (r, w)"},

	// Encoding not listed.
	"INVEPT r32, m128":   {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"INVEPT r64, m128":   {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"INVVPID r32, m128":  {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"INVVPID r64, m128":  {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"VMREAD r/m32, r32":  {"ModRM:r/m (w)", "ModRM:reg (r)"},
	"VMREAD r/m64, r64":  {"ModRM:r/m (w)", "ModRM:reg (r)"},
	"VMWRITE r32, r/m32": {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"VMWRITE r64, r/m64": {"ModRM:reg (r)", "ModRM:r/m (r)"},
	"VMCLEAR m64":        {"ModRM:r/m (w)"},
	"VMPTRLD m64":        {"ModRM:r/m (r)"},
	"VMPTRST m64":        {"ModRM:r/m (w)"},
	"VMXON m64":          {"ModRM:r/m (r)"},
}

// opAction lists the read/write actions for individual opcodes,
// where the manual does not.
// Each value gives one action per position: "r", "w" or "rw".
// Entries are grouped by action shape.
var opAction = map[string][]string{
	// Read-modify-write destination, read source.
	"ADC": {"rw", "r"},
	"ADD": {"rw", "r"},
	"AND": {"rw", "r"},
	"OR":  {"rw", "r"},
	"RCL": {"rw", "r"},
	"RCR": {"rw", "r"},
	"ROL": {"rw", "r"},
	"ROR": {"rw", "r"},
	"SAL": {"rw", "r"},
	"SAR": {"rw", "r"},
	"SBB": {"rw", "r"},
	"SHL": {"rw", "r"},
	"SHR": {"rw", "r"},
	"XOR": {"rw", "r"},

	// Read-modify-write destination, two read sources.
	"BLENDVPD":  {"rw", "r", "r"},
	"BLENDVPS":  {"rw", "r", "r"},
	"PBLENDVB":  {"rw", "r", "r"},
	"SHLD":      {"rw", "r", "r"},
	"SHRD":      {"rw", "r", "r"},
	"VBLENDVPD": {"rw", "r", "r"},
	"VBLENDVPS": {"rw", "r", "r"},
	"VPBLENDVB": {"rw", "r", "r"},
	// NOTE(review): SUB lists three actions while ADD, ADC and SBB list
	// two; kept as found. Confirm against the code that consumes opAction.
	"SUB": {"rw", "r", "r"},

	// Write-only destination, read source.
	"IN":  {"w", "r"},
	"MOV": {"w", "r"},

	// Both positions read only.
	"OUT":  {"r", "r"},
	"TEST": {"r", "r"},

	// Write-only destination, two read sources.
	"VPMASKMOVD":  {"w", "r", "r"},
	"VPMASKMOVQ":  {"w", "r", "r"},
	"VPSLLVD":     {"w", "r", "r"},
	"VPSRAVD":     {"w", "r", "r"},
	"VPSRLVD":     {"w", "r", "r"},
	"VPSRLVQ":     {"w", "r", "r"},
	"VINSERTI128": {"w", "r", "r"},
	"VPBLENDD":    {"w", "r", "r"},
	"VPERMD":      {"w", "r", "r"},
	"VPERMPS":     {"w", "r", "r"},
	"VPERM2I128":  {"w", "r", "r"},
	"VPSLLVQ":     {"w", "r", "r"},

	// Both positions read and written.
	"XCHG": {"rw", "rw"},
}
// encodeOK lists valid arg, encoding pairs.
// Any pair not listed gets a warning.
//
// Each pair appears exactly once. Keys are arrays, not constants, so the
// compiler would not report a repeated key.
var encodeOK = map[[2]string]bool{
	// Literal and implicit operands.
	{"0", "imm8"}:                true,
	{"1", "1"}:                   true,
	{"1", "imm8"}:                true,
	{"<XMM0>", "<XMM0>"}:         true,
	{"<XMM0>", "implicit XMM0"}:  true,

	// Fixed registers.
	{"AL", "AL"}:             true,
	{"AL", "AL/AX/EAX/RAX"}:  true,
	{"AX", "AL/AX/EAX/RAX"}:  true,
	{"AX", "AX"}:             true,
	{"AX", "AX/EAX/RAX"}:     true,
	{"CL", "CL"}:             true,
	{"CR0-CR7", "ModRM:reg"}: true,
	{"CR8", ""}:              true,
	{"CS", "CS"}:             true,
	{"DR0-DR7", "ModRM:reg"}: true,
	{"DS", "DS"}:             true,
	{"DX", "DX"}:             true,
	{"EAX", "AL/AX/EAX/RAX"}: true,
	{"EAX", "AX/EAX/RAX"}:    true,
	{"EAX", "EAX"}:           true,
	{"ES", "ES"}:             true,
	{"FS", "FS"}:             true,
	{"GS", "GS"}:             true,
	{"RAX", "AL/AX/EAX/RAX"}: true,
	{"RAX", "AX/EAX/RAX"}:    true,
	{"RAX", "RAX"}:           true,
	{"ST", "ST(0)"}:          true,
	{"ST(0)", "ST(0)"}:       true,
	{"ST(i)", "ST(i)"}:       true,

	// Segment and bound registers.
	{"Sreg", "ModRM:reg"}:      true,
	{"bnd1", "ModRM:reg"}:      true,
	{"bnd2/m128", "ModRM:r/m"}: true,
	{"bnd2/m64", "ModRM:r/m"}:  true,

	// Immediates.
	{"imm16", "imm16"}:         true,
	{"imm16", "imm8"}:          true,
	{"imm16", "imm8/16/32"}:    true,
	{"imm16", "imm8/16/32/64"}: true,
	{"imm16", "iw"}:            true,
	{"imm32", "imm8"}:          true,
	{"imm32", "imm8/16/32"}:    true,
	{"imm32", "imm8/16/32/64"}: true,
	{"imm64", "imm8/16/32/64"}: true,
	{"imm8", "imm8"}:           true,
	{"imm8", "imm8/16/32"}:     true,
	{"imm8", "imm8/16/32/64"}:  true,
	{"imm8", "imm8[3:0]"}:      true,

	// Memory operands.
	{"m", "ModRM:r/m"}:           true,
	{"m128", "ModRM:r/m"}:        true,
	{"m14/28byte", "ModRM:r/m"}:  true,
	{"m16", "ModRM:r/m"}:         true,
	{"m16&16", "ModRM:r/m"}:      true,
	{"m16&32", "ModRM:r/m"}:      true,
	{"m16&64", "ModRM:r/m"}:      true,
	{"m16:16", "ModRM:r/m"}:      true,
	{"m16:16", "Offset"}:         true,
	{"m16:32", "ModRM:r/m"}:      true,
	{"m16:32", "Offset"}:         true,
	{"m16:64", "ModRM:r/m"}:      true,
	{"m16:64", "Offset"}:         true,
	{"m16int", "ModRM:r/m"}:      true,
	{"m256", "ModRM:r/m"}:        true,
	{"m2byte", "ModRM:r/m"}:      true,
	{"m32", "ModRM:r/m"}:         true,
	{"m32&32", "ModRM:r/m"}:      true,
	{"m32fp", "ModRM:r/m"}:       true,
	{"m32int", "ModRM:r/m"}:      true,
	{"m512byte", "ModRM:r/m"}:    true,
	{"m64", "ModRM:r/m"}:         true,
	{"m64fp", "ModRM:r/m"}:       true,
	{"m64int", "ModRM:r/m"}:      true,
	{"m8", "ModRM:r/m"}:          true,
	{"m80bcd", "ModRM:r/m"}:      true,
	{"m80dec", "ModRM:r/m"}:      true,
	{"m80fp", "ModRM:r/m"}:       true,
	{"m94/108byte", "ModRM:r/m"}: true,
	{"mem", "ModRM:r/m"}:         true,
	{"mib", "ModRM:r/m"}:         true,

	// MMX registers.
	{"mm/m32", "ModRM:r/m"}:  true,
	{"mm1", "ModRM:reg"}:     true,
	{"mm2", "ModRM:r/m"}:     true,
	{"mm2/m32", "ModRM:r/m"}: true,
	{"mm2/m64", "ModRM:r/m"}: true,

	// Memory offsets and far pointers.
	{"moffs16", "Moffs"}:   true,
	{"moffs32", "Moffs"}:   true,
	{"moffs64", "Moffs"}:   true,
	{"moffs8", "Moffs"}:    true,
	{"ptr16:16", "Offset"}: true,
	{"ptr16:32", "Offset"}: true,

	// General registers and relative offsets.
	{"r/m16", "ModRM:r/m"}:   true,
	{"r/m32", "ModRM:r/m"}:   true,
	{"r/m64", "ModRM:r/m"}:   true,
	{"r/m8", "ModRM:r/m"}:    true,
	{"r16", "ModRM:reg"}:     true,
	{"r16op", "opcode + rd"}: true,
	{"r32", "ModRM:reg"}:     true,
	{"r32", "VEX.vvvv"}:      true,
	{"r32/m16", "ModRM:r/m"}: true,
	{"r32/m8", "ModRM:r/m"}:  true,
	{"r32V", "VEX.vvvv"}:     true,
	{"r32op", "opcode + rd"}: true,
	{"r64", "ModRM:reg"}:     true,
	{"r64/m16", "ModRM:r/m"}: true,
	{"r64V", "VEX.vvvv"}:     true,
	{"r64op", "opcode + rd"}: true,
	{"r8", "ModRM:reg"}:      true,
	{"r8op", "opcode + rd"}:  true,
	{"rel16", "Offset"}:      true,
	{"rel32", "Offset"}:      true,
	{"rel8", "Offset"}:       true,
	{"rmr16", "ModRM:r/m"}:   true,
	{"rmr32", "ModRM:r/m"}:   true,
	{"rmr64", "ModRM:r/m"}:   true,

	// Vector registers.
	{"xmm/m128", "ModRM:r/m"}:  true,
	{"xmm/m32", "ModRM:r/m"}:   true,
	{"xmm1", "ModRM:reg"}:      true,
	{"xmm2", "ModRM:r/m"}:      true,
	{"xmm2/m128", "ModRM:r/m"}: true,
	{"xmm2/m16", "ModRM:r/m"}:  true,
	{"xmm2/m32", "ModRM:r/m"}:  true,
	{"xmm2/m64", "ModRM:r/m"}:  true,
	{"xmm2/m8", "ModRM:r/m"}:   true,
	{"xmmIH", "imm8[7:4]"}:     true,
	{"xmmV", "VEX.vvvv"}:       true,
	{"ymm1", "ModRM:reg"}:      true,
	{"ymm2", "ModRM:r/m"}:      true,
	{"ymm2/m256", "ModRM:r/m"}: true,
	{"ymmIH", "imm8[7:4]"}:     true,
	{"ymmV", "VEX.vvvv"}:       true,

	// Vector-indexed memory.
	{"vm32x", "vsib"}: true,
	{"vm64x", "vsib"}: true,
	{"vm32y", "vsib"}: true,
	{"vm64y", "vsib"}: true,

	{"SS", "SS"}: true,
	{"3", "3"}:   true,
}

// instBlacklist lists the instruction syntaxes to ignore when parsing.
// We exclude Intel's general forms for these not-actually-general instructions.
// The syntax makes it look like arbitrary memory operands can be used when in fact
// the exact address is fixed in all cases - [DI] or [SI], for example.
var instBlacklist = map[string]bool{
	"CMPS m16, m16": true,
	"CMPS m32, m32": true,
	"CMPS m64, m64": true,
	"CMPS m8, m8":   true,
	"INS m16, DX":   true,
	"INS m32, DX":   true,
	"INS m8, DX":    true,
	"LODS m16":      true,
	"LODS m32":      true,
	"LODS m64":      true,
	"LODS m8":       true,
	"MOVS m16, m16": true,
	"MOVS m32, m32": true,
	"MOVS m64, m64": true,
	"MOVS m8, m8":   true,
	"OUTS DX, m16":  true,
	"OUTS DX, m32":  true,
	"OUTS DX, m8":   true,

	"REP INS m16, DX":    true,
	"REP INS m32, DX":    true,
	"REP INS m8, DX":     true,
	"REP INS r/m32, DX":  true,
	"REP LODS AL":        true,
	"REP LODS AX":        true,
	"REP LODS EAX":       true,
	"REP LODS RAX":       true,
	"REP MOVS m16, m16":  true,
	"REP MOVS m32, m32":  true,
	"REP MOVS m64, m64":  true,
	"REP MOVS m8, m8":    true,
	"REP OUTS DX, m16":   true,
	"REP OUTS DX, m32":   true,
	"REP OUTS DX, m8":    true,
	"REP OUTS DX, r/m16": true,
	"REP OUTS DX, r/m32": true,
	"REP OUTS DX, r/m8":  true,
	"REP STOS m16":       true,
	"REP STOS m32":       true,
	"REP STOS m64":       true,
	"REP STOS m8":        true,

	"REPE CMPS m16, m16":  true,
	"REPE CMPS m32, m32":  true,
	"REPE CMPS m64, m64":  true,
	"REPE CMPS m8, m8":    true,
	"REPE SCAS m16":       true,
	"REPE SCAS m32":       true,
	"REPE SCAS m64":       true,
	"REPE SCAS m8":        true,
	"REPNE CMPS m16, m16": true,
	"REPNE CMPS m32, m32": true,
	"REPNE CMPS m64, m64": true,
	"REPNE CMPS m8, m8":   true,
	"REPNE SCAS m16":      true,
	"REPNE SCAS m32":      true,
	"REPNE SCAS m64":      true,
	"REPNE SCAS m8":       true,

	"SCAS m16": true,
	"SCAS m32": true,
	"SCAS m64": true,
	"SCAS m8":  true,
	"STOS m16": true,
	"STOS m32": true,
	"STOS m64": true,
	"STOS m8":  true,
	"XLAT m8":  true,
}

// condPrefs lists preferences for condition code suffixes.
// The first suffix in each pair takes priority over the second.
var condPrefs = [][2]string{
	{"B", "C"},
	{"B", "NAE"},
	{"AE", "NB"},
	{"AE", "NC"},
	{"E", "Z"},
	{"NE", "NZ"},
	{"BE", "NA"},
	{"A", "NBE"},
	{"P", "PE"},
	{"NP", "PO"},
	{"L", "NGE"},
	{"GE", "NL"},
	{"LE", "NG"},
	{"G", "NLE"},
}

// conv16 specifies replacements to turn a 16-bit syntax into a 32-bit syntax.
// If the conv16 can be applied to one form to create a new form with the same
// fixed instruction prefix, the pair is tagged as operand16 and operand32
// respectively.
//
// The order of the pairs matters: strings.Replacer tries the old strings in
// argument order at each position, so "16:16" must come before "16".
var conv16 = strings.NewReplacer(
	"16:16", "16:32",
	"16", "32",
	"AX", "EAX",
	"CBW", "CWDE",
	"CMPSW", "CMPSD",
	"CWD", "CDQ",
	"INSW", "INSD",
	"IRET", "IRETD",
	"LODSW", "LODSD",
	"MOVSW", "MOVSD",
	"OUTSW", "OUTSD",
	"POPA", "POPAD",
	"POPF", "POPFD",
	"PUSHA", "PUSHAD",
	"PUSHF", "PUSHFD",
	"SCASW", "SCASD",
	"STOSW", "STOSD",
)

// fixup records additional modifications needed that are not derived
// from the instructions in the manual. It is keyed by the syntax and opcode.
var fixup = map[[2]string][]fixer{
	// NOP is a very special case overloading XCHG AX, AX.
	// The decoder handles it in custom code; exclude from the usual tables.
	{"NOP", "90"}: {fixAddTag("pseudo")},

	// PAUSE is a special case of NOP.
	{"PAUSE", "F3 90"}: {fixAddTag("pseudo")}, // used to add 'keepop' tag but not sure what that means

	// Far CALL, JMP, RET are renamed with a _FAR suffix for disambiguation.
	{"CALL m16:16", "FF /3"}:       {fixRename("CALL_FAR")},
	{"CALL m16:32", "FF /3"}:       {fixRename("CALL_FAR")},
	{"CALL m16:64", "REX.W FF /3"}: {fixRename("CALL_FAR")},
	{"CALL ptr16:16", "9A cd"}:     {fixRename("CALL_FAR")},
	{"CALL ptr16:32", "9A cp"}:     {fixRename("CALL_FAR")},
	{"JMP m16:16", "FF /5"}:        {fixRename("JMP_FAR")},
	{"JMP m16:32", "FF /5"}:        {fixRename("JMP_FAR")},
	{"JMP m16:64", "REX.W FF /5"}:  {fixRename("JMP_FAR")},
	{"JMP ptr16:16", "EA cd"}:      {fixRename("JMP_FAR")},
	{"JMP ptr16:32", "EA cp"}:      {fixRename("JMP_FAR")},
	{"RET imm16", "CA iw"}:         {fixRename("RET_FAR"), fixArg(0, "imm16u")},
	{"RET", "CB"}:                  {fixRename("RET_FAR")},

	// Unsigned immediates. (RET far imm16 handled above.)
	// Some of these are just preferences for disassembling.
	{"ENTER imm16, imm8", "C8 iw ib"}:  {fixArg(1, "imm8b")},
	{"RET imm16", "C2 iw"}:             {fixArg(0, "imm16u")},
	{"IN AL, imm8", "E4 ib"}:           {fixArg(1, "imm8u")},
	{"IN AX, imm8", "E5 ib"}:           {fixArg(1, "imm8u")},
	{"IN EAX, imm8", "E5 ib"}:          {fixArg(1, "imm8u"), fixAddTag("operand64")},
	{"OUT imm8, AL", "E6 ib"}:          {fixArg(0, "imm8u")},
	{"OUT imm8, AX", "E7 ib"}:          {fixArg(0, "imm8u")},
	{"OUT imm8, EAX", "E7 ib"}:         {fixArg(0, "imm8u"), fixAddTag("operand64")},
	{"MOV r8op, imm8", "B0+rb ib"}:     {fixArg(1, "imm8u")},
	{"MOV r8op, imm8", "REX B0+rb ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")},
	{"MOV r/m8, imm8", "C6 /0 ib"}:     {fixArg(1, "imm8u")},
	{"MOV r/m8, imm8", "REX C6 /0 ib"}: {fixArg(1, "imm8u"), fixAddTag("pseudo64")},

	// The listings for MOVSX and MOVSXD do not list some variants that
	// assemblers seem to allow.
	// As a result, this instruction got the wrong tag.
	// The other instructions are listed in extraInsts.
	{"MOVSX r32, r/m16", "0F BF /r"}: {fixRemoveTag("operand16"), fixAddTag("operand32")},
	{"MOVZX r32, r/m16", "0F B7 /r"}: {fixRemoveTag("operand16")},

	// Listings are incomplete or incorrect. Fix tags to adjust for new instructions below.
	{"SLDT r/m16", "0F 00 /0"}:             {fixRemoveTag("operand32")},
	{"STR r/m16", "0F 00 /1"}:              {fixAddTag("operand16")},
	{"BSWAP r32op", "0F C8+rd"}:            {fixRemoveTag("operand16")},
	{"MOV Sreg, r/m16", "8E /r"}:           {fixRemoveTag("operand32")},
	{"MOV Sreg, r/m64", "REX.W 8E /r"}:     {fixArg(1, "r/m16")},
	{"MOV r/m64, Sreg", "REX.W 8C /r"}:     {fixArg(0, "r/m16")},
	{"MOV r/m16, Sreg", "8C /r"}:           {fixRemoveTag("operand32")},
	{"MOV r/m64, imm32", "REX.W C7 /0 io"}: {fixOpcode("REX.W C7 /0 id")},

	// On 64-bit, these ignore 64-bit mode change.
	{"POP FS", "0F A1"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
	{"POP GS", "0F A9"}: {fixIfValid("N.E.", "V", fixAddTag("operand64"))},
	{"LEAVE", "C9"}:     {fixIfValid("N.E.", "V", fixAddTag("operand64"))},

	{"IN EAX, DX", "ED"}:         {fixAddTag("operand64")},
	{"INSD", "6D"}:               {fixAddTag("operand64")},
	{"OUT DX, EAX", "EF"}:        {fixAddTag("operand64")},
	{"OUTSD", "6F"}:              {fixAddTag("operand64")},
	{"XBEGIN rel32", "C7 F8 cd"}: {fixAddTag("operand64")},

	// Treat FWAIT, not WAIT, as canonical.
	{"FWAIT", "9B"}: {fixRemoveTag("pseudo")},
	{"WAIT", "9B"}:  {fixAddTag("pseudo")},

	// LAHF and SAHF are listed as "Invalid*" for 64-bit mode.
	// They are actually defined, so Valid from our point of view.
	// It's just that only a very few 64-bit processors allowed them.
	{"LAHF", "9F"}: {fixValid("V", "V")},
	{"SAHF", "9E"}: {fixValid("V", "V")},

	// The JZ forms are listed twice in the table, which confuses things.
	{"JZ rel16", "0F 84 cw"}: {fixAddTag("operand16"), fixRemoveTag("operand32")},
	{"JZ rel32", "0F 84 cd"}: {fixAddTag("operand32"), fixRemoveTag("operand16")},

	// XCHG has two of every instruction, which makes things bad.
	// The XX hack below takes care of most problems but this one remains.
	{"XCHG r/m16, r16", "87 /r"}: {fixRemoveTag("pseudo")},

	// MOV CR8 is just the obvious extension of the MOV CR0-CR7 form.
	{"MOV rmr64, CR8", "REX.R + 0F 20 /0"}: {fixAddTag("pseudo")},
	{"MOV CR8, rmr64", "REX.R + 0F 22 /0"}: {fixAddTag("pseudo")},

	// TODO: EXPLAIN ALL THESE
	{"ADCX r32, r/m32", "66 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")},
	{"ADOX r32, r/m32", "F3 0F 38 F6 /r"}: {fixAddTag("operand16"), fixAddTag("operand32")},
	{"POPFQ", "9D"}:                       {fixAddTag("operand32"), fixAddTag("operand64")},
	{"PUSHFQ", "9C"}:                      {fixAddTag("operand32"), fixAddTag("operand64")},
	{"JCXZ rel8", "E3 cb"}:                {fixAddTag("address16")},
	{"JECXZ rel8", "E3 cb"}:               {fixAddTag("address32")},
	{"JRCXZ rel8", "E3 cb"}:               {fixAddTag("address64")},
	{"PUSH r64op", "50+rd"}:               {fixAddTag("operand32"), fixAddTag("operand64")},
	{"PUSH r/m64", "FF /6"}:               {fixAddTag("operand32"), fixAddTag("operand64")},
	{"POP r64op", "58+rd"}:                {fixAddTag("operand32"), fixAddTag("operand64")},
	{"POP r/m64", "8F /0"}:                {fixAddTag("operand32"), fixAddTag("operand64")},
	{"SMSW r/m16", "0F 01 /4"}:            {fixAddTag("operand16")},
	{"SMSW r32/m16", "0F 01 /4"}:          {fixRemoveTag("operand16"), fixAddTag("operand32")},

	// Express to the decoder that the rel16 only applies in 16-bit operand mode.
	{"JA rel16", "0F 87 cw"}:  {fixAddTag("operand16")},
	{"JAE rel16", "0F 83 cw"}: {fixAddTag("operand16")},
	{"JB rel16", "0F 82 cw"}:  {fixAddTag("operand16")},
	{"JBE rel16", "0F 86 cw"}: {fixAddTag("operand16")},
	{"JE rel16", "0F 84 cw"}:  {fixAddTag("operand16")},
	{"JG rel16", "0F 8F cw"}:  {fixAddTag("operand16")},
	{"JGE rel16", "0F 8D cw"}: {fixAddTag("operand16")},
	{"JL rel16", "0F 8C cw"}:  {fixAddTag("operand16")},
	{"JLE rel16", "0F 8E cw"}: {fixAddTag("operand16")},
	{"JNE rel16", "0F 85 cw"}: {fixAddTag("operand16")},
	{"JNO rel16", "0F 81 cw"}: {fixAddTag("operand16")},
	{"JNP rel16", "0F 8B cw"}: {fixAddTag("operand16")},
	{"JNS rel16", "0F 89 cw"}: {fixAddTag("operand16")},
	{"JO rel16", "0F 80 cw"}:  {fixAddTag("operand16")},
	{"JP rel16", "0F 8A cw"}:  {fixAddTag("operand16")},
	{"JS rel16", "0F 88 cw"}:  {fixAddTag("operand16")},

	// Likewise, the rel32 forms are tagged operand32.
	{"JA rel32", "0F 87 cd"}:  {fixAddTag("operand32")},
	{"JAE rel32", "0F 83 cd"}: {fixAddTag("operand32")},
	{"JB rel32", "0F 82 cd"}:  {fixAddTag("operand32")},
	{"JBE rel32", "0F 86 cd"}: {fixAddTag("operand32")},
	{"JE rel32", "0F 84 cd"}:  {fixAddTag("operand32")},
	{"JG rel32", "0F 8F cd"}:  {fixAddTag("operand32")},
	{"JGE rel32", "0F 8D cd"}: {fixAddTag("operand32")},
	{"JL rel32", "0F 8C cd"}:  {fixAddTag("operand32")},
	{"JLE rel32", "0F 8E cd"}: {fixAddTag("operand32")},
	{"JNE rel32", "0F 85 cd"}: {fixAddTag("operand32")},
	{"JNO rel32", "0F 81 cd"}: {fixAddTag("operand32")},
	{"JNP rel32", "0F 8B cd"}: {fixAddTag("operand32")},
	{"JNS rel32", "0F 89 cd"}: {fixAddTag("operand32")},
	{"JO rel32", "0F 80 cd"}:  {fixAddTag("operand32")},
	{"JP rel32", "0F 8A cd"}:  {fixAddTag("operand32")},
	{"JS rel32", "0F 88 cd"}:  {fixAddTag("operand32")},

	{"LSL r16, r/m16", "0F 03 /r"}: {fixAddTag("operand16")},
}

// extraInsts lists hand-written instruction entries for forms that, per the
// comments on each group below, are undocumented, absent from the manual, or
// needed to express decoder behavior.
// NOTE(review): where these are merged into the parsed instruction list is
// not visible in this part of the file.
var extraInsts = []*instruction{
	// Undocumented.
	{syntax: "ICEBP", opcode: "F1", valid32: "V", valid64: "V"},
	{syntax: "UD1", opcode: "0F B9", valid32: "V", valid64: "V"},
	{syntax: "FFREEP ST(i)", opcode: "DF C0+i", valid32: "V", valid64: "V", action: "w"},

	// Where did these come from? They were in version 0.01 of the csv table.
	{syntax: "MOVNTSD m64, xmm1", opcode: "F2 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"},
	{syntax: "MOVNTSS m32, xmm1", opcode: "F3 0F 2B /r", valid32: "V", valid64: "V", cpuid: "SSE", action: "w,r"},

	// These express to the decoder that in 64-bit mode
	// an operand prefix does not affect the size of the relative offset.
	{syntax: "CALL rel32", opcode: "E8 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JMP rel32", opcode: "E9 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JA rel32", opcode: "0F 87 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JAE rel32", opcode: "0F 83 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JB rel32", opcode: "0F 82 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JBE rel32", opcode: "0F 86 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JE rel32", opcode: "0F 84 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JG rel32", opcode: "0F 8F cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JGE rel32", opcode: "0F 8D cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JL rel32", opcode: "0F 8C cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JLE rel32", opcode: "0F 8E cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JNE rel32", opcode: "0F 85 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JNO rel32", opcode: "0F 81 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JNP rel32", opcode: "0F 8B cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JNS rel32", opcode: "0F 89 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JO rel32", opcode: "0F 80 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JP rel32", opcode: "0F 8A cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},
	{syntax: "JS rel32", opcode: "0F 88 cd", valid32: "N.S.", valid64: "V", tags: []string{"operand16", "operand64"}, action: "r"},

	// Disassemblers recognize these, but they're not in the manual.
	// Not sure if they really exist.

	// The 16-16 and 32-32 forms don't really make sense since there's nothing to extend.
	{syntax: "MOVSX r16, r/m16", opcode: "0F BF /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
	{syntax: "MOVSXD r16, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand16"}, action: "w,r"},
	{syntax: "MOVSXD r32, r/m32", opcode: "63 /r", valid32: "N.E.", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
	{syntax: "MOVZX r16, r/m16", opcode: "0F B7 /r", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "w,r"},

	{syntax: "LAR r64, r/m16", opcode: "REX.W 0F 02 /r", valid32: "N.E.", valid64: "V", action: "w,r"},
	{syntax: "SLDT r32/m16", opcode: "0F 00 /0", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"},
	{syntax: "STR r32/m16", opcode: "0F 00 /1", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w"},
	{syntax: "STR r64/m16", opcode: "REX.W 0F 00 /1", valid32: "N.E.", valid64: "V", action: "w"},

	{syntax: "BSWAP r16op", opcode: "0F C8+rd", valid32: "V", valid64: "V", tags: []string{"operand16"}, action: "rw"},

	// Do these exist?
	// I am not sure where they came from, and xed doesn't recognize them.
	//{syntax: "MOV TR0-TR7, rmr32", opcode: "0F 26 /r", valid32: "V", valid64: "N.E.", tags: []string{"modrm_regonly"}, action: "w,r"},
	//{syntax: "MOV TR0-TR7, rmr64", opcode: "0F 26 /r", valid32: "N.E.", valid64: "V", tags: []string{"modrm_regonly"}, action: "w,r"},
	//{syntax: "MOV rmr32, TR0-TR7", opcode: "0F 24 /r", valid32: "V", valid64: "N.E.", tags: []string{"modrm_regonly"}, action: "w,r"},
	//{syntax: "MOV rmr64, TR0-TR7", opcode: "0F 24 /r", valid32: "N.E.", valid64: "V", tags: []string{"modrm_regonly"}, action: "w,r"},
	{syntax: "MOV Sreg, r32/m16", opcode: "8E /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
	{syntax: "MOV r/m32, Sreg", opcode: "8C /r", valid32: "V", valid64: "V", tags: []string{"operand32"}, action: "w,r"},
}

// A fixer modifies a single instruction in place.
// The values of the fixup table are lists of fixers.
type fixer func(*instruction)

// fixAddTag returns a fixer that calls addTag(inst, tag).
func fixAddTag(tag string) fixer {
	return func(inst *instruction) {
		addTag(inst, tag)
	}
}

// fixRemoveTag returns a fixer that calls removeTag(inst, tag).
func fixRemoveTag(tag string) fixer {
	return func(inst *instruction) {
		removeTag(inst, tag)
	}
}

// fixRename returns a fixer that rebuilds inst.syntax with op in place of
// the opcode name returned by splitSyntax, keeping the arguments.
func fixRename(op string) fixer {
	return func(inst *instruction) {
		_, args := splitSyntax(inst.syntax)
		inst.syntax = joinSyntax(op, args)
	}
}

// fixArg returns a fixer that replaces argument i (0-based) of inst.syntax
// with arg and rebuilds the syntax string.
// The index is not range-checked: the fixer panics if the syntax has
// i or fewer arguments.
func fixArg(i int, arg string) fixer {
	return func(inst *instruction) {
		op, args := splitSyntax(inst.syntax)
		args[i] = arg
		inst.syntax = joinSyntax(op, args)
	}
}

// fixIfValid returns a fixer that applies fix only when the instruction's
// valid32 and valid64 fields equal the given strings exactly.
func fixIfValid(valid32, valid64 string, fix fixer) fixer {
	return func(inst *instruction) {
		if inst.valid32 == valid32 && inst.valid64 == valid64 {
			fix(inst)
		}
	}
}

// fixValid returns a fixer that overwrites the instruction's valid32 and
// valid64 fields.
func fixValid(valid32, valid64 string) fixer {
	return func(inst *instruction) {
		inst.valid32 = valid32
		inst.valid64 = valid64
	}
}

// fixOpcode returns a fixer that overwrites the instruction's opcode field.
func fixOpcode(opcode string) fixer {
	return func(inst *instruction) {
		inst.opcode = opcode
	}
}

func cleanup(insts []*instruction) []*instruction {
	var haveOp
map[string]bool 822 if onlySomePages { 823 haveOp = map[string]bool{} 824 } 825 826 // Clean individual instruction encodings and opcode sequences. 827 sawJZ := map[string]bool{} 828 out := insts[:0] 829 for seq, inst := range insts { 830 inst.seq = seq 831 832 // There are two copies each of JZ rel16 and JZ rel32. Delete the second. 833 if strings.HasPrefix(inst.syntax, "JZ rel") { 834 if sawJZ[inst.syntax] { 835 continue 836 } 837 sawJZ[inst.syntax] = true 838 } 839 out = append(out, inst) 840 841 // Intel CMPXCHG16B and CMPXCHG8B have surprise "m64" or " m128" at end of encoding. 842 surprises := []string{ 843 " m64", 844 " m128", 845 } 846 for _, s := range surprises { 847 if strings.HasSuffix(inst.syntax, s) && strings.HasSuffix(inst.opcode, s) { 848 inst.opcode = strings.TrimSuffix(inst.opcode, s) 849 } 850 } 851 852 op, args := splitSyntax(inst.syntax) 853 op = strings.TrimRight(op, "*") 854 inst.syntax = joinSyntax(op, args) 855 856 // Check argument names in syntax against encoding details. 
857 if enc, ok := encodings[inst.syntax]; ok { 858 inst.args = enc 859 } 860 if len(args) == len(inst.args)+1 && args[len(args)-1] == "imm8" { 861 fixed := make([]string, len(args)) 862 copy(fixed, inst.args) 863 fixed[len(args)-1] = "imm8" 864 inst.args = fixed 865 } else if len(args) == 0 && len(inst.args) == 1 && inst.args[0] == "NA" { 866 inst.args = []string{} 867 } else if len(args) != len(inst.args) { 868 fmt.Fprintf(os.Stderr, "p.%d: %s has %d args but %d encoding details:\n\t%s\n", inst.page, inst.syntax, len(args), len(inst.args), strings.Join(inst.args, "; ")) 869 inst.syntax = joinSyntax(op, args) 870 continue 871 } 872 873 var action []string 874 for i, arg := range args { 875 arg = strings.TrimSpace(arg) 876 arg = strings.TrimRight(arg, "*") 877 if (arg == "reg" || strings.HasPrefix(arg, "reg/")) && containsAll(inst.desc, "upper bits", "r64", "zero") { 878 arg = "r32" + strings.TrimPrefix(arg, "reg") 879 } 880 881 enc := inst.args[i] 882 enc = strings.TrimSpace(enc) 883 switch { 884 case strings.HasSuffix(enc, " (r))"): 885 enc = strings.TrimSuffix(enc, ")") 886 case strings.HasSuffix(enc, " (R)"): 887 enc = strings.TrimSuffix(enc, " (R)") + " (r)" 888 case strings.HasSuffix(enc, " (W)"): 889 enc = strings.TrimSuffix(enc, " (W)") + " (w)" 890 case strings.HasSuffix(enc, " (r,w)"): 891 enc = strings.TrimSuffix(enc, " (r,w)") + " (r, w)" 892 case enc == "Imm8": 893 enc = "imm8" 894 case enc == "imm8/26/32": 895 enc = "imm8/16/32" 896 case enc == "BaseReg (R): VSIB:base, VectorReg(R): VSIB:index": 897 enc = "vsib (r)" 898 } 899 inst.args[i] = enc 900 901 switch { 902 case strings.HasSuffix(enc, " (r)"): 903 action = append(action, "r") 904 enc = strings.TrimSuffix(enc, " (r)") 905 case strings.HasSuffix(enc, " (w)"): 906 action = append(action, "w") 907 enc = strings.TrimSuffix(enc, " (w)") 908 case strings.HasSuffix(enc, " (r, w)"): 909 action = append(action, "rw") 910 enc = strings.TrimSuffix(enc, " (r, w)") 911 case strings.HasPrefix(enc, "imm"), enc 
== "Offset", enc == "iw", arg == "1", arg == "0", arg == "3": 912 action = append(action, "r") 913 case i < len(opAction[op]): 914 action = append(action, opAction[op][i]) 915 default: 916 fmt.Fprintf(os.Stderr, "p.%d: %s has encoding %s for %s but no r/w annotations\n", inst.page, inst.syntax, enc, arg) 917 action = append(action, "?") 918 } 919 920 if arg == "mem" && op == "LDDQU" { 921 arg = "m128" 922 } 923 if arg == "reg" && op == "LAR" { 924 arg = "r32" 925 } 926 if actual := encodeReplace[[2]string{arg, enc}]; actual != "" { 927 arg = actual 928 } 929 930 if (arg == "r8" || arg == "r16" || arg == "r32" || arg == "r64") && enc == "ModRM:r/m" { 931 addTag(inst, "modrm_regonly") 932 arg = "rmr" + arg[1:] 933 } 934 if (arg == "xmm2" || arg == "ymm2") && enc == "ModRM:r/m" { 935 addTag(inst, "modrm_regonly") 936 } 937 938 if (arg == "m8" || arg == "m16" || arg == "m32" || arg == "m64" || arg == "m128" || arg == "m256") && enc == "ModRM:r/m" { 939 addTag(inst, "modrm_memonly") 940 } 941 942 if arg == "r64" && (inst.syntax == "MOV r64, CR8" || inst.syntax == "MOV CR8, r64") { 943 arg = "rmr64" 944 addTag(inst, "modrm_regonly") 945 } 946 if arg == "CR8" { 947 enc = "" 948 } 949 950 if !encodeOK[[2]string{arg, enc}] { 951 fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s for %s\n\t{%q, %q}: true,\n", inst.page, inst.syntax, enc, arg, arg, enc) 952 } 953 954 args[i] = arg 955 956 // Intel SETcc and others are missing the /r. 957 // But CALL rel16 and CALL rel32 have a bad encoding table so ignore the ModRM there. 958 if strings.HasPrefix(enc, "ModRM") && !strings.Contains(inst.opcode, " /") && op != "CALL" { 959 inst.opcode += " /r" 960 } 961 if strings.HasPrefix(enc, "ModRM:reg") && !strings.Contains(inst.opcode, "/r") { 962 // The opcode is taken up with something else. Bug in table. 963 fmt.Fprintf(os.Stderr, "p.%d: %s has invalid encoding %s: no reg field in %s\n", inst.page, inst.syntax, arg, inst.opcode) 964 } 965 // XBEGIN is missing cw cd. 
966 if enc == "Offset" && arg == "rel16" && !strings.Contains(inst.opcode, " cw") { 967 inst.opcode += " cw" 968 } 969 if enc == "Offset" && arg == "rel32" && !strings.Contains(inst.opcode, " cd") { 970 inst.opcode += " cd" 971 } 972 if enc == "Moffs" && !strings.Contains(inst.opcode, "cm") { 973 inst.opcode += " cm" 974 } 975 976 inst.action = strings.Join(action, ",") 977 } 978 979 inst.syntax = joinSyntax(op, args) 980 981 // The Intel manual lists each XCHG form with arguments in both orders. 982 // While this is technically correct, it confuses lots of the analysis. 983 // Change half of them to start with a fake "XX" byte. 984 if op == "XCHG" && !strings.HasPrefix(args[0], "r/") && !strings.HasSuffix(args[0], "op") { 985 inst.opcode = "XX " + inst.opcode 986 } 987 988 // Intel manual is not great about disabling REX instructions on 32-bit systems. 989 if strings.Contains(inst.opcode, "REX") && inst.valid32 == "V" { 990 inst.valid32 = "N.E." 991 } 992 993 if inst.valid32 == "V" { 994 switch { 995 case containsAll(inst.compat, "not supported", "earlier than the Intel486"): 996 inst.cpuid = "486" 997 case containsAll(inst.compat, "not supported", "earlier than the Pentium"), 998 containsAll(inst.compat, "were introduced", "with the Pentium"): 999 inst.cpuid = "Pentium" 1000 case containsAll(inst.compat, "were introduced", "in the Pentium II"): 1001 inst.cpuid = "PentiumII" 1002 case containsAll(inst.compat, "were introduced", "in the P6 family"), 1003 containsAll(inst.compat, "were introduced in P6 family"): 1004 addTag(inst, "P6") 1005 } 1006 } 1007 1008 if onlySomePages { 1009 op, _ := splitSyntax(inst.syntax) 1010 haveOp[op] = true 1011 } 1012 } 1013 1014 insts = out 1015 sort.Sort(byOpcode(insts)) 1016 1017 // Detect operand size dependencies. 
1018 var last *instruction 1019 for _, inst := range insts { 1020 if last != nil { 1021 f1, _ := splitOpcode(last.opcode) 1022 f2, _ := splitOpcode(inst.opcode) 1023 if f1 == f2 { 1024 // Conflict: cannot distinguish instructions based on fixed prefix. 1025 if is16vs32pair(last, inst) { 1026 addTag(last, "operand16") 1027 addTag(inst, "operand32") 1028 continue 1029 } 1030 if is16vs32pair(inst, last) { 1031 addTag(last, "operand32") 1032 addTag(inst, "operand16") 1033 last = inst 1034 continue 1035 } 1036 } 1037 } 1038 last = inst 1039 } 1040 1041 // Detect pseudo-ops, defined as opcode entries subsumed by more general ones. 1042 seen := map[string]*instruction{} 1043 for _, inst := range insts { 1044 if strings.HasPrefix(inst.opcode, "9B ") { // FWAIT prefix 1045 addTag(inst, "pseudo") 1046 continue 1047 } 1048 if inst.opcode == "F0" || inst.opcode == "F2" || inst.opcode == "F3" { 1049 addTag(inst, "pseudo") 1050 continue 1051 } 1052 if strings.HasPrefix(inst.syntax, "REP ") || strings.HasPrefix(inst.syntax, "REPE ") || strings.HasPrefix(inst.syntax, "REPNE ") { 1053 addTag(inst, "pseudo") 1054 continue 1055 } 1056 if strings.HasPrefix(inst.syntax, "SAL ") { // SHL is canonical 1057 addTag(inst, "pseudo") 1058 continue 1059 } 1060 if old := seen[inst.opcode]; old != nil { 1061 if condLess(old.syntax, inst.syntax) { 1062 addTag(inst, "pseudo") 1063 continue 1064 } 1065 if xchgLess(inst.syntax, old.syntax) { 1066 old.tags = append(old.tags, "pseudo") 1067 seen[inst.opcode] = inst 1068 continue 1069 } 1070 } 1071 1072 seen[inst.opcode] = inst 1073 1074 if last != nil && canGenerate(last.opcode, inst.opcode) { 1075 addTag(inst, "pseudo") 1076 continue 1077 } 1078 last = inst 1079 } 1080 for _, inst := range insts { 1081 if strings.Contains(inst.opcode, "REX ") { 1082 if old := seen[strings.Replace(inst.opcode, "REX ", "", 1)]; old != nil && old.syntax == inst.syntax { 1083 addTag(inst, "pseudo64") 1084 continue 1085 } else if old != nil && hasTag(old, "pseudo") { 1086 
addTag(inst, "pseudo") 1087 continue 1088 } 1089 } 1090 if strings.Contains(inst.opcode, "REX.W ") { 1091 if old := seen[strings.Replace(inst.opcode, "REX.W ", "", -1)]; old != nil && old.syntax == inst.syntax { 1092 addTag(old, "ignoreREXW") 1093 addTag(inst, "pseudo") 1094 continue 1095 } else if old != nil && hasTag(old, "pseudo") { 1096 addTag(inst, "pseudo") 1097 continue 1098 } else if old != nil && !hasTag(old, "operand16") && !hasTag(old, "operand32") { 1099 // There is a 64-bit form of this instruction. 1100 // Mark this one as only valid in the non-64-bit operand modes. 1101 addTag(old, "operand16") 1102 addTag(old, "operand32") 1103 continue 1104 } 1105 } 1106 } 1107 1108 // Undo XCHG hack above. 1109 for _, inst := range insts { 1110 if strings.HasPrefix(inst.opcode, "XX ") { 1111 inst.opcode = strings.TrimPrefix(inst.opcode, "XX ") 1112 addTag(inst, "pseudo") 1113 removeTag(inst, "pseudo64") 1114 } 1115 } 1116 1117 // Last ditch effort. Manual fixes. 1118 // Some things are too hard to infer. 1119 for _, inst := range insts { 1120 for _, fix := range fixup[[2]string{inst.syntax, inst.opcode}] { 1121 fix(inst) 1122 } 1123 sort.Strings(inst.tags) 1124 } 1125 1126 sort.Sort(bySeq(insts)) 1127 1128 if onlySomePages { 1129 for _, inst := range extraInsts { 1130 op, _ := splitSyntax(inst.syntax) 1131 if haveOp[op] { 1132 insts = append(insts, inst) 1133 } 1134 } 1135 } else { 1136 insts = append(insts, extraInsts...) 
1137 } 1138 return insts 1139 } 1140 1141 func hasTag(inst *instruction, tag string) bool { 1142 for _, t := range inst.tags { 1143 if t == tag { 1144 return true 1145 } 1146 } 1147 return false 1148 } 1149 1150 func removeTag(inst *instruction, tag string) { 1151 if !hasTag(inst, tag) { 1152 return 1153 } 1154 out := inst.tags[:0] 1155 for _, t := range inst.tags { 1156 if t != tag { 1157 out = append(out, t) 1158 } 1159 } 1160 inst.tags = out 1161 } 1162 1163 func addTag(inst *instruction, tag string) { 1164 if !hasTag(inst, tag) { 1165 inst.tags = append(inst.tags, tag) 1166 } 1167 } 1168 1169 type byOpcode []*instruction 1170 1171 func (x byOpcode) Len() int { return len(x) } 1172 func (x byOpcode) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 1173 func (x byOpcode) Less(i, j int) bool { 1174 if x[i].opcode != x[j].opcode { 1175 return opcodeLess(x[i].opcode, x[j].opcode) 1176 } 1177 if condLess(x[i].syntax, x[j].syntax) { 1178 return true 1179 } 1180 if condLess(x[j].syntax, x[i].syntax) { 1181 return false 1182 } 1183 if x[i].syntax != x[j].syntax { 1184 return x[i].syntax < x[j].syntax 1185 } 1186 return x[i].seq < x[j].seq 1187 } 1188 1189 type bySeq []*instruction 1190 1191 func (x bySeq) Len() int { return len(x) } 1192 func (x bySeq) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 1193 func (x bySeq) Less(i, j int) bool { 1194 return x[i].seq < x[j].seq 1195 } 1196 1197 type bySyntax []*instruction 1198 1199 func (x bySyntax) Len() int { return len(x) } 1200 func (x bySyntax) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 1201 func (x bySyntax) Less(i, j int) bool { 1202 if x[i].syntax != x[j].syntax { 1203 return x[i].syntax < x[j].syntax 1204 } 1205 return x[i].opcode < x[j].opcode 1206 } 1207 1208 // condLess reports whether the conditional instruction syntax 1209 // x should be considered less than y. 1210 // We sort condition codes we prefer ahead of condition codes we don't, 1211 // so that the latter are recorded as the pseudo-operations. 
1212 func condLess(x, y string) bool { 1213 x, _ = splitSyntax(x) 1214 y, _ = splitSyntax(y) 1215 for _, pref := range condPrefs { 1216 if strings.HasSuffix(x, pref[0]) && strings.HasSuffix(y, pref[1]) && strings.TrimSuffix(x, pref[0]) == strings.TrimSuffix(y, pref[1]) { 1217 return true 1218 } 1219 } 1220 return false 1221 } 1222 1223 // xchgLess reports whether the xchg instruction x should be considered less than y. 1224 func xchgLess(x, y string) bool { 1225 return strings.HasPrefix(x, "XCHG ") && x > y 1226 } 1227 1228 // opcodeLess reports whether opcode string x should be considered less than y. 1229 // We sort wildcard fields like "ib" before literal bytes like "0A". 1230 func opcodeLess(x, y string) bool { 1231 for i := 0; i < len(x) || i < len(y); i++ { 1232 if i >= len(x) { 1233 return true 1234 } 1235 if i >= len(y) { 1236 return false 1237 } 1238 if x[i] != y[i] { 1239 // sort word before doubleword 1240 if x[i] == 'w' && y[i] == 'd' { 1241 return true 1242 } 1243 if x[i] == 'd' && y[i] == 'w' { 1244 return false 1245 } 1246 // Sort lower-case before non-lower-case. 1247 // This sorts "ib" before literal bytes like "0A", for example. 1248 return x[i]-'a' < y[i]-'a' 1249 } 1250 } 1251 return false 1252 } 1253 1254 // splitOpcode splits an opcode into its fixed and variable portions. 1255 // For example "05 iw" splits into "05" and "iw". 1256 func splitOpcode(x string) (fixed, variable string) { 1257 i := 0 1258 for i < len(x) { 1259 c := x[i] 1260 if '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || c == ' ' || c == '.' || c == '+' { 1261 i++ 1262 continue 1263 } 1264 if i+2 <= len(x) && c == '/' { 1265 i += 2 1266 continue 1267 } 1268 break 1269 } 1270 return strings.TrimSpace(x[:i]), x[i:] 1271 } 1272 1273 // canGenerate reports whether opcode string x can generate opcode string y. 1274 // For example "D5 ib" can generate "D5 0A". 1275 // Any string x is not considered to generate itself. 
1276 func canGenerate(x, y string) bool { 1277 i := 0 1278 for i < len(x) && i < len(y) && x[i] == y[i] { 1279 i++ 1280 } 1281 if i == len(x) || i == len(y) { 1282 return false 1283 } 1284 switch x[i:] { 1285 case "ib": 1286 return len(y[i:]) == 2 && allHex(y[i:]) 1287 case "0+i": 1288 return len(y[i:]) == 1 && '0' <= y[i] && y[i] <= '7' 1289 case "8+i": 1290 return len(y[i:]) == 1 && (y[i] == '8' || y[i] == '9' || 'A' <= y[i] && y[i] <= 'F') 1291 } 1292 return false 1293 } 1294 1295 // allHex reports whether s is entirely hex digits. 1296 func allHex(s string) bool { 1297 for _, c := range s { 1298 if '0' <= c && c <= '9' || 'A' <= c && c <= 'F' { 1299 continue 1300 } 1301 return false 1302 } 1303 return true 1304 } 1305 1306 // is16vs32pair reports whether x and y are the 16- and 32-bit variants of the same instruction, 1307 // based on analysis of the mnemonic syntax. 1308 func is16vs32pair(x, y *instruction) bool { 1309 return conv16.Replace(x.syntax) == y.syntax || 1310 strings.Replace(x.syntax, "r16, r/", "r32, r32/", -1) == y.syntax || // LSL etc 1311 strings.Replace(x.syntax, "r16", "r32", 1) == y.syntax // MOVSXD, MOVSX, etc 1312 } 1313 1314 func containsAll(x string, targ ...string) bool { 1315 for _, y := range targ { 1316 i := strings.Index(x, y) 1317 if i < 0 { 1318 return false 1319 } 1320 x = x[i+len(y):] 1321 } 1322 return true 1323 }