github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/ifuzz/x86/gen/gen.go (about) 1 // Copyright 2017 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // gen generates instruction tables (ifuzz_types/insns.go) from Intel XED tables. 5 // Tables used to generate insns.go are checked in in all-enc-instructions.txt. 6 package main 7 8 import ( 9 "bufio" 10 "bytes" 11 "errors" 12 "fmt" 13 "os" 14 "reflect" 15 "strconv" 16 "strings" 17 18 "github.com/google/syzkaller/pkg/ifuzz/iset" 19 "github.com/google/syzkaller/pkg/ifuzz/x86" 20 "github.com/google/syzkaller/pkg/osutil" 21 "github.com/google/syzkaller/pkg/serializer" 22 "github.com/google/syzkaller/pkg/tool" 23 ) 24 25 // nolint: gocyclo, gocognit, funlen, dupl 26 func main() { 27 if len(os.Args) != 3 { 28 tool.Failf("usage: gen instructions.txt output.file") 29 } 30 f, err := os.Open(os.Args[1]) 31 if err != nil { 32 tool.Failf("failed to open input file: %v", err) 33 } 34 defer f.Close() 35 36 skipped := 0 37 saved := "" 38 var insns []*x86.Insn 39 var insn, insn1 *x86.Insn 40 s := bufio.NewScanner(f) 41 for i := 1; s.Scan(); i++ { 42 reportError := func(msg string, args ...interface{}) { 43 fmt.Fprintf(os.Stderr, "line %v: %v\n", i, s.Text()) 44 tool.Failf(msg, args...) 45 } 46 line := s.Text() 47 if comment := strings.IndexByte(line, '#'); comment != -1 { 48 line = line[:comment] 49 } 50 line = strings.TrimSpace(line) 51 if line == "" { 52 continue 53 } 54 if line[len(line)-1] == '\\' { 55 saved += line[:len(line)-1] 56 continue 57 } 58 line = saved + line 59 saved = "" 60 if line == "{" { 61 insn = new(x86.Insn) 62 continue 63 } 64 if line == "}" { 65 if insn1 != nil { 66 insns = append(insns, insn1) 67 insn1 = nil 68 insn = nil 69 } 70 continue 71 } 72 colon := strings.IndexByte(line, ':') 73 if colon == -1 { 74 reportError("no colon") 75 } 76 name := strings.TrimSpace(line[:colon]) 77 if name == "" { 78 reportError("empty attribute name") 79 } 80 var vals []string 81 for _, v := range strings.Split(line[colon+1:], " ") { 82 v = strings.TrimSpace(v) 83 if v == "" { 84 continue 85 } 86 vals = append(vals, v) 87 } 88 switch name { 89 case "ICLASS": 90 if len(vals) != 1 { 91 reportError("ICLASS has more than one value") 92 } 93 insn.Name = vals[0] 94 case "CPL": 95 if len(vals) != 1 { 96 reportError("CPL has more than one value") 97 } 98 if vals[0] != "0" && vals[0] != "3" { 99 reportError("unknown CPL value: %v", vals[0]) 100 } 101 insn.Priv = vals[0] == "0" 102 case "EXTENSION": 103 if len(vals) != 1 { 104 reportError("EXTENSION has more than one value") 105 } 106 insn.Extension = vals[0] 107 switch insn.Extension { 108 case "FMA", "AVX2", "AVX", "F16C", "BMI2", "BMI", "XOP", "FMA4", "AVXAES", "BMI1", "AVX2GATHER": 109 insn.Mode = 1<<iset.ModeLong64 | 1<<iset.ModeProt32 110 } 111 insn.Avx2Gather = insn.Extension == "AVX2GATHER" 112 case "PATTERN": 113 if insn1 != nil { 114 insns = append(insns, insn1) 115 } 116 insn1 = new(x86.Insn) 117 *insn1 = *insn 118 if err := parsePattern(insn1, vals); err != nil { 119 var errSkip errSkip 120 if !errors.As(err, &errSkip) { 121 reportError(errSkip.Error()) 122 } 123 if err.Error() != "" { 124 fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err) 125 } 126 skipped++ 127 insn1 = nil 128 } 129 case "OPERANDS": 130 if insn1 == nil { 131 break 132 } 133 if err := parseOperands(insn1, vals); err != nil { 134 var errSkip errSkip 135 if !errors.As(err, &errSkip) { 136 reportError(errSkip.Error()) 137 } 138 if err.Error() != "" { 139 fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err) 140 } 141 skipped++ 142 insn1 = nil 143 } 144 } 145 } 146 147 var deduped []*x86.Insn 148 nextInsn: 149 for _, insn := range insns { 150 if insn.Extension == "AVX512VEX" || insn.Extension == "AVX512EVEX" { 151 skipped++ 152 continue 153 } 154 mod0 := insn.Mod 155 for j := len(deduped) - 1; j >= 0; j-- { 156 insn1 := deduped[j] 157 if insn.Mod == 3 && insn1.Mod == -3 || insn.Mod == -3 && insn1.Mod == 3 || insn1.Mod == -1 { 158 insn.Mod = insn1.Mod 159 } 160 if reflect.DeepEqual(insn, insn1) { 161 if insn.Mod != mod0 { 162 insn1.Mod = -1 163 } 164 continue nextInsn 165 } 166 insn.Mod = mod0 167 } 168 deduped = append(deduped, insn) 169 } 170 fmt.Fprintf(os.Stderr, "deduped %v instructions\n", len(insns)-len(deduped)) 171 insns = deduped 172 173 out := new(bytes.Buffer) 174 fmt.Fprintf(out, ` 175 // Code generated by pkg/ifuzz/x86/gen. DO NOT EDIT. 176 177 //go:build !codeanalysis 178 179 package generated 180 181 import . "github.com/google/syzkaller/pkg/ifuzz/x86" 182 183 func init() { 184 Register(insns) 185 } 186 187 var insns = 188 `) 189 serializer.Write(out, insns) 190 if err := osutil.WriteFileAtomically(os.Args[2], out.Bytes()); err != nil { 191 tool.Fail(err) 192 } 193 194 fmt.Fprintf(os.Stderr, "handled %v, skipped %v\n", len(insns), skipped) 195 } 196 197 type errSkip string 198 199 func (err errSkip) Error() string { 200 return string(err) 201 } 202 203 // nolint: gocyclo, gocognit, funlen 204 func parsePattern(insn *x86.Insn, vals []string) error { 205 if insn.Opcode != nil { 206 return fmt.Errorf("PATTERN is already parsed for the instruction") 207 } 208 // As spelled these have incorrect format for 16-bit addressing mode and with 67 prefix. 209 if insn.Name == "NOP5" || insn.Name == "NOP6" || insn.Name == "NOP7" || 210 insn.Name == "NOP8" || insn.Name == "NOP9" { 211 return errSkip("") 212 } 213 if insn.Mode == 0 { 214 insn.Mode = 1<<iset.ModeLast - 1 215 } 216 insn.Mod = -100 217 insn.Reg = -100 218 insn.Rm = -100 219 insn.VexP = -1 220 for _, v := range vals { 221 switch { 222 case strings.HasPrefix(v, "0x"): 223 op, err := strconv.ParseUint(v, 0, 8) 224 if err != nil { 225 return fmt.Errorf("failed to parse hex pattern: %v", v) 226 } 227 if !insn.Modrm { 228 insn.Opcode = append(insn.Opcode, byte(op)) 229 } else { 230 insn.Suffix = append(insn.Suffix, byte(op)) 231 } 232 case strings.HasPrefix(v, "0b"): 233 if len(v) != 8 || v[6] != '_' { 234 return fmt.Errorf("failed to parse bin pattern: %v", v) 235 } 236 var op byte 237 if v[2] == '1' { 238 op |= 1 << 7 239 } 240 if v[3] == '1' { 241 op |= 1 << 6 242 } 243 if v[4] == '1' { 244 op |= 1 << 5 245 } 246 if v[5] == '1' { 247 op |= 1 << 4 248 } 249 if v[7] == '1' { 250 op |= 1 << 3 251 } 252 insn.Opcode = append(insn.Opcode, op) 253 case strings.HasPrefix(v, "MOD["): 254 insn.Modrm = true 255 vv, err := parseModrm(v[3:]) 256 if err != nil { 257 return fmt.Errorf("failed to parse %v: %w", v, err) 258 } 259 insn.Mod = vv 260 case strings.HasPrefix(v, "REG["): 261 insn.Modrm = true 262 vv, err := parseModrm(v[3:]) 263 if err != nil { 264 return fmt.Errorf("failed to parse %v: %w", v, err) 265 } 266 insn.Reg = vv 267 case strings.HasPrefix(v, "RM["): 268 insn.Modrm = true 269 vv, err := parseModrm(v[2:]) 270 if err != nil { 271 return fmt.Errorf("failed to parse %v: %w", v, err) 272 } 273 insn.Rm = vv 274 case v == "RM=4": 275 insn.Rm = 4 276 case strings.HasPrefix(v, "SRM["): 277 vv, err := parseModrm(v[3:]) 278 if err != nil { 279 return fmt.Errorf("failed to parse %v: %w", v, err) 280 } 281 insn.Rm = vv 282 insn.Srm = true 283 case v == "SRM=0", v == "SRM!=0": 284 case v == "MOD!=3": 285 if !insn.Modrm || insn.Mod != -1 { 286 return fmt.Errorf("MOD!=3 without MOD") 287 } 288 insn.Mod = -3 289 case v == "MOD=3": 290 // Most other instructions contain "MOD[0b11] MOD=3", 291 // but BNDCL contains "MOD[mm] MOD=3" 292 insn.Mod = 3 293 case v == "MOD=0": 294 insn.Mod = 0 295 case v == "MOD=1": 296 insn.Mod = 1 297 case v == "MOD=2": 298 insn.Mod = 2 299 case v == "lock_prefix": 300 insn.Prefix = append(insn.Prefix, 0xF0) 301 302 // Immediates. 303 case v == "UIMM8()", v == "SIMM8()": 304 addImm(insn, 1) 305 case v == "UIMM16()": 306 addImm(insn, 2) 307 case v == "UIMM32()": 308 addImm(insn, 4) 309 case v == "SIMMz()": 310 addImm(insn, -1) 311 case v == "UIMMv()": 312 addImm(insn, -3) 313 case v == "UIMM8_1()": 314 addImm(insn, 1) 315 case v == "BRDISP8()": 316 addImm(insn, 1) 317 case v == "BRDISP32()": 318 addImm(insn, 4) 319 case v == "BRDISPz()": 320 addImm(insn, -1) 321 case v == "MEMDISPv()": 322 addImm(insn, -2) 323 324 // VOP/VEX 325 case v == "XOPV": 326 insn.Vex = 0x8f 327 insn.Mode &^= 1 << iset.ModeReal16 328 case v == "EVV": 329 insn.Vex = 0xc4 330 case v == "VV1": 331 insn.Vex = 0xc4 332 case v == "VMAP0": 333 insn.VexMap = 0 334 case v == "V0F": 335 insn.VexMap = 1 336 case v == "V0F38": 337 insn.VexMap = 2 338 case v == "V0F3A": 339 insn.VexMap = 3 340 case v == "XMAP8": 341 insn.VexMap = 8 342 case v == "XMAP9": 343 insn.VexMap = 9 344 case v == "XMAPA": 345 insn.VexMap = 10 346 case v == "VNP": 347 insn.VexP = 0 348 case v == "V66": 349 insn.VexP = 1 350 case v == "VF2": 351 insn.VexP = 3 352 case v == "VF3": 353 insn.VexP = 2 354 case v == "VL128", v == "VL=0": 355 insn.VexL = -1 356 case v == "VL256", v == "VL=1": 357 insn.VexL = 1 358 case v == "NOVSR": 359 insn.VexNoR = true 360 case v == "NOEVSR": 361 insn.VexNoR = true 362 // VEXDEST3=0b1 VEXDEST210=0b111 VEXDEST4=0b0 363 case v == "SE_IMM8()": 364 addImm(insn, 1) 365 366 // Modes. 367 case v == "mode64": 368 insn.Mode &= 1 << iset.ModeLong64 369 case v == "not64": 370 insn.Mode &^= 1 << iset.ModeLong64 371 case v == "mode32": 372 insn.Mode &= 1 << iset.ModeProt32 373 case v == "mode16": 374 insn.Mode &= 1<<iset.ModeProt16 | 1<<iset.ModeReal16 375 case v == "eamode64", 376 v == "eamode32", 377 v == "eamode16", 378 v == "eanot16": 379 380 case v == "no_refining_prefix": 381 insn.NoRepPrefix = true 382 insn.No66Prefix = true 383 case v == "no66_prefix", v == "eosz32", v == "eosz64": 384 insn.No66Prefix = true 385 case v == "eosz16", v == "eosznot64", v == "REP!=3": 386 // TODO(dvyukov): this may have some effect on REP/66 prefixes, 387 // but this wasn't checked. These are just added here to unbreak build. 388 case v == "f2_refining_prefix", v == "refining_f2", v == "repne", v == "REP=2": 389 insn.Prefix = append(insn.Prefix, 0xF2) 390 insn.NoRepPrefix = true 391 case v == "f3_refining_prefix", v == "refining_f3", v == "repe", v == "REP=3": 392 insn.Prefix = append(insn.Prefix, 0xF3) 393 insn.NoRepPrefix = true 394 case v == "norep", v == "not_refining", v == "REP=0": 395 insn.NoRepPrefix = true 396 case v == "osz_refining_prefix": 397 insn.Prefix = append(insn.Prefix, 0x66) 398 insn.NoRepPrefix = true 399 case v == "rexw_prefix", v == "W1": 400 insn.Rexw = 1 401 case v == "norexw_prefix", v == "W0": 402 insn.Rexw = -1 403 case v == "MPXMODE=1", 404 v == "MPXMODE=0", 405 v == "TZCNT=1", 406 v == "TZCNT=0", 407 v == "LZCNT=1", 408 v == "LZCNT=0", 409 v == "CR_WIDTH()", 410 v == "DF64()", 411 v == "IMMUNE_REXW()", 412 v == "FORCE64()", 413 v == "EOSZ=1", 414 v == "EOSZ!=1", 415 v == "EOSZ=2", 416 v == "EOSZ!=2", 417 v == "EOSZ=3", 418 v == "EOSZ!=3", 419 v == "BRANCH_HINT()", 420 v == "P4=1", 421 v == "P4=0", 422 v == "rexb_prefix", 423 v == "norexb_prefix", 424 v == "IMMUNE66()", 425 v == "REFINING66()", 426 v == "IGNORE66()", 427 v == "IMMUNE66_LOOP64()", 428 v == "OVERRIDE_SEG0()", 429 v == "OVERRIDE_SEG1()", 430 v == "REMOVE_SEGMENT()", 431 v == "ONE()", 432 v == "nolock_prefix", 433 v == "MODRM()", 434 v == "VMODRM_XMM()", 435 v == "VMODRM_YMM()", 436 v == "BCRC=0", 437 v == "BCRC=1", 438 v == "ESIZE_8_BITS()", 439 v == "ESIZE_16_BITS()", 440 v == "ESIZE_32_BITS()", 441 v == "ESIZE_64_BITS()", 442 v == "ESIZE_128_BITS()", 443 v == "NELEM_GPR_WRITER_STORE()", 444 v == "NELEM_GPR_WRITER_STORE_BYTE()", 445 v == "NELEM_GPR_WRITER_STORE_WORD()", 446 v == "NELEM_GPR_WRITER_LDOP_Q()", 447 v == "NELEM_GPR_WRITER_LDOP_D()", 448 v == "NELEM_GPR_READER()", 449 v == "NELEM_GPR_READER_BYTE()", 450 v == "NELEM_GPR_READER_WORD()", 451 v == "NELEM_GSCAT()", 452 v == "NELEM_HALF()", 453 v == "NELEM_FULL()", 454 v == "NELEM_FULLMEM()", 455 v == "NELEM_QUARTERMEM()", 456 v == "NELEM_EIGHTHMEM()", 457 v == "NELEM_HALFMEM()", 458 v == "NELEM_MEM128()", 459 v == "NELEM_SCALAR()", 460 v == "NELEM_TUPLE1()", 461 v == "NELEM_TUPLE2()", 462 v == "NELEM_TUPLE4()", 463 v == "NELEM_TUPLE8()", 464 v == "NELEM_TUPLE1_4X()", 465 v == "NELEM_TUPLE1_BYTE()", 466 v == "NELEM_TUPLE1_WORD()", 467 v == "NELEM_MOVDDUP()", 468 v == "UISA_VMODRM_XMM()", 469 v == "UISA_VMODRM_YMM()", 470 v == "UISA_VMODRM_ZMM()", 471 v == "MASK=0", 472 v == "FIX_ROUND_LEN128()", 473 v == "FIX_ROUND_LEN512()", 474 v == "AVX512_ROUND()", 475 v == "ZEROING=0", 476 v == "SAE()", 477 v == "VL512", // VL=2 478 v == "not_refining_f3", 479 v == "EVEXRR_ONE", 480 v == "CET=0", 481 v == "CET=1", 482 v == "WBNOINVD=0", 483 v == "WBNOINVD=1", 484 v == "CLDEMOTE=0", 485 v == "CLDEMOTE=1", 486 strings.HasPrefix(v, "MODEP5="): 487 default: 488 return errSkip(fmt.Sprintf("unknown pattern %v", v)) 489 } 490 } 491 if insn.Modrm { 492 switch insn.Mod { 493 case -3, -1, 0, 1, 2, 3: 494 default: 495 return fmt.Errorf("bad MOD value: %v", insn.Mod) 496 } 497 if insn.Reg < -1 || insn.Reg > 7 { 498 return fmt.Errorf("bad REG value: %v", insn.Mod) 499 } 500 if insn.Rm < -1 || insn.Rm > 7 { 501 return fmt.Errorf("bad RM value: %v", insn.Mod) 502 } 503 } 504 if insn.Imm != 0 && len(insn.Suffix) != 0 { 505 return fmt.Errorf("both immediate and suffix opcode") 506 } 507 if insn.Mode == 0 { 508 return errSkip("no modes for instruction") 509 } 510 return nil 511 } 512 513 func parseOperands(insn *x86.Insn, vals []string) error { 514 for _, v := range vals { 515 switch v { 516 case "REG0=SEG():r", "REG1=SEG():r", "REG0=SEG():w": 517 if insn.Reg != -1 { 518 return fmt.Errorf("REG=SEG() operand, but fixed reg") 519 } 520 insn.Reg = -6 521 case "REG0=CR_R():w", "REG1=CR_R():r": 522 if insn.Reg != -1 { 523 return fmt.Errorf("REG=CR_R() operand, but fixed reg") 524 } 525 insn.Reg = -8 526 insn.NoSibDisp = true 527 case "REG0=DR_R():w", "REG1=DR_R():r": 528 insn.NoSibDisp = true 529 case "MEM0:r:mem16", "MEM0:w:mem16", "MEM0:r:mem16int", "MEM0:w:mem16int": 530 insn.Mem16 = true 531 case "MEM0:r:mem32real", "MEM0:r:mem32int", "MEM0:w:mem32real", "MEM0:w:mem32int": 532 insn.Mem32 = true 533 } 534 } 535 return nil 536 } 537 538 func parseModrm(v string) (int8, error) { 539 if len(v) < 4 || len(v) > 7 || v[0] != '[' || v[len(v)-1] != ']' { 540 return 0, fmt.Errorf("malformed") 541 } 542 if v == "[mm]" || v == "[rrr]" || v == "[nnn]" { 543 return -1, nil 544 } 545 if !strings.HasPrefix(v, "[0b") { 546 return 0, fmt.Errorf("malformed") 547 } 548 var vv int8 549 for i := 3; i < len(v)-1; i++ { 550 if v[i] != '0' && v[i] != '1' { 551 return 0, fmt.Errorf("malformed") 552 } 553 vv *= 2 554 if v[i] == '1' { 555 vv++ 556 } 557 } 558 return vv, nil 559 } 560 561 func addImm(insn *x86.Insn, imm int8) { 562 if insn.Imm == 0 { 563 insn.Imm = imm 564 return 565 } 566 if insn.Imm2 == 0 { 567 insn.Imm2 = imm 568 return 569 } 570 panic("too many immediates") 571 }