github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/ifuzz/x86/gen/gen.go (about) 1 // Copyright 2017 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // gen generates instruction tables (ifuzz_types/insns.go) from Intel XED tables. 5 // Tables used to generate insns.go are checked in in all-enc-instructions.txt. 6 package main 7 8 import ( 9 "bufio" 10 "errors" 11 "fmt" 12 "os" 13 "reflect" 14 "strconv" 15 "strings" 16 17 "github.com/google/syzkaller/pkg/ifuzz/iset" 18 "github.com/google/syzkaller/pkg/ifuzz/x86" 19 "github.com/google/syzkaller/pkg/serializer" 20 "github.com/google/syzkaller/pkg/tool" 21 ) 22 23 // nolint: gocyclo, gocognit, funlen, dupl 24 func main() { 25 if len(os.Args) != 2 { 26 tool.Failf("usage: gen instructions.txt") 27 } 28 f, err := os.Open(os.Args[1]) 29 if err != nil { 30 tool.Failf("failed to open input file: %v", err) 31 } 32 defer f.Close() 33 34 skipped := 0 35 saved := "" 36 var insns []*x86.Insn 37 var insn, insn1 *x86.Insn 38 s := bufio.NewScanner(f) 39 for i := 1; s.Scan(); i++ { 40 reportError := func(msg string, args ...interface{}) { 41 fmt.Fprintf(os.Stderr, "line %v: %v\n", i, s.Text()) 42 tool.Failf(msg, args...) 43 } 44 line := s.Text() 45 if comment := strings.IndexByte(line, '#'); comment != -1 { 46 line = line[:comment] 47 } 48 line = strings.TrimSpace(line) 49 if line == "" { 50 continue 51 } 52 if line[len(line)-1] == '\\' { 53 saved += line[:len(line)-1] 54 continue 55 } 56 line = saved + line 57 saved = "" 58 if line == "{" { 59 insn = new(x86.Insn) 60 continue 61 } 62 if line == "}" { 63 if insn1 != nil { 64 insns = append(insns, insn1) 65 insn1 = nil 66 insn = nil 67 } 68 continue 69 } 70 colon := strings.IndexByte(line, ':') 71 if colon == -1 { 72 reportError("no colon") 73 } 74 name := strings.TrimSpace(line[:colon]) 75 if name == "" { 76 reportError("empty attribute name") 77 } 78 var vals []string 79 for _, v := range strings.Split(line[colon+1:], " ") { 80 v = strings.TrimSpace(v) 81 if v == "" { 82 continue 83 } 84 vals = append(vals, v) 85 } 86 switch name { 87 case "ICLASS": 88 if len(vals) != 1 { 89 reportError("ICLASS has more than one value") 90 } 91 insn.Name = vals[0] 92 case "CPL": 93 if len(vals) != 1 { 94 reportError("CPL has more than one value") 95 } 96 if vals[0] != "0" && vals[0] != "3" { 97 reportError("unknown CPL value: %v", vals[0]) 98 } 99 insn.Priv = vals[0] == "0" 100 case "EXTENSION": 101 if len(vals) != 1 { 102 reportError("EXTENSION has more than one value") 103 } 104 insn.Extension = vals[0] 105 switch insn.Extension { 106 case "FMA", "AVX2", "AVX", "F16C", "BMI2", "BMI", "XOP", "FMA4", "AVXAES", "BMI1", "AVX2GATHER": 107 insn.Mode = 1<<iset.ModeLong64 | 1<<iset.ModeProt32 108 } 109 insn.Avx2Gather = insn.Extension == "AVX2GATHER" 110 case "PATTERN": 111 if insn1 != nil { 112 insns = append(insns, insn1) 113 } 114 insn1 = new(x86.Insn) 115 *insn1 = *insn 116 if err := parsePattern(insn1, vals); err != nil { 117 var errSkip errSkip 118 if !errors.As(err, &errSkip) { 119 reportError(errSkip.Error()) 120 } 121 if err.Error() != "" { 122 fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err) 123 } 124 skipped++ 125 insn1 = nil 126 } 127 case "OPERANDS": 128 if insn1 == nil { 129 break 130 } 131 if err := parseOperands(insn1, vals); err != nil { 132 var errSkip errSkip 133 if !errors.As(err, &errSkip) { 134 reportError(errSkip.Error()) 135 } 136 if err.Error() != "" { 137 fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err) 138 } 139 skipped++ 140 insn1 = nil 141 } 142 } 143 } 144 145 var deduped []*x86.Insn 146 nextInsn: 147 for _, insn := range insns { 148 if insn.Extension == "AVX512VEX" || insn.Extension == "AVX512EVEX" { 149 skipped++ 150 continue 151 } 152 mod0 := insn.Mod 153 for j := len(deduped) - 1; j >= 0; j-- { 154 insn1 := deduped[j] 155 if insn.Mod == 3 && insn1.Mod == -3 || insn.Mod == -3 && insn1.Mod == 3 || insn1.Mod == -1 { 156 insn.Mod = insn1.Mod 157 } 158 if reflect.DeepEqual(insn, insn1) { 159 if insn.Mod != mod0 { 160 insn1.Mod = -1 161 } 162 continue nextInsn 163 } 164 insn.Mod = mod0 165 } 166 deduped = append(deduped, insn) 167 } 168 fmt.Fprintf(os.Stderr, "deduped %v instructions\n", len(insns)-len(deduped)) 169 insns = deduped 170 171 fmt.Printf(` 172 // Code generated by pkg/ifuzz/gen. DO NOT EDIT. 173 174 // +build !codeanalysis 175 176 package x86 177 178 import "github.com/google/syzkaller/pkg/ifuzz/x86" 179 180 func init() { 181 x86.Register(insns_x86) 182 } 183 184 var insns_x86 = []*Insn{ 185 `) 186 serializer.Write(os.Stdout, insns) 187 188 fmt.Fprintf(os.Stderr, "handled %v, skipped %v\n", len(insns), skipped) 189 } 190 191 type errSkip string 192 193 func (err errSkip) Error() string { 194 return string(err) 195 } 196 197 // nolint: gocyclo, gocognit, funlen 198 func parsePattern(insn *x86.Insn, vals []string) error { 199 if insn.Opcode != nil { 200 return fmt.Errorf("PATTERN is already parsed for the instruction") 201 } 202 // As spelled these have incorrect format for 16-bit addressing mode and with 67 prefix. 203 if insn.Name == "NOP5" || insn.Name == "NOP6" || insn.Name == "NOP7" || 204 insn.Name == "NOP8" || insn.Name == "NOP9" { 205 return errSkip("") 206 } 207 if insn.Mode == 0 { 208 insn.Mode = 1<<iset.ModeLast - 1 209 } 210 insn.Mod = -100 211 insn.Reg = -100 212 insn.Rm = -100 213 insn.VexP = -1 214 for _, v := range vals { 215 switch { 216 case strings.HasPrefix(v, "0x"): 217 op, err := strconv.ParseUint(v, 0, 8) 218 if err != nil { 219 return fmt.Errorf("failed to parse hex pattern: %v", v) 220 } 221 if !insn.Modrm { 222 insn.Opcode = append(insn.Opcode, byte(op)) 223 } else { 224 insn.Suffix = append(insn.Suffix, byte(op)) 225 } 226 case strings.HasPrefix(v, "0b"): 227 if len(v) != 8 || v[6] != '_' { 228 return fmt.Errorf("failed to parse bin pattern: %v", v) 229 } 230 var op byte 231 if v[2] == '1' { 232 op |= 1 << 7 233 } 234 if v[3] == '1' { 235 op |= 1 << 6 236 } 237 if v[4] == '1' { 238 op |= 1 << 5 239 } 240 if v[5] == '1' { 241 op |= 1 << 4 242 } 243 if v[7] == '1' { 244 op |= 1 << 3 245 } 246 insn.Opcode = append(insn.Opcode, op) 247 case strings.HasPrefix(v, "MOD["): 248 insn.Modrm = true 249 vv, err := parseModrm(v[3:]) 250 if err != nil { 251 return fmt.Errorf("failed to parse %v: %w", v, err) 252 } 253 insn.Mod = vv 254 case strings.HasPrefix(v, "REG["): 255 insn.Modrm = true 256 vv, err := parseModrm(v[3:]) 257 if err != nil { 258 return fmt.Errorf("failed to parse %v: %w", v, err) 259 } 260 insn.Reg = vv 261 case strings.HasPrefix(v, "RM["): 262 insn.Modrm = true 263 vv, err := parseModrm(v[2:]) 264 if err != nil { 265 return fmt.Errorf("failed to parse %v: %w", v, err) 266 } 267 insn.Rm = vv 268 case v == "RM=4": 269 insn.Rm = 4 270 case strings.HasPrefix(v, "SRM["): 271 vv, err := parseModrm(v[3:]) 272 if err != nil { 273 return fmt.Errorf("failed to parse %v: %w", v, err) 274 } 275 insn.Rm = vv 276 insn.Srm = true 277 case v == "SRM=0", v == "SRM!=0": 278 case v == "MOD!=3": 279 if !insn.Modrm || insn.Mod != -1 { 280 return fmt.Errorf("MOD!=3 without MOD") 281 } 282 insn.Mod = -3 283 case v == "MOD=3": 284 // Most other instructions contain "MOD[0b11] MOD=3", 285 // but BNDCL contains "MOD[mm] MOD=3" 286 insn.Mod = 3 287 case v == "MOD=0": 288 insn.Mod = 0 289 case v == "MOD=1": 290 insn.Mod = 1 291 case v == "MOD=2": 292 insn.Mod = 2 293 case v == "lock_prefix": 294 insn.Prefix = append(insn.Prefix, 0xF0) 295 296 // Immediates. 297 case v == "UIMM8()", v == "SIMM8()": 298 addImm(insn, 1) 299 case v == "UIMM16()": 300 addImm(insn, 2) 301 case v == "UIMM32()": 302 addImm(insn, 4) 303 case v == "SIMMz()": 304 addImm(insn, -1) 305 case v == "UIMMv()": 306 addImm(insn, -3) 307 case v == "UIMM8_1()": 308 addImm(insn, 1) 309 case v == "BRDISP8()": 310 addImm(insn, 1) 311 case v == "BRDISP32()": 312 addImm(insn, 4) 313 case v == "BRDISPz()": 314 addImm(insn, -1) 315 case v == "MEMDISPv()": 316 addImm(insn, -2) 317 318 // VOP/VEX 319 case v == "XOPV": 320 insn.Vex = 0x8f 321 insn.Mode &^= 1 << iset.ModeReal16 322 case v == "EVV": 323 insn.Vex = 0xc4 324 case v == "VV1": 325 insn.Vex = 0xc4 326 case v == "VMAP0": 327 insn.VexMap = 0 328 case v == "V0F": 329 insn.VexMap = 1 330 case v == "V0F38": 331 insn.VexMap = 2 332 case v == "V0F3A": 333 insn.VexMap = 3 334 case v == "XMAP8": 335 insn.VexMap = 8 336 case v == "XMAP9": 337 insn.VexMap = 9 338 case v == "XMAPA": 339 insn.VexMap = 10 340 case v == "VNP": 341 insn.VexP = 0 342 case v == "V66": 343 insn.VexP = 1 344 case v == "VF2": 345 insn.VexP = 3 346 case v == "VF3": 347 insn.VexP = 2 348 case v == "VL128", v == "VL=0": 349 insn.VexL = -1 350 case v == "VL256", v == "VL=1": 351 insn.VexL = 1 352 case v == "NOVSR": 353 insn.VexNoR = true 354 case v == "NOEVSR": 355 insn.VexNoR = true 356 // VEXDEST3=0b1 VEXDEST210=0b111 VEXDEST4=0b0 357 case v == "SE_IMM8()": 358 addImm(insn, 1) 359 360 // Modes. 361 case v == "mode64": 362 insn.Mode &= 1 << iset.ModeLong64 363 case v == "not64": 364 insn.Mode &^= 1 << iset.ModeLong64 365 case v == "mode32": 366 insn.Mode &= 1 << iset.ModeProt32 367 case v == "mode16": 368 insn.Mode &= 1<<iset.ModeProt16 | 1<<iset.ModeReal16 369 case v == "eamode64", 370 v == "eamode32", 371 v == "eamode16", 372 v == "eanot16": 373 374 case v == "no_refining_prefix": 375 insn.NoRepPrefix = true 376 insn.No66Prefix = true 377 case v == "no66_prefix", v == "eosz32", v == "eosz64": 378 insn.No66Prefix = true 379 case v == "eosz16", v == "eosznot64", v == "REP!=3": 380 // TODO(dvyukov): this may have some effect on REP/66 prefixes, 381 // but this wasn't checked. These are just added here to unbreak build. 382 case v == "f2_refining_prefix", v == "refining_f2", v == "repne", v == "REP=2": 383 insn.Prefix = append(insn.Prefix, 0xF2) 384 insn.NoRepPrefix = true 385 case v == "f3_refining_prefix", v == "refining_f3", v == "repe", v == "REP=3": 386 insn.Prefix = append(insn.Prefix, 0xF3) 387 insn.NoRepPrefix = true 388 case v == "norep", v == "not_refining", v == "REP=0": 389 insn.NoRepPrefix = true 390 case v == "osz_refining_prefix": 391 insn.Prefix = append(insn.Prefix, 0x66) 392 insn.NoRepPrefix = true 393 case v == "rexw_prefix", v == "W1": 394 insn.Rexw = 1 395 case v == "norexw_prefix", v == "W0": 396 insn.Rexw = -1 397 case v == "MPXMODE=1", 398 v == "MPXMODE=0", 399 v == "TZCNT=1", 400 v == "TZCNT=0", 401 v == "LZCNT=1", 402 v == "LZCNT=0", 403 v == "CR_WIDTH()", 404 v == "DF64()", 405 v == "IMMUNE_REXW()", 406 v == "FORCE64()", 407 v == "EOSZ=1", 408 v == "EOSZ!=1", 409 v == "EOSZ=2", 410 v == "EOSZ!=2", 411 v == "EOSZ=3", 412 v == "EOSZ!=3", 413 v == "BRANCH_HINT()", 414 v == "P4=1", 415 v == "P4=0", 416 v == "rexb_prefix", 417 v == "norexb_prefix", 418 v == "IMMUNE66()", 419 v == "REFINING66()", 420 v == "IGNORE66()", 421 v == "IMMUNE66_LOOP64()", 422 v == "OVERRIDE_SEG0()", 423 v == "OVERRIDE_SEG1()", 424 v == "REMOVE_SEGMENT()", 425 v == "ONE()", 426 v == "nolock_prefix", 427 v == "MODRM()", 428 v == "VMODRM_XMM()", 429 v == "VMODRM_YMM()", 430 v == "BCRC=0", 431 v == "BCRC=1", 432 v == "ESIZE_8_BITS()", 433 v == "ESIZE_16_BITS()", 434 v == "ESIZE_32_BITS()", 435 v == "ESIZE_64_BITS()", 436 v == "ESIZE_128_BITS()", 437 v == "NELEM_GPR_WRITER_STORE()", 438 v == "NELEM_GPR_WRITER_STORE_BYTE()", 439 v == "NELEM_GPR_WRITER_STORE_WORD()", 440 v == "NELEM_GPR_WRITER_LDOP_Q()", 441 v == "NELEM_GPR_WRITER_LDOP_D()", 442 v == "NELEM_GPR_READER()", 443 v == "NELEM_GPR_READER_BYTE()", 444 v == "NELEM_GPR_READER_WORD()", 445 v == "NELEM_GSCAT()", 446 v == "NELEM_HALF()", 447 v == "NELEM_FULL()", 448 v == "NELEM_FULLMEM()", 449 v == "NELEM_QUARTERMEM()", 450 v == "NELEM_EIGHTHMEM()", 451 v == "NELEM_HALFMEM()", 452 v == "NELEM_MEM128()", 453 v == "NELEM_SCALAR()", 454 v == "NELEM_TUPLE1()", 455 v == "NELEM_TUPLE2()", 456 v == "NELEM_TUPLE4()", 457 v == "NELEM_TUPLE8()", 458 v == "NELEM_TUPLE1_4X()", 459 v == "NELEM_TUPLE1_BYTE()", 460 v == "NELEM_TUPLE1_WORD()", 461 v == "NELEM_MOVDDUP()", 462 v == "UISA_VMODRM_XMM()", 463 v == "UISA_VMODRM_YMM()", 464 v == "UISA_VMODRM_ZMM()", 465 v == "MASK=0", 466 v == "FIX_ROUND_LEN128()", 467 v == "FIX_ROUND_LEN512()", 468 v == "AVX512_ROUND()", 469 v == "ZEROING=0", 470 v == "SAE()", 471 v == "VL512", // VL=2 472 v == "not_refining_f3", 473 v == "EVEXRR_ONE", 474 v == "CET=0", 475 v == "CET=1", 476 v == "WBNOINVD=0", 477 v == "WBNOINVD=1", 478 v == "CLDEMOTE=0", 479 v == "CLDEMOTE=1", 480 strings.HasPrefix(v, "MODEP5="): 481 default: 482 return errSkip(fmt.Sprintf("unknown pattern %v", v)) 483 } 484 } 485 if insn.Modrm { 486 switch insn.Mod { 487 case -3, -1, 0, 1, 2, 3: 488 default: 489 return fmt.Errorf("bad MOD value: %v", insn.Mod) 490 } 491 if insn.Reg < -1 || insn.Reg > 7 { 492 return fmt.Errorf("bad REG value: %v", insn.Mod) 493 } 494 if insn.Rm < -1 || insn.Rm > 7 { 495 return fmt.Errorf("bad RM value: %v", insn.Mod) 496 } 497 } 498 if insn.Imm != 0 && len(insn.Suffix) != 0 { 499 return fmt.Errorf("both immediate and suffix opcode") 500 } 501 if insn.Mode == 0 { 502 return errSkip("no modes for instruction") 503 } 504 return nil 505 } 506 507 func parseOperands(insn *x86.Insn, vals []string) error { 508 for _, v := range vals { 509 switch v { 510 case "REG0=SEG():r", "REG1=SEG():r", "REG0=SEG():w": 511 if insn.Reg != -1 { 512 return fmt.Errorf("REG=SEG() operand, but fixed reg") 513 } 514 insn.Reg = -6 515 case "REG0=CR_R():w", "REG1=CR_R():r": 516 if insn.Reg != -1 { 517 return fmt.Errorf("REG=CR_R() operand, but fixed reg") 518 } 519 insn.Reg = -8 520 insn.NoSibDisp = true 521 case "REG0=DR_R():w", "REG1=DR_R():r": 522 insn.NoSibDisp = true 523 case "MEM0:r:mem16", "MEM0:w:mem16", "MEM0:r:mem16int", "MEM0:w:mem16int": 524 insn.Mem16 = true 525 case "MEM0:r:mem32real", "MEM0:r:mem32int", "MEM0:w:mem32real", "MEM0:w:mem32int": 526 insn.Mem32 = true 527 } 528 } 529 return nil 530 } 531 532 func parseModrm(v string) (int8, error) { 533 if len(v) < 4 || len(v) > 7 || v[0] != '[' || v[len(v)-1] != ']' { 534 return 0, fmt.Errorf("malformed") 535 } 536 if v == "[mm]" || v == "[rrr]" || v == "[nnn]" { 537 return -1, nil 538 } 539 if !strings.HasPrefix(v, "[0b") { 540 return 0, fmt.Errorf("malformed") 541 } 542 var vv int8 543 for i := 3; i < len(v)-1; i++ { 544 if v[i] != '0' && v[i] != '1' { 545 return 0, fmt.Errorf("malformed") 546 } 547 vv *= 2 548 if v[i] == '1' { 549 vv++ 550 } 551 } 552 return vv, nil 553 } 554 555 func addImm(insn *x86.Insn, imm int8) { 556 if insn.Imm == 0 { 557 insn.Imm = imm 558 return 559 } 560 if insn.Imm2 == 0 { 561 insn.Imm2 = imm 562 return 563 } 564 panic("too many immediates") 565 }