// github.com/tencent/goom@v1.0.1/internal/arch/x86asm/decode.go (about)

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Table-driven decoding of x86 instructions.

package x86asm

import (
	"encoding/binary"
	"errors"
	"fmt"
	"runtime"
)

// Set trace to true to cause the decoder to print the PC sequence
// of the executed instruction codes. This is typically only useful
// when you are running a test of a single input case.
const trace = false

// A decodeOp is a single instruction in the decoder bytecode program.
//
// The decodeOps correspond to consuming and conditionally branching
// on input bytes, consuming additional fields, and then interpreting
// consumed data as instruction arguments. The names of the xRead and xArg
// operations are taken from the Intel manual conventions, for example
// Volume 2, Section 3.1.1, page 487 of
// http://www.intel.com/content/dam/www/public/us/en/documents
// /manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
//
// The actual decoding program is generated by ../x86map.
//
// TODO(rsc): We may be able to merge various of the memory operands
// since we don't care about, say, the distinction between m80dec and m80bcd.
// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
36 37 type decodeOp uint16 38 39 const ( 40 xFail decodeOp = iota // invalid instruction (return) 41 xMatch // completed match 42 xJump // jump to pc 43 44 xCondByte // switch on instruction byte value 45 xCondSlashR // read and switch on instruction /r value 46 xCondPrefix // switch on presence of instruction prefix 47 xCondIs64 // switch on 64-bit processor mode 48 xCondDataSize // switch on operand size 49 xCondAddrSize // switch on address size 50 xCondIsMem // switch on memory vs register argument 51 52 xSetOp // set instruction opcode 53 54 xReadSlashR // read /r 55 xReadIb // read ib 56 xReadIw // read iw 57 xReadID // read id 58 xReadIo // read io 59 xReadCb // read cb 60 xReadCw // read cw 61 xReadCd // read cd 62 xReadCp // read cp 63 xReadCm // read cm 64 65 xArg1 // arg 1 66 xArg3 // arg 3 67 xArgAL // arg AL 68 xArgAX // arg AX 69 xArgCL // arg CL 70 xArgCR0dashCR7 // arg CR0-CR7 71 xArgCS // arg CS 72 xArgDR0dashDR7 // arg DR0-DR7 73 xArgDS // arg DS 74 xArgDX // arg DX 75 xArgEAX // arg EAX 76 xArgEDX // arg EDX 77 xArgES // arg ES 78 xArgFS // arg FS 79 xArgGS // arg GS 80 xArgImm16 // arg imm16 81 xArgImm32 // arg imm32 82 xArgImm64 // arg imm64 83 xArgImm8 // arg imm8 84 xArgImm8u // arg imm8 but record as unsigned 85 xArgImm16u // arg imm8 but record as unsigned 86 xArgM // arg m 87 xArgM128 // arg m128 88 xArgM256 // arg m256 89 xArgM1428byte // arg m14/28byte 90 xArgM16 // arg m16 91 xArgM16and16 // arg m16&16 92 xArgM16and32 // arg m16&32 93 xArgM16and64 // arg m16&64 94 xArgM16colon16 // arg m16:16 95 xArgM16colon32 // arg m16:32 96 xArgM16colon64 // arg m16:64 97 xArgM16int // arg m16int 98 xArgM2byte // arg m2byte 99 xArgM32 // arg m32 100 xArgM32and32 // arg m32&32 101 xArgM32fp // arg m32fp 102 xArgM32int // arg m32int 103 xArgM512byte // arg m512byte 104 xArgM64 // arg m64 105 xArgM64fp // arg m64fp 106 xArgM64int // arg m64int 107 xArgM8 // arg m8 108 xArgM80bcd // arg m80bcd 109 xArgM80dec // arg m80dec 110 xArgM80fp // arg m80fp 111 
xArgM94108byte // arg m94/108byte 112 xArgMm // arg mm 113 xArgMm1 // arg mm1 114 xArgMm2 // arg mm2 115 xArgMm2M64 // arg mm2/m64 116 xArgMmM32 // arg mm/m32 117 xArgMmM64 // arg mm/m64 118 xArgMem // arg mem 119 xArgMoffs16 // arg moffs16 120 xArgMoffs32 // arg moffs32 121 xArgMoffs64 // arg moffs64 122 xArgMoffs8 // arg moffs8 123 xArgPtr16colon16 // arg ptr16:16 124 xArgPtr16colon32 // arg ptr16:32 125 xArgR16 // arg r16 126 xArgR16op // arg r16 with +rw in opcode 127 xArgR32 // arg r32 128 xArgR32M16 // arg r32/m16 129 xArgR32M8 // arg r32/m8 130 xArgR32op // arg r32 with +rd in opcode 131 xArgR64 // arg r64 132 xArgR64M16 // arg r64/m16 133 xArgR64op // arg r64 with +rd in opcode 134 xArgR8 // arg r8 135 xArgR8op // arg r8 with +rb in opcode 136 xArgRAX // arg RAX 137 xArgRDX // arg RDX 138 xArgRM16 // arg r/m16 139 xArgRM32 // arg r/m32 140 xArgRM64 // arg r/m64 141 xArgRM8 // arg r/m8 142 xArgRel16 // arg rel16 143 xArgRel32 // arg rel32 144 xArgRel8 // arg rel8 145 xArgSS // arg SS 146 xArgST // arg ST, aka ST(0) 147 xArgSTi // arg ST(i) with +i in opcode 148 xArgSreg // arg Sreg 149 xArgTR0dashTR7 // arg TR0-TR7 150 xArgXmm // arg xmm 151 xArgXMM0 // arg <XMM0> 152 xArgXmm1 // arg xmm1 153 xArgXmm2 // arg xmm2 154 xArgXmm2M128 // arg xmm2/m128 155 xArgYmm2M256 // arg ymm2/m256 156 xArgXmm2M16 // arg xmm2/m16 157 xArgXmm2M32 // arg xmm2/m32 158 xArgXmm2M64 // arg xmm2/m64 159 xArgXmmM128 // arg xmm/m128 160 xArgXmmM32 // arg xmm/m32 161 xArgXmmM64 // arg xmm/m64 162 xArgYmm1 // arg ymm1 163 xArgRmf16 // arg r/m16 but force mod=3 164 xArgRmf32 // arg r/m32 but force mod=3 165 xArgRmf64 // arg r/m64 but force mod=3 166 ) 167 168 // instPrefix returns an Inst describing just one prefix byte. 169 // It is only used if there is a prefix followed by an unintelligible 170 // or invalid instruction byte sequence. 171 func instPrefix(b byte, mode int) (Inst, error) { 172 // When tracing it is useful to see what called instPrefix to report an error. 
173 if trace { 174 _, file, line, _ := runtime.Caller(1) 175 fmt.Printf("%s:%d\n", file, line) 176 } 177 178 p := Prefix(b) 179 switch p { 180 case PrefixDataSize: 181 if mode == 16 { 182 p = PrefixData32 183 } else { 184 p = PrefixData16 185 } 186 case PrefixAddrSize: 187 if mode == 32 { 188 p = PrefixAddr16 189 } else { 190 p = PrefixAddr32 191 } 192 } 193 // Note: using composite literal with Prefix key confuses 'bundle' tool. 194 inst := Inst{Len: 1} 195 inst.Prefix = Prefixes{p} 196 197 return inst, nil 198 } 199 200 // truncated reports a truncated instruction. 201 // For now we use instPrefix but perhaps later we will return 202 // a specific error here. 203 func truncated(src []byte, mode int) (Inst, error) { 204 if len(src) == 0 { 205 return Inst{}, ErrTruncated 206 } 207 208 return instPrefix(src[0], mode) // too long 209 } 210 211 // These are the errors returned by Decode. 212 var ( 213 ErrInvalidMode = errors.New("invalid x86 mode in Decode") 214 ErrTruncated = errors.New("truncated instruction") 215 ErrUnrecognized = errors.New("unrecognized instruction") 216 ) 217 218 // decoderCover records coverage information for which parts 219 // of the byte code have been executed. 220 var decoderCover []bool 221 222 // Decode decodes the leading bytes in src as a single instruction. 223 // The mode arguments specifies the assumed processor mode: 224 // 16, 32, or 64 for 16-, 32-, and 64-bit execution modes. 225 func Decode(src []byte, mode int) (inst Inst, err error) { 226 return decode1(src, mode, false) 227 } 228 229 // decode1 is the implementation of Decode but takes an extra 230 // gnuCompat flag to cause it to change its behavior to mimic 231 // bugs (or at least unique features) of GNU libopcodes as used 232 // by objdump. We don't believe that logic is the right thing to do 233 // in general, but when testing against libopcodes it simplifies the 234 // comparison if we adjust a few small pieces of logic. 
235 // The affected logic is in the conditional branch for "mandatory" prefixes, 236 // case xCondPrefix. 237 // nolint 238 func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) { 239 switch mode { 240 case 16, 32, 64: 241 // ok 242 // TODO(rsc): 64-bit mode not tested, probably not working. 243 default: 244 return Inst{}, ErrInvalidMode 245 } 246 247 // Maximum instruction size is 15 bytes. 248 // If we need to read more, return 'truncated instruction. 249 if len(src) > 15 { 250 src = src[:15] 251 } 252 253 var ( 254 // prefix decoding information 255 pos = 0 // position reading src 256 nprefix = 0 // number of prefixes 257 lockIndex = -1 // index of LOCK prefix in src and inst.Prefix 258 repIndex = -1 // index of REP/REPN prefix in src and inst.Prefix 259 segIndex = -1 // index of Group 2 prefix in src and inst.Prefix 260 dataSizeIndex = -1 // index of Group 3 prefix in src and inst.Prefix 261 addrSizeIndex = -1 // index of Group 4 prefix in src and inst.Prefix 262 rex Prefix // rex byte if present (or 0) 263 rexUsed Prefix // bits used in rex byte 264 rexIndex = -1 // index of rex byte 265 vex Prefix // use vex encoding 266 vexIndex = -1 // index of vex prefix 267 268 addrMode = mode // address mode (width in bits) 269 dataMode = mode // operand mode (width in bits) 270 271 // decoded ModR/M fields 272 haveModrm bool 273 modrm int 274 mod int 275 regop int 276 rm int 277 278 // if ModR/M is memory reference, Mem form 279 mem Mem 280 haveMem bool 281 282 // decoded SIB fields 283 haveSIB bool 284 sib int 285 scale int 286 index int 287 base int 288 displen int 289 dispoff int 290 291 // decoded immediate values 292 imm int64 293 imm8 int8 294 immc int64 295 immcpos int 296 297 // output 298 opshift int 299 inst Inst 300 narg int // number of arguments written to inst 301 ) 302 303 if mode == 64 { 304 dataMode = 32 305 } 306 307 // Read non-REX prefixes. 
308 ReadPrefixes: 309 for ; pos < len(src); pos++ { 310 p := Prefix(src[pos]) 311 switch p { 312 default: 313 nprefix = pos 314 break ReadPrefixes 315 316 // Group 1 - lock and repeat prefixes 317 // According to Intel, there should only be one from this set, 318 // but according to AMD both can be present. 319 case 0xF0: 320 if lockIndex >= 0 { 321 inst.Prefix[lockIndex] |= PrefixIgnored 322 } 323 lockIndex = pos 324 case 0xF2, 0xF3: 325 if repIndex >= 0 { 326 inst.Prefix[repIndex] |= PrefixIgnored 327 } 328 repIndex = pos 329 330 // Group 2 - segment override / branch hints 331 case 0x26, 0x2E, 0x36, 0x3E: 332 if mode == 64 { 333 p |= PrefixIgnored 334 break 335 } 336 fallthrough 337 case 0x64, 0x65: 338 if segIndex >= 0 { 339 inst.Prefix[segIndex] |= PrefixIgnored 340 } 341 segIndex = pos 342 343 // Group 3 - operand size override 344 case 0x66: 345 if mode == 16 { 346 dataMode = 32 347 p = PrefixData32 348 } else { 349 dataMode = 16 350 p = PrefixData16 351 } 352 if dataSizeIndex >= 0 { 353 inst.Prefix[dataSizeIndex] |= PrefixIgnored 354 } 355 dataSizeIndex = pos 356 357 // Group 4 - address size override 358 case 0x67: 359 if mode == 32 { 360 addrMode = 16 361 p = PrefixAddr16 362 } else { 363 addrMode = 32 364 p = PrefixAddr32 365 } 366 if addrSizeIndex >= 0 { 367 inst.Prefix[addrSizeIndex] |= PrefixIgnored 368 } 369 addrSizeIndex = pos 370 371 //Group 5 - Vex encoding 372 case 0xC5: 373 if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) { 374 vex = p 375 vexIndex = pos 376 inst.Prefix[pos] = p 377 inst.Prefix[pos+1] = Prefix(src[pos+1]) 378 pos++ 379 continue 380 } else { 381 nprefix = pos 382 break ReadPrefixes 383 } 384 case 0xC4: 385 if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) { 386 vex = p 387 vexIndex = pos 388 inst.Prefix[pos] = p 389 inst.Prefix[pos+1] = Prefix(src[pos+1]) 390 inst.Prefix[pos+2] = Prefix(src[pos+2]) 391 pos += 2 392 continue 393 } else { 394 nprefix 
= pos 395 break ReadPrefixes 396 } 397 } 398 399 if pos >= len(inst.Prefix) { 400 return instPrefix(src[0], mode) // too long 401 } 402 403 inst.Prefix[pos] = p 404 } 405 406 // Read REX prefix. 407 if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 { 408 rex = Prefix(src[pos]) 409 410 rexIndex = pos 411 412 if pos >= len(inst.Prefix) { 413 return instPrefix(src[0], mode) // too long 414 } 415 416 inst.Prefix[pos] = rex 417 418 pos++ 419 420 if rex&PrefixREXW != 0 { 421 dataMode = 64 422 423 if dataSizeIndex >= 0 { 424 inst.Prefix[dataSizeIndex] |= PrefixIgnored 425 } 426 } 427 } 428 429 // Decode instruction stream, interpreting decoding instructions. 430 // opshift gives the shift to use when saving the next 431 // opcode byte into inst.Opcode. 432 opshift = 24 433 434 // Decode loop, executing decoder program. 435 var oldPC, prevPC int 436 Decode: 437 for pc := 1; ; { // TODO uint 438 oldPC = prevPC 439 prevPC = pc 440 if trace { 441 println("run", pc) 442 } 443 x := decoder[pc] 444 if decoderCover != nil { 445 decoderCover[pc] = true 446 } 447 pc++ 448 449 // Read and decode ModR/M if needed by opcode. 450 switch decodeOp(x) { 451 case xCondSlashR, xReadSlashR: 452 if haveModrm { 453 return Inst{Len: pos}, errInternal 454 } 455 haveModrm = true 456 if pos >= len(src) { 457 return truncated(src, mode) 458 } 459 modrm = int(src[pos]) 460 pos++ 461 if opshift >= 0 { 462 inst.Opcode |= uint32(modrm) << uint(opshift) 463 opshift -= 8 464 } 465 mod = modrm >> 6 466 regop = (modrm >> 3) & 07 467 rm = modrm & 07 468 if rex&PrefixREXR != 0 { 469 rexUsed |= PrefixREXR 470 regop |= 8 471 } 472 if addrMode == 16 { 473 // 16-bit modrm form 474 if mod != 3 { 475 haveMem = true 476 mem = addr16[rm] 477 if rm == 6 && mod == 0 { 478 mem.Base = 0 479 } 480 481 // Consume disp16 if present. 
482 if mod == 0 && rm == 6 || mod == 2 { 483 if pos+2 > len(src) { 484 return truncated(src, mode) 485 } 486 mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:])) 487 pos += 2 488 } 489 490 // Consume disp8 if present. 491 if mod == 1 { 492 if pos >= len(src) { 493 return truncated(src, mode) 494 } 495 mem.Disp = int64(int8(src[pos])) 496 pos++ 497 } 498 } 499 } else { 500 haveMem = mod != 3 501 502 // 32-bit or 64-bit form 503 // Consume SIB encoding if present. 504 if rm == 4 && mod != 3 { 505 haveSIB = true 506 if pos >= len(src) { 507 return truncated(src, mode) 508 } 509 sib = int(src[pos]) 510 pos++ 511 if opshift >= 0 { 512 inst.Opcode |= uint32(sib) << uint(opshift) 513 opshift -= 8 514 } 515 scale = sib >> 6 516 index = (sib >> 3) & 07 517 base = sib & 07 518 if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 { 519 rexUsed |= PrefixREXB 520 base |= 8 521 } 522 if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 { 523 rexUsed |= PrefixREXX 524 index |= 8 525 } 526 527 mem.Scale = 1 << uint(scale) 528 if index == 4 { 529 // no mem.Index 530 } else { 531 mem.Index = baseRegForBits(addrMode) + Reg(index) 532 } 533 if base&7 == 5 && mod == 0 { 534 // no mem.Base 535 } else { 536 mem.Base = baseRegForBits(addrMode) + Reg(base) 537 } 538 } else { 539 if rex&PrefixREXB != 0 { 540 rexUsed |= PrefixREXB 541 rm |= 8 542 } 543 if mod == 0 && rm&7 == 5 || rm&7 == 4 { 544 // base omitted 545 } else if mod != 3 { 546 mem.Base = baseRegForBits(addrMode) + Reg(rm) 547 } 548 } 549 550 // Consume disp32 if present. 551 if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 { 552 if pos+4 > len(src) { 553 return truncated(src, mode) 554 } 555 dispoff = pos 556 displen = 4 557 mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:])) 558 pos += 4 559 } 560 561 // Consume disp8 if present. 
562 if mod == 1 { 563 if pos >= len(src) { 564 return truncated(src, mode) 565 } 566 dispoff = pos 567 displen = 1 568 mem.Disp = int64(int8(src[pos])) 569 pos++ 570 } 571 572 // In 64-bit, mod=0 rm=5 is PC-relative instead of just disp. 573 // See Vol 2A. Table 2-7. 574 if mode == 64 && mod == 0 && rm&7 == 5 { 575 if addrMode == 32 { 576 mem.Base = EIP 577 } else { 578 mem.Base = RIP 579 } 580 } 581 } 582 583 if segIndex >= 0 { 584 mem.Segment = prefixToSegment(inst.Prefix[segIndex]) 585 } 586 } 587 588 // Execute single opcode. 589 switch decodeOp(x) { 590 default: 591 println("bad op", x, "at", pc-1, "from", oldPC) 592 return Inst{Len: pos}, errInternal 593 594 case xFail: 595 inst.Op = 0 596 break Decode 597 598 case xMatch: 599 break Decode 600 601 case xJump: 602 pc = int(decoder[pc]) 603 604 // Conditional branches. 605 606 case xCondByte: 607 if pos >= len(src) { 608 return truncated(src, mode) 609 } 610 b := src[pos] 611 n := int(decoder[pc]) 612 pc++ 613 for i := 0; i < n; i++ { 614 xb, xpc := decoder[pc], int(decoder[pc+1]) 615 pc += 2 616 if b == byte(xb) { 617 pc = xpc 618 pos++ 619 if opshift >= 0 { 620 inst.Opcode |= uint32(b) << uint(opshift) 621 opshift -= 8 622 } 623 continue Decode 624 } 625 } 626 // xCondByte is the only conditional with a fall through, 627 // so that it can be used to pick off special cases before 628 // an xCondSlash. If the fallthrough instruction is xFail, 629 // advance the position so that the decoded instruction 630 // size includes the byte we just compared against. 
631 if decodeOp(decoder[pc]) == xJump { 632 pc = int(decoder[pc+1]) 633 } 634 if decodeOp(decoder[pc]) == xFail { 635 pos++ 636 } 637 638 case xCondIs64: 639 if mode == 64 { 640 pc = int(decoder[pc+1]) 641 } else { 642 pc = int(decoder[pc]) 643 } 644 645 case xCondIsMem: 646 mem := haveMem 647 if !haveModrm { 648 if pos >= len(src) { 649 return instPrefix(src[0], mode) // too long 650 } 651 mem = src[pos]>>6 != 3 652 } 653 if mem { 654 pc = int(decoder[pc+1]) 655 } else { 656 pc = int(decoder[pc]) 657 } 658 659 case xCondDataSize: 660 switch dataMode { 661 case 16: 662 if dataSizeIndex >= 0 { 663 inst.Prefix[dataSizeIndex] |= PrefixImplicit 664 } 665 pc = int(decoder[pc]) 666 case 32: 667 if dataSizeIndex >= 0 { 668 inst.Prefix[dataSizeIndex] |= PrefixImplicit 669 } 670 pc = int(decoder[pc+1]) 671 case 64: 672 rexUsed |= PrefixREXW 673 pc = int(decoder[pc+2]) 674 } 675 676 case xCondAddrSize: 677 switch addrMode { 678 case 16: 679 if addrSizeIndex >= 0 { 680 inst.Prefix[addrSizeIndex] |= PrefixImplicit 681 } 682 pc = int(decoder[pc]) 683 case 32: 684 if addrSizeIndex >= 0 { 685 inst.Prefix[addrSizeIndex] |= PrefixImplicit 686 } 687 pc = int(decoder[pc+1]) 688 case 64: 689 pc = int(decoder[pc+2]) 690 } 691 692 case xCondPrefix: 693 // Conditional branch based on presence or absence of prefixes. 694 // The conflict cases here are completely undocumented and 695 // differ significantly between GNU libopcodes and Intel xed. 696 // I have not written assembly code to divine what various CPUs 697 // do, but it wouldn't surprise me if they are not consistent either. 698 // 699 // The basic idea is to switch on the presence of a prefix, so that 700 // for example: 701 // 702 // xCondPrefix, 4 703 // 0xF3, 123, 704 // 0xF2, 234, 705 // 0x66, 345, 706 // 0, 456 707 // 708 // branch to 123 if the F3 prefix is present, 234 if the F2 prefix 709 // is present, 66 if the 345 prefix is present, and 456 otherwise. 
710 // The prefixes are given in descending order so that the 0 will be last. 711 // 712 // It is unclear what should happen if multiple conditions are 713 // satisfied: what if F2 and F3 are both present, or if 66 and F2 714 // are present, or if all three are present? The one chosen becomes 715 // part of the opcode and the others do not. Perhaps the answer 716 // depends on the specific opcodes in question. 717 // 718 // The only clear example is that CRC32 is F2 0F 38 F1 /r, and 719 // it comes in 16-bit and 32-bit forms based on the 66 prefix, 720 // so 66 F2 0F 38 F1 /r should be treated as F2 taking priority, 721 // with the 66 being only an operand size override, and probably 722 // F2 66 0F 38 F1 /r should be treated the same. 723 // Perhaps that rule is specific to the case of CRC32, since no 724 // 66 0F 38 F1 instruction is defined (today) (that we know of). 725 // However, both libopcodes and xed seem to generalize this 726 // example and choose F2/F3 in preference to 66, and we 727 // do the same. 728 // 729 // Next, what if both F2 and F3 are present? Which wins? 730 // The Intel xed rule, and ours, is that the one that occurs last wins. 731 // The GNU libopcodes rule, which we implement only in gnuCompat mode, 732 // is that F3 beats F2 unless F3 has no special meaning, in which 733 // case F3 can be a modified on an F2 special meaning. 734 // 735 // Concretely, 736 // 66 0F D6 /r is MOVQ 737 // F2 0F D6 /r is MOVDQ2Q 738 // F3 0F D6 /r is MOVQ2DQ. 739 // 740 // F2 66 0F D6 /r is 66 + MOVDQ2Q always. 741 // 66 F2 0F D6 /r is 66 + MOVDQ2Q always. 742 // F3 66 0F D6 /r is 66 + MOVQ2DQ always. 743 // 66 F3 0F D6 /r is 66 + MOVQ2DQ always. 744 // F2 F3 0F D6 /r is F2 + MOVQ2DQ always. 745 // F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes. 746 // Adding 66 anywhere in the prefix section of the 747 // last two cases does not change the outcome. 
748 // 749 // Finally, what if there is a variant in which 66 is a mandatory 750 // prefix rather than an operand size override, but we know of 751 // no corresponding F2/F3 form, and we see both F2/F3 and 66. 752 // Does F2/F3 still take priority, so that the result is an unknown 753 // instruction, or does the 66 take priority, so that the extended 754 // 66 instruction should be interpreted as having a REP/REPN prefix? 755 // Intel xed does the former and GNU libopcodes does the latter. 756 // We side with Intel xed, unless we are trying to match libopcodes 757 // more closely during the comparison-based test suite. 758 // 759 // In 64-bit mode REX.W is another valid prefix to test for, but 760 // there is less ambiguity about that. When present, REX.W is 761 // always the first entry in the table. 762 n := int(decoder[pc]) 763 pc++ 764 sawF3 := false 765 for j := 0; j < n; j++ { 766 prefix := Prefix(decoder[pc+2*j]) 767 if prefix.IsREX() { 768 rexUsed |= prefix 769 if rex&prefix == prefix { 770 pc = int(decoder[pc+2*j+1]) 771 continue Decode 772 } 773 continue 774 } 775 ok := false 776 if prefix == 0 { 777 ok = true 778 } else if prefix.IsREX() { 779 rexUsed |= prefix 780 if rex&prefix == prefix { 781 ok = true 782 } 783 } else if prefix == 0xC5 || prefix == 0xC4 { 784 if vex == prefix { 785 ok = true 786 } 787 } else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A || 788 prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) { 789 var vexM, vexP Prefix 790 if vex == 0xC5 { 791 vexM = 1 // 2 byte vex always implies 0F 792 vexP = inst.Prefix[vexIndex+1] 793 } else { 794 vexM = inst.Prefix[vexIndex+1] 795 vexP = inst.Prefix[vexIndex+2] 796 } 797 switch prefix { 798 case 0x66: 799 ok = vexP&3 == 1 800 case 0xF3: 801 ok = vexP&3 == 2 802 case 0xF2: 803 ok = vexP&3 == 3 804 case 0x0F: 805 ok = vexM&3 == 1 806 case 0x0F38: 807 ok = vexM&3 == 2 808 case 0x0F3A: 809 ok = vexM&3 == 3 810 } 811 } else { 812 if prefix == 0xF3 { 813 sawF3 = true 814 } 
815 switch prefix { 816 case PrefixLOCK: 817 if lockIndex >= 0 { 818 inst.Prefix[lockIndex] |= PrefixImplicit 819 ok = true 820 } 821 case PrefixREP, PrefixREPN: 822 if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix { 823 inst.Prefix[repIndex] |= PrefixImplicit 824 ok = true 825 } 826 if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) { 827 // Check to see if earlier prefix F3 is present. 828 for i := repIndex - 1; i >= 0; i-- { 829 if inst.Prefix[i]&0xFF == prefix { 830 inst.Prefix[i] |= PrefixImplicit 831 ok = true 832 } 833 } 834 } 835 if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 { 836 // Check to see if earlier prefix F2 is present. 837 for i := repIndex - 1; i >= 0; i-- { 838 if inst.Prefix[i]&0xFF == prefix { 839 inst.Prefix[i] |= PrefixImplicit 840 ok = true 841 } 842 } 843 } 844 case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS: 845 if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix { 846 inst.Prefix[segIndex] |= PrefixImplicit 847 ok = true 848 } 849 case PrefixDataSize: 850 // Looking for 66 mandatory prefix. 851 // The F2/F3 mandatory prefixes take priority when both are present. 852 // If we got this far in the xCondPrefix table and an F2/F3 is present, 853 // it means the table didn't have any entry for that prefix. But if 66 has 854 // special meaning, perhaps F2/F3 have special meaning that we don't know. 855 // Intel xed works this way, treating the F2/F3 as inhibiting the 66. 856 // GNU libopcodes allows the 66 to match. We do what Intel xed does 857 // except in gnuCompat mode. 
858 if repIndex >= 0 && !gnuCompat { 859 inst.Op = 0 860 break Decode 861 } 862 if dataSizeIndex >= 0 { 863 inst.Prefix[dataSizeIndex] |= PrefixImplicit 864 ok = true 865 } 866 case PrefixAddrSize: 867 if addrSizeIndex >= 0 { 868 inst.Prefix[addrSizeIndex] |= PrefixImplicit 869 ok = true 870 } 871 } 872 } 873 if ok { 874 pc = int(decoder[pc+2*j+1]) 875 continue Decode 876 } 877 } 878 inst.Op = 0 879 break Decode 880 881 case xCondSlashR: 882 pc = int(decoder[pc+regop&7]) 883 884 // Input. 885 886 case xReadSlashR: 887 // done above 888 889 case xReadIb: 890 if pos >= len(src) { 891 return truncated(src, mode) 892 } 893 imm8 = int8(src[pos]) 894 pos++ 895 896 case xReadIw: 897 if pos+2 > len(src) { 898 return truncated(src, mode) 899 } 900 imm = int64(binary.LittleEndian.Uint16(src[pos:])) 901 pos += 2 902 903 case xReadID: 904 if pos+4 > len(src) { 905 return truncated(src, mode) 906 } 907 imm = int64(binary.LittleEndian.Uint32(src[pos:])) 908 pos += 4 909 910 case xReadIo: 911 if pos+8 > len(src) { 912 return truncated(src, mode) 913 } 914 imm = int64(binary.LittleEndian.Uint64(src[pos:])) 915 pos += 8 916 917 case xReadCb: 918 if pos >= len(src) { 919 return truncated(src, mode) 920 } 921 immcpos = pos 922 immc = int64(src[pos]) 923 pos++ 924 925 case xReadCw: 926 if pos+2 > len(src) { 927 return truncated(src, mode) 928 } 929 immcpos = pos 930 immc = int64(binary.LittleEndian.Uint16(src[pos:])) 931 pos += 2 932 933 case xReadCm: 934 immcpos = pos 935 if addrMode == 16 { 936 if pos+2 > len(src) { 937 return truncated(src, mode) 938 } 939 immc = int64(binary.LittleEndian.Uint16(src[pos:])) 940 pos += 2 941 } else if addrMode == 32 { 942 if pos+4 > len(src) { 943 return truncated(src, mode) 944 } 945 immc = int64(binary.LittleEndian.Uint32(src[pos:])) 946 pos += 4 947 } else { 948 if pos+8 > len(src) { 949 return truncated(src, mode) 950 } 951 immc = int64(binary.LittleEndian.Uint64(src[pos:])) 952 pos += 8 953 } 954 case xReadCd: 955 immcpos = pos 956 if pos+4 > 
len(src) { 957 return truncated(src, mode) 958 } 959 immc = int64(binary.LittleEndian.Uint32(src[pos:])) 960 pos += 4 961 962 case xReadCp: 963 immcpos = pos 964 if pos+6 > len(src) { 965 return truncated(src, mode) 966 } 967 w := binary.LittleEndian.Uint32(src[pos:]) 968 w2 := binary.LittleEndian.Uint16(src[pos+4:]) 969 immc = int64(w2)<<32 | int64(w) 970 pos += 6 971 972 // Output. 973 974 case xSetOp: 975 inst.Op = Op(decoder[pc]) 976 pc++ 977 978 case xArg1, 979 xArg3, 980 xArgAL, 981 xArgAX, 982 xArgCL, 983 xArgCS, 984 xArgDS, 985 xArgDX, 986 xArgEAX, 987 xArgEDX, 988 xArgES, 989 xArgFS, 990 xArgGS, 991 xArgRAX, 992 xArgRDX, 993 xArgSS, 994 xArgST, 995 xArgXMM0: 996 inst.Args[narg] = fixedArg[x] 997 narg++ 998 999 case xArgImm8: 1000 inst.Args[narg] = Imm(imm8) 1001 narg++ 1002 1003 case xArgImm8u: 1004 inst.Args[narg] = Imm(uint8(imm8)) 1005 narg++ 1006 1007 case xArgImm16: 1008 inst.Args[narg] = Imm(int16(imm)) 1009 narg++ 1010 1011 case xArgImm16u: 1012 inst.Args[narg] = Imm(uint16(imm)) 1013 narg++ 1014 1015 case xArgImm32: 1016 inst.Args[narg] = Imm(int32(imm)) 1017 narg++ 1018 1019 case xArgImm64: 1020 inst.Args[narg] = Imm(imm) 1021 narg++ 1022 1023 case xArgM, 1024 xArgM128, 1025 xArgM256, 1026 xArgM1428byte, 1027 xArgM16, 1028 xArgM16and16, 1029 xArgM16and32, 1030 xArgM16and64, 1031 xArgM16colon16, 1032 xArgM16colon32, 1033 xArgM16colon64, 1034 xArgM16int, 1035 xArgM2byte, 1036 xArgM32, 1037 xArgM32and32, 1038 xArgM32fp, 1039 xArgM32int, 1040 xArgM512byte, 1041 xArgM64, 1042 xArgM64fp, 1043 xArgM64int, 1044 xArgM8, 1045 xArgM80bcd, 1046 xArgM80dec, 1047 xArgM80fp, 1048 xArgM94108byte, 1049 xArgMem: 1050 if !haveMem { 1051 inst.Op = 0 1052 break Decode 1053 } 1054 inst.Args[narg] = mem 1055 inst.MemBytes = int(memBytes[decodeOp(x)]) 1056 if mem.Base == RIP { 1057 inst.PCRel = displen 1058 inst.PCRelOff = dispoff 1059 } 1060 narg++ 1061 1062 case xArgPtr16colon16: 1063 inst.Args[narg] = Imm(immc >> 16) 1064 inst.Args[narg+1] = Imm(immc & (1<<16 - 1)) 
1065 narg += 2 1066 1067 case xArgPtr16colon32: 1068 inst.Args[narg] = Imm(immc >> 32) 1069 inst.Args[narg+1] = Imm(immc & (1<<32 - 1)) 1070 narg += 2 1071 1072 case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64: 1073 // TODO(rsc): Can address be 64 bits? 1074 mem = Mem{Disp: int64(immc)} 1075 if segIndex >= 0 { 1076 mem.Segment = prefixToSegment(inst.Prefix[segIndex]) 1077 inst.Prefix[segIndex] |= PrefixImplicit 1078 } 1079 inst.Args[narg] = mem 1080 inst.MemBytes = int(memBytes[decodeOp(x)]) 1081 if mem.Base == RIP { 1082 inst.PCRel = displen 1083 inst.PCRelOff = dispoff 1084 } 1085 narg++ 1086 1087 case xArgYmm1: 1088 base := baseReg[x] 1089 index := Reg(regop) 1090 if inst.Prefix[vexIndex+1]&0x80 == 0 { 1091 index += 8 1092 } 1093 inst.Args[narg] = base + index 1094 narg++ 1095 1096 case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7: 1097 base := baseReg[x] 1098 index := Reg(regop) 1099 if rex != 0 && base == AL && index >= 4 { 1100 rexUsed |= PrefixREX 1101 index -= 4 1102 base = SPB 1103 } 1104 inst.Args[narg] = base + index 1105 narg++ 1106 1107 case xArgMm, xArgMm1, xArgTR0dashTR7: 1108 inst.Args[narg] = baseReg[x] + Reg(regop&7) 1109 narg++ 1110 1111 case xArgCR0dashCR7: 1112 // AMD documents an extension that the LOCK prefix 1113 // can be used in place of a REX prefix in order to access 1114 // CR8 from 32-bit mode. The LOCK prefix is allowed in 1115 // all modes, provided the corresponding CPUID bit is set. 
1116 if lockIndex >= 0 { 1117 inst.Prefix[lockIndex] |= PrefixImplicit 1118 regop += 8 1119 } 1120 inst.Args[narg] = CR0 + Reg(regop) 1121 narg++ 1122 1123 case xArgSreg: 1124 regop &= 7 1125 if regop >= 6 { 1126 inst.Op = 0 1127 break Decode 1128 } 1129 inst.Args[narg] = ES + Reg(regop) 1130 narg++ 1131 1132 case xArgRmf16, xArgRmf32, xArgRmf64: 1133 base := baseReg[x] 1134 index := Reg(modrm & 07) 1135 if rex&PrefixREXB != 0 { 1136 rexUsed |= PrefixREXB 1137 index += 8 1138 } 1139 inst.Args[narg] = base + index 1140 narg++ 1141 1142 case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi: 1143 n := inst.Opcode >> uint(opshift+8) & 07 1144 base := baseReg[x] 1145 index := Reg(n) 1146 if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi { 1147 rexUsed |= PrefixREXB 1148 index += 8 1149 } 1150 if rex != 0 && base == AL && index >= 4 { 1151 rexUsed |= PrefixREX 1152 index -= 4 1153 base = SPB 1154 } 1155 inst.Args[narg] = base + index 1156 narg++ 1157 case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16, 1158 xArgMmM32, xArgMmM64, xArgMm2M64, 1159 xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128, 1160 xArgYmm2M256: 1161 if haveMem { 1162 inst.Args[narg] = mem 1163 inst.MemBytes = int(memBytes[decodeOp(x)]) 1164 if mem.Base == RIP { 1165 inst.PCRel = displen 1166 inst.PCRelOff = dispoff 1167 } 1168 } else { 1169 base := baseReg[x] 1170 index := Reg(rm) 1171 switch decodeOp(x) { 1172 case xArgMmM32, xArgMmM64, xArgMm2M64: 1173 // There are only 8 MMX registers, so these ignore the REX.X bit. 
1174 index &= 7 1175 case xArgRM8: 1176 if rex != 0 && index >= 4 { 1177 rexUsed |= PrefixREX 1178 index -= 4 1179 base = SPB 1180 } 1181 case xArgYmm2M256: 1182 if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 { 1183 index += 8 1184 } 1185 } 1186 inst.Args[narg] = base + index 1187 } 1188 narg++ 1189 1190 case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag 1191 if haveMem { 1192 inst.Op = 0 1193 break Decode 1194 } 1195 inst.Args[narg] = baseReg[x] + Reg(rm&7) 1196 narg++ 1197 1198 case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag 1199 if haveMem { 1200 inst.Op = 0 1201 break Decode 1202 } 1203 inst.Args[narg] = baseReg[x] + Reg(rm) 1204 narg++ 1205 1206 case xArgRel8: 1207 inst.PCRelOff = immcpos 1208 inst.PCRel = 1 1209 inst.Args[narg] = Rel(int8(immc)) 1210 narg++ 1211 1212 case xArgRel16: 1213 inst.PCRelOff = immcpos 1214 inst.PCRel = 2 1215 inst.Args[narg] = Rel(int16(immc)) 1216 narg++ 1217 1218 case xArgRel32: 1219 inst.PCRelOff = immcpos 1220 inst.PCRel = 4 1221 inst.Args[narg] = Rel(int32(immc)) 1222 narg++ 1223 } 1224 } 1225 1226 if inst.Op == 0 { 1227 // Invalid instruction. 1228 if nprefix > 0 { 1229 return instPrefix(src[0], mode) // invalid instruction 1230 } 1231 1232 return Inst{Len: pos}, ErrUnrecognized 1233 } 1234 1235 // Matched! Hooray! 1236 1237 // 90 decodes as XCHG EAX, EAX but is NOP. 1238 // 66 90 decodes as XCHG AX, AX and is NOP too. 1239 // 48 90 decodes as XCHG RAX, RAX and is NOP too. 1240 // 43 90 decodes as XCHG R8D, EAX and is *not* NOP. 1241 // F3 90 decodes as REP XCHG EAX, EAX but is PAUSE. 1242 // It's all too special to handle in the decoding tables, at least for now. 
1243 if inst.Op == XCHG && inst.Opcode>>24 == 0x90 { 1244 if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX { 1245 inst.Op = NOP 1246 if dataSizeIndex >= 0 { 1247 inst.Prefix[dataSizeIndex] &^= PrefixImplicit 1248 } 1249 1250 inst.Args[0] = nil 1251 inst.Args[1] = nil 1252 } 1253 1254 if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 { 1255 inst.Prefix[repIndex] |= PrefixImplicit 1256 inst.Op = PAUSE 1257 inst.Args[0] = nil 1258 inst.Args[1] = nil 1259 } else if gnuCompat { 1260 for i := nprefix - 1; i >= 0; i-- { 1261 if inst.Prefix[i]&0xFF == 0xF3 { 1262 inst.Prefix[i] |= PrefixImplicit 1263 inst.Op = PAUSE 1264 inst.Args[0] = nil 1265 inst.Args[1] = nil 1266 break 1267 } 1268 } 1269 } 1270 } 1271 1272 // defaultSeg returns the default segment for an implicit 1273 // memory reference: the final override if present, or else DS. 1274 defaultSeg := func() Reg { 1275 if segIndex >= 0 { 1276 inst.Prefix[segIndex] |= PrefixImplicit 1277 return prefixToSegment(inst.Prefix[segIndex]) 1278 } 1279 1280 return DS 1281 } 1282 1283 // Add implicit arguments not present in the tables. 1284 // Normally we shy away from making implicit arguments explicit, 1285 // following the Intel manuals, but adding the arguments seems 1286 // the best way to express the effect of the segment override prefixes. 1287 // TODO(rsc): Perhaps add these to the tables and 1288 // create bytecode instructions for them. 
1289 usedAddrSize := false 1290 1291 switch inst.Op { 1292 case INSB, INSW, INSD: 1293 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1294 inst.Args[1] = DX 1295 usedAddrSize = true 1296 1297 case OUTSB, OUTSW, OUTSD: 1298 inst.Args[0] = DX 1299 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1300 usedAddrSize = true 1301 1302 case MOVSB, MOVSW, MOVSD, MOVSQ: 1303 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1304 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1305 usedAddrSize = true 1306 1307 case CMPSB, CMPSW, CMPSD, CMPSQ: 1308 inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1309 inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1310 usedAddrSize = true 1311 1312 case LODSB, LODSW, LODSD, LODSQ: 1313 switch inst.Op { 1314 case LODSB: 1315 inst.Args[0] = AL 1316 case LODSW: 1317 inst.Args[0] = AX 1318 case LODSD: 1319 inst.Args[0] = EAX 1320 case LODSQ: 1321 inst.Args[0] = RAX 1322 } 1323 1324 inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX} 1325 usedAddrSize = true 1326 1327 case STOSB, STOSW, STOSD, STOSQ: 1328 inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1329 switch inst.Op { 1330 case STOSB: 1331 inst.Args[1] = AL 1332 case STOSW: 1333 inst.Args[1] = AX 1334 case STOSD: 1335 inst.Args[1] = EAX 1336 case STOSQ: 1337 inst.Args[1] = RAX 1338 } 1339 1340 usedAddrSize = true 1341 1342 case SCASB, SCASW, SCASD, SCASQ: 1343 inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX} 1344 switch inst.Op { 1345 case SCASB: 1346 inst.Args[0] = AL 1347 case SCASW: 1348 inst.Args[0] = AX 1349 case SCASD: 1350 inst.Args[0] = EAX 1351 case SCASQ: 1352 inst.Args[0] = RAX 1353 } 1354 1355 usedAddrSize = true 1356 1357 case XLATB: 1358 inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - 
AX} 1359 usedAddrSize = true 1360 } 1361 1362 // If we used the address size annotation to construct the 1363 // argument list, mark that prefix as implicit: it doesn't need 1364 // to be shown when printing the instruction. 1365 if haveMem || usedAddrSize { 1366 if addrSizeIndex >= 0 { 1367 inst.Prefix[addrSizeIndex] |= PrefixImplicit 1368 } 1369 } 1370 1371 // Similarly, if there's some memory operand, the segment 1372 // will be shown there and doesn't need to be shown as an 1373 // explicit prefix. 1374 if haveMem { 1375 if segIndex >= 0 { 1376 inst.Prefix[segIndex] |= PrefixImplicit 1377 } 1378 } 1379 1380 // Branch predict prefixes are overloaded segment prefixes, 1381 // since segment prefixes don't make sense on conditional jumps. 1382 // Rewrite final instance to prediction prefix. 1383 // The set of instructions to which the prefixes apply (other then the 1384 // Jcc conditional jumps) is not 100% clear from the manuals, but 1385 // the disassemblers seem to agree about the LOOP and JCXZ instructions, 1386 // so we'll follow along. 1387 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1388 if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ { 1389 PredictLoop: 1390 for i := nprefix - 1; i >= 0; i-- { 1391 p := inst.Prefix[i] 1392 switch p & 0xFF { 1393 case PrefixCS: 1394 inst.Prefix[i] = PrefixPN 1395 break PredictLoop 1396 case PrefixDS: 1397 inst.Prefix[i] = PrefixPT 1398 break PredictLoop 1399 } 1400 } 1401 } 1402 1403 // The BND prefix is part of the Intel Memory Protection Extensions (MPX). 1404 // A REPN applied to certain control transfers is a BND prefix to bound 1405 // the range of possible destinations. There's surprisingly little documentation 1406 // about this, so we just do what libopcodes and xed agree on. 1407 // In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions 1408 // does not turn into a BND. 
1409 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1410 if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET { 1411 for i := nprefix - 1; i >= 0; i-- { 1412 p := inst.Prefix[i] 1413 if p&^PrefixIgnored == PrefixREPN { 1414 inst.Prefix[i] = PrefixBND 1415 break 1416 } 1417 } 1418 } 1419 1420 // The LOCK prefix only applies to certain instructions, and then only 1421 // to instances of the instruction with a memory destination. 1422 // Other uses of LOCK are invalid and cause a processor exception, 1423 // in contrast to the "just ignore it" spirit applied to all other prefixes. 1424 // Mark invalid lock prefixes. 1425 hasLock := false 1426 1427 if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 { 1428 switch inst.Op { 1429 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1430 case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG: 1431 if isMem(inst.Args[0]) { 1432 hasLock = true 1433 break 1434 } 1435 1436 fallthrough 1437 default: 1438 inst.Prefix[lockIndex] |= PrefixInvalid 1439 } 1440 } 1441 1442 // In certain cases, all of which require a memory destination, 1443 // the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE 1444 // from the Intel Transactional Synchroniation Extensions (TSX). 1445 // 1446 // The specific rules are: 1447 // (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE. 1448 // (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE. 1449 // (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE. 
1450 if isMem(inst.Args[0]) { 1451 if inst.Op == XCHG { 1452 hasLock = true 1453 } 1454 1455 for i := len(inst.Prefix) - 1; i >= 0; i-- { 1456 p := inst.Prefix[i] &^ PrefixIgnored 1457 switch p { 1458 case PrefixREPN: 1459 if hasLock { 1460 inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE 1461 } 1462 1463 case PrefixREP: 1464 if hasLock { 1465 inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE 1466 } 1467 1468 if inst.Op == MOV { 1469 op := (inst.Opcode >> 24) &^ 1 1470 if op == 0x88 || op == 0xC6 { 1471 inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE 1472 } 1473 } 1474 } 1475 } 1476 } 1477 1478 // If REP is used on a non-REP-able instruction, mark the prefix as ignored. 1479 if repIndex >= 0 { 1480 switch inst.Prefix[repIndex] { 1481 case PrefixREP, PrefixREPN: 1482 switch inst.Op { 1483 // According to the manuals, the REP/REPE prefix applies to all of these, 1484 // while the REPN applies only to some of them. However, both libopcodes 1485 // and xed show both prefixes explicitly for all instructions, so we do the same. 1486 // TODO(rsc): Perhaps this instruction class should be derived from the CSV. 1487 case INSB, INSW, INSD, 1488 MOVSB, MOVSW, MOVSD, MOVSQ, 1489 OUTSB, OUTSW, OUTSD, 1490 LODSB, LODSW, LODSD, LODSQ, 1491 CMPSB, CMPSW, CMPSD, CMPSQ, 1492 SCASB, SCASW, SCASD, SCASQ, 1493 STOSB, STOSW, STOSD, STOSQ: 1494 // ok 1495 default: 1496 inst.Prefix[repIndex] |= PrefixIgnored 1497 } 1498 } 1499 } 1500 1501 // If REX was present, mark implicit if all the 1 bits were consumed. 1502 if rexIndex >= 0 { 1503 if rexUsed != 0 { 1504 rexUsed |= PrefixREX 1505 } 1506 1507 if rex&^rexUsed == 0 { 1508 inst.Prefix[rexIndex] |= PrefixImplicit 1509 } 1510 } 1511 1512 inst.DataSize = dataMode 1513 inst.AddrSize = addrMode 1514 inst.Mode = mode 1515 inst.Len = pos 1516 1517 return inst, nil 1518 } 1519 1520 var errInternal = errors.New("internal error") 1521 1522 // addr16 records the eight 16-bit addressing modes. 
1523 var addr16 = [8]Mem{ 1524 {Base: BX, Scale: 1, Index: SI}, 1525 {Base: BX, Scale: 1, Index: DI}, 1526 {Base: BP, Scale: 1, Index: SI}, 1527 {Base: BP, Scale: 1, Index: DI}, 1528 {Base: SI}, 1529 {Base: DI}, 1530 {Base: BP}, 1531 {Base: BX}, 1532 } 1533 1534 // baseReg returns the base register for a given register size in bits. 1535 func baseRegForBits(bits int) Reg { 1536 switch bits { 1537 case 8: 1538 return AL 1539 case 16: 1540 return AX 1541 case 32: 1542 return EAX 1543 case 64: 1544 return RAX 1545 } 1546 1547 return 0 1548 } 1549 1550 // baseReg records the base register for argument types that specify 1551 // a range of registers indexed by op, regop, or rm. 1552 var baseReg = [...]Reg{ 1553 xArgDR0dashDR7: DR0, 1554 xArgMm1: M0, 1555 xArgMm2: M0, 1556 xArgMm2M64: M0, 1557 xArgMm: M0, 1558 xArgMmM32: M0, 1559 xArgMmM64: M0, 1560 xArgR16: AX, 1561 xArgR16op: AX, 1562 xArgR32: EAX, 1563 xArgR32M16: EAX, 1564 xArgR32M8: EAX, 1565 xArgR32op: EAX, 1566 xArgR64: RAX, 1567 xArgR64M16: RAX, 1568 xArgR64op: RAX, 1569 xArgR8: AL, 1570 xArgR8op: AL, 1571 xArgRM16: AX, 1572 xArgRM32: EAX, 1573 xArgRM64: RAX, 1574 xArgRM8: AL, 1575 xArgRmf16: AX, 1576 xArgRmf32: EAX, 1577 xArgRmf64: RAX, 1578 xArgSTi: F0, 1579 xArgTR0dashTR7: TR0, 1580 xArgXmm1: X0, 1581 xArgYmm1: X0, 1582 xArgXmm2: X0, 1583 xArgXmm2M128: X0, 1584 xArgYmm2M256: X0, 1585 xArgXmm2M16: X0, 1586 xArgXmm2M32: X0, 1587 xArgXmm2M64: X0, 1588 xArgXmm: X0, 1589 xArgXmmM128: X0, 1590 xArgXmmM32: X0, 1591 xArgXmmM64: X0, 1592 } 1593 1594 // prefixToSegment returns the segment register 1595 // corresponding to a particular segment prefix. 
1596 func prefixToSegment(p Prefix) Reg { 1597 switch p &^ PrefixImplicit { 1598 case PrefixCS: 1599 return CS 1600 case PrefixDS: 1601 return DS 1602 case PrefixES: 1603 return ES 1604 case PrefixFS: 1605 return FS 1606 case PrefixGS: 1607 return GS 1608 case PrefixSS: 1609 return SS 1610 } 1611 1612 return 0 1613 } 1614 1615 // fixedArg records the fixed arguments corresponding to the given bytecodes. 1616 var fixedArg = [...]Arg{ 1617 xArg1: Imm(1), 1618 xArg3: Imm(3), 1619 xArgAL: AL, 1620 xArgAX: AX, 1621 xArgDX: DX, 1622 xArgEAX: EAX, 1623 xArgEDX: EDX, 1624 xArgRAX: RAX, 1625 xArgRDX: RDX, 1626 xArgCL: CL, 1627 xArgCS: CS, 1628 xArgDS: DS, 1629 xArgES: ES, 1630 xArgFS: FS, 1631 xArgGS: GS, 1632 xArgSS: SS, 1633 xArgST: F0, 1634 xArgXMM0: X0, 1635 } 1636 1637 // memBytes records the size of the memory pointed at 1638 // by a memory argument of the given form. 1639 var memBytes = [...]int8{ 1640 xArgM128: 128 / 8, 1641 xArgM256: 256 / 8, 1642 xArgM16: 16 / 8, 1643 xArgM16and16: (16 + 16) / 8, 1644 xArgM16colon16: (16 + 16) / 8, 1645 xArgM16colon32: (16 + 32) / 8, 1646 xArgM16int: 16 / 8, 1647 xArgM2byte: 2, 1648 xArgM32: 32 / 8, 1649 xArgM32and32: (32 + 32) / 8, 1650 xArgM32fp: 32 / 8, 1651 xArgM32int: 32 / 8, 1652 xArgM64: 64 / 8, 1653 xArgM64fp: 64 / 8, 1654 xArgM64int: 64 / 8, 1655 xArgMm2M64: 64 / 8, 1656 xArgMmM32: 32 / 8, 1657 xArgMmM64: 64 / 8, 1658 xArgMoffs16: 16 / 8, 1659 xArgMoffs32: 32 / 8, 1660 xArgMoffs64: 64 / 8, 1661 xArgMoffs8: 1, // 8 / 8, 1662 xArgR32M16: 16 / 8, 1663 xArgR32M8: 1, //8 / 8, 1664 xArgR64M16: 16 / 8, 1665 xArgRM16: 16 / 8, 1666 xArgRM32: 32 / 8, 1667 xArgRM64: 64 / 8, 1668 xArgRM8: 1, //8 / 8, 1669 xArgXmm2M128: 128 / 8, 1670 xArgYmm2M256: 256 / 8, 1671 xArgXmm2M16: 16 / 8, 1672 xArgXmm2M32: 32 / 8, 1673 xArgXmm2M64: 64 / 8, 1674 xArgXmm: 128 / 8, 1675 xArgXmmM128: 128 / 8, 1676 xArgXmmM32: 32 / 8, 1677 xArgXmmM64: 64 / 8, 1678 } 1679 1680 // isCondJmp records the conditional jumps. 
1681 var isCondJmp = [maxOp + 1]bool{ 1682 JA: true, 1683 JAE: true, 1684 JB: true, 1685 JBE: true, 1686 JE: true, 1687 JG: true, 1688 JGE: true, 1689 JL: true, 1690 JLE: true, 1691 JNE: true, 1692 JNO: true, 1693 JNP: true, 1694 JNS: true, 1695 JO: true, 1696 JP: true, 1697 JS: true, 1698 } 1699 1700 // isLoop records the loop operators. 1701 var isLoop = [maxOp + 1]bool{ 1702 LOOP: true, 1703 LOOPE: true, 1704 LOOPNE: true, 1705 JECXZ: true, 1706 JRCXZ: true, 1707 }