github.com/bir3/gocompiler@v0.9.2202/src/xvendor/golang.org/x/arch/x86/x86asm/decode.go

github.com/bir3/gocompiler@v0.9.2202/src/xvendor/golang.org/x/arch/x86/x86asm/decode.go (about)

     1  // Copyright 2014 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Table-driven decoding of x86 instructions.
     6  
     7  package x86asm
     8  
     9  import (
    10  	"encoding/binary"
    11  	"errors"
    12  	"fmt"
    13  	"runtime"
    14  )
    15  
    16  // Set trace to true to cause the decoder to print the PC sequence
    17  // of the executed instruction codes. This is typically only useful
    18  // when you are running a test of a single input case.
    19  const trace = false
    20  
    21  // A decodeOp is a single instruction in the decoder bytecode program.
    22  //
    23  // The decodeOps correspond to consuming and conditionally branching
    24  // on input bytes, consuming additional fields, and then interpreting
    25  // consumed data as instruction arguments. The names of the xRead and xArg
    26  // operations are taken from the Intel manual conventions, for example
    27  // Volume 2, Section 3.1.1, page 487 of
    28  // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
    29  //
    30  // The actual decoding program is generated by ../x86map.
    31  //
    32  // TODO(rsc): We may be able to merge various of the memory operands
    33  // since we don't care about, say, the distinction between m80dec and m80bcd.
    34  // Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
    35  
// decodeOp is an operation code of the decoder bytecode program.
// The x-prefixed constants that follow enumerate its values.
type decodeOp uint16
    37  
// Operations of the decoder bytecode. Values are assigned consecutively by
// iota, starting with xFail = 0:
//
//   - xFail, xMatch, xJump: stop decoding or jump within the decoder program
//   - xCond*: branch on an input byte, the /r field, a prefix, or a mode/size
//   - xSetOp: set the instruction opcode
//   - xRead*: consume an additional field (/r, ib..io, cb..cm)
//   - xArg*: interpret consumed data as an instruction argument
//
// NOTE(review): the decoder program is generated by ../x86map and presumably
// embeds these numeric values, so the order here should not change without
// regenerating it — confirm against the generator.
const (
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	xSetOp // set instruction opcode

	xReadSlashR // read /r
	xReadIb     // read ib
	xReadIw     // read iw
	xReadId     // read id
	xReadIo     // read io
	xReadCb     // read cb
	xReadCw     // read cw
	xReadCd     // read cd
	xReadCp     // read cp
	xReadCm     // read cm

	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM256         // arg m256
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM           // arg r/m
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgReg          // arg reg
	xArgRegM16       // arg reg/m16
	xArgRegM32       // arg reg/m32
	xArgRegM8        // arg reg/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgYmm2M256     // arg ymm2/m256
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgYmm1         // arg ymm1
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)
   171  
   172  // instPrefix returns an Inst describing just one prefix byte.
   173  // It is only used if there is a prefix followed by an unintelligible
   174  // or invalid instruction byte sequence.
   175  func instPrefix(b byte, mode int) (Inst, error) {
   176  	// When tracing it is useful to see what called instPrefix to report an error.
   177  	if trace {
   178  		_, file, line, _ := runtime.Caller(1)
   179  		fmt.Printf("%s:%d\n", file, line)
   180  	}
   181  	p := Prefix(b)
   182  	switch p {
   183  	case PrefixDataSize:
   184  		if mode == 16 {
   185  			p = PrefixData32
   186  		} else {
   187  			p = PrefixData16
   188  		}
   189  	case PrefixAddrSize:
   190  		if mode == 32 {
   191  			p = PrefixAddr16
   192  		} else {
   193  			p = PrefixAddr32
   194  		}
   195  	}
   196  	// Note: using composite literal with Prefix key confuses 'bundle' tool.
   197  	inst := Inst{Len: 1}
   198  	inst.Prefix = Prefixes{p}
   199  	return inst, nil
   200  }
   201  
   202  // truncated reports a truncated instruction.
   203  // For now we use instPrefix but perhaps later we will return
   204  // a specific error here.
   205  func truncated(src []byte, mode int) (Inst, error) {
   206  	if len(src) == 0 {
   207  		return Inst{}, ErrTruncated
   208  	}
   209  	return instPrefix(src[0], mode) // too long
   210  }
   211  
// These are the errors returned by Decode.
//
// ErrInvalidMode is returned when the mode argument is not 16, 32, or 64.
//
// ErrTruncated is what truncated returns when src is empty; for a non-empty
// but incomplete input, truncated currently returns a one-byte prefix Inst
// from instPrefix instead.
//
// NOTE(review): ErrUnrecognized is presumably reported when the bytes match
// no known instruction; its return site is not in this part of the file —
// confirm.
var (
	ErrInvalidMode  = errors.New("invalid x86 mode in Decode")
	ErrTruncated    = errors.New("truncated instruction")
	ErrUnrecognized = errors.New("unrecognized instruction")
)
   218  
// decoderCover records coverage information for which parts
// of the byte code have been executed.
//
// Its zero value is nil, which disables recording. When it is non-nil,
// decode1 sets decoderCover[pc] to true for each decoder program counter
// it executes, so it needs an entry for every pc that can be reached.
var decoderCover []bool
   222  
// Decode decodes the leading bytes in src as a single instruction.
// The mode argument specifies the assumed processor mode:
// 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
// Any other mode yields ErrInvalidMode.
//
// At most the first 15 bytes of src are examined, since that is the
// maximum instruction size. Decode is decode1 with gnuCompat disabled.
func Decode(src []byte, mode int) (inst Inst, err error) {
	return decode1(src, mode, false)
}
   229  
   230  // decode1 is the implementation of Decode but takes an extra
   231  // gnuCompat flag to cause it to change its behavior to mimic
   232  // bugs (or at least unique features) of GNU libopcodes as used
   233  // by objdump. We don't believe that logic is the right thing to do
   234  // in general, but when testing against libopcodes it simplifies the
   235  // comparison if we adjust a few small pieces of logic.
   236  // The affected logic is in the conditional branch for "mandatory" prefixes,
   237  // case xCondPrefix.
   238  func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
   239  	switch mode {
   240  	case 16, 32, 64:
   241  		// ok
   242  		// TODO(rsc): 64-bit mode not tested, probably not working.
   243  	default:
   244  		return Inst{}, ErrInvalidMode
   245  	}
   246  
   247  	// Maximum instruction size is 15 bytes.
   248  	// If we need to read more, report the instruction as truncated.
   249  	if len(src) > 15 {
   250  		src = src[:15]
   251  	}
   252  
   253  	var (
   254  		// prefix decoding information
   255  		pos           = 0    // position reading src
   256  		nprefix       = 0    // number of prefixes
   257  		lockIndex     = -1   // index of LOCK prefix in src and inst.Prefix
   258  		repIndex      = -1   // index of REP/REPN prefix in src and inst.Prefix
   259  		segIndex      = -1   // index of Group 2 prefix in src and inst.Prefix
   260  		dataSizeIndex = -1   // index of Group 3 prefix in src and inst.Prefix
   261  		addrSizeIndex = -1   // index of Group 4 prefix in src and inst.Prefix
   262  		rex           Prefix // rex byte if present (or 0)
   263  		rexUsed       Prefix // bits used in rex byte
   264  		rexIndex      = -1   // index of rex byte
   265  		vex           Prefix // use vex encoding
   266  		vexIndex      = -1   // index of vex prefix
   267  
   268  		addrMode = mode // address mode (width in bits)
   269  		dataMode = mode // operand mode (width in bits)
   270  
   271  		// decoded ModR/M fields
   272  		haveModrm bool
   273  		modrm     int
   274  		mod       int
   275  		regop     int
   276  		rm        int
   277  
   278  		// if ModR/M is memory reference, Mem form
   279  		mem     Mem
   280  		haveMem bool
   281  
   282  		// decoded SIB fields
   283  		haveSIB bool
   284  		sib     int
   285  		scale   int
   286  		index   int
   287  		base    int
   288  		displen int
   289  		dispoff int
   290  
   291  		// decoded immediate values
   292  		imm     int64
   293  		imm8    int8
   294  		immc    int64
   295  		immcpos int
   296  
   297  		// output
   298  		opshift int
   299  		inst    Inst
   300  		narg    int // number of arguments written to inst
   301  	)
   302  
   303  	if mode == 64 {
   304  		dataMode = 32
   305  	}
   306  
   307  	// Prefixes are certainly the most complex and underspecified part of
   308  	// decoding x86 instructions. Although the manuals say things like
   309  	// up to four prefixes, one from each group, nearly everyone seems to
   310  	// agree that in practice as many prefixes as possible, including multiple
   311  	// from a particular group or repetitions of a given prefix, can be used on
   312  	// an instruction, provided the total instruction length including prefixes
   313  	// does not exceed the agreed-upon maximum of 15 bytes.
   314  	// Everyone also agrees that if one of these prefixes is the LOCK prefix
   315  	// and the instruction is not one of the instructions that can be used with
   316  	// the LOCK prefix or if the destination is not a memory operand,
   317  	// then the instruction is invalid and produces the #UD exception.
   318  	// However, that is the end of any semblance of agreement.
   319  	//
   320  	// What happens if prefixes are given that conflict with other prefixes?
   321  	// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
   322  	// conflict with each other: only one segment can be in effect.
   323  	// Disassemblers seem to agree that later prefixes take priority over
   324  	// earlier ones. I have not taken the time to write assembly programs
   325  	// to check to see if the hardware agrees.
   326  	//
   327  	// What happens if prefixes are given that have no meaning for the
   328  	// specific instruction to which they are attached? It depends.
   329  	// If they really have no meaning, they are ignored. However, a future
   330  	// processor may assign a different meaning. As a disassembler, we
   331  	// don't really know whether we're seeing a meaningless prefix or one
   332  	// whose meaning we simply haven't been told yet.
   333  	//
   334  	// Combining the two questions, what happens when conflicting
   335  	// extension prefixes are given? No one seems to know for sure.
   336  	// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
   337  	// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
   338  	// Which prefix wins? See the xCondPrefix case for more.
   339  	//
   340  	// Writing assembly test cases to divine which interpretation the
   341  	// CPU uses might clarify the situation, but more likely it would
   342  	// make the situation even less clear.
   343  
   344  	// Read non-REX prefixes.
   345  ReadPrefixes:
   346  	for ; pos < len(src); pos++ {
   347  		p := Prefix(src[pos])
   348  		switch p {
   349  		default:
   350  			nprefix = pos
   351  			break ReadPrefixes
   352  
   353  		// Group 1 - lock and repeat prefixes
   354  		// According to Intel, there should only be one from this set,
   355  		// but according to AMD both can be present.
   356  		case 0xF0:
   357  			if lockIndex >= 0 {
   358  				inst.Prefix[lockIndex] |= PrefixIgnored
   359  			}
   360  			lockIndex = pos
   361  		case 0xF2, 0xF3:
   362  			if repIndex >= 0 {
   363  				inst.Prefix[repIndex] |= PrefixIgnored
   364  			}
   365  			repIndex = pos
   366  
   367  		// Group 2 - segment override / branch hints
   368  		case 0x26, 0x2E, 0x36, 0x3E:
   369  			if mode == 64 {
   370  				p |= PrefixIgnored
   371  				break
   372  			}
   373  			fallthrough
   374  		case 0x64, 0x65:
   375  			if segIndex >= 0 {
   376  				inst.Prefix[segIndex] |= PrefixIgnored
   377  			}
   378  			segIndex = pos
   379  
   380  		// Group 3 - operand size override
   381  		case 0x66:
   382  			if mode == 16 {
   383  				dataMode = 32
   384  				p = PrefixData32
   385  			} else {
   386  				dataMode = 16
   387  				p = PrefixData16
   388  			}
   389  			if dataSizeIndex >= 0 {
   390  				inst.Prefix[dataSizeIndex] |= PrefixIgnored
   391  			}
   392  			dataSizeIndex = pos
   393  
   394  		// Group 4 - address size override
   395  		case 0x67:
   396  			if mode == 32 {
   397  				addrMode = 16
   398  				p = PrefixAddr16
   399  			} else {
   400  				addrMode = 32
   401  				p = PrefixAddr32
   402  			}
   403  			if addrSizeIndex >= 0 {
   404  				inst.Prefix[addrSizeIndex] |= PrefixIgnored
   405  			}
   406  			addrSizeIndex = pos
   407  
   408  		//Group 5 - Vex encoding
   409  		case 0xC5:
   410  			if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
   411  				vex = p
   412  				vexIndex = pos
   413  				inst.Prefix[pos] = p
   414  				inst.Prefix[pos+1] = Prefix(src[pos+1])
   415  				pos += 1
   416  				continue
   417  			} else {
   418  				nprefix = pos
   419  				break ReadPrefixes
   420  			}
   421  		case 0xC4:
   422  			if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
   423  				vex = p
   424  				vexIndex = pos
   425  				inst.Prefix[pos] = p
   426  				inst.Prefix[pos+1] = Prefix(src[pos+1])
   427  				inst.Prefix[pos+2] = Prefix(src[pos+2])
   428  				pos += 2
   429  				continue
   430  			} else {
   431  				nprefix = pos
   432  				break ReadPrefixes
   433  			}
   434  		}
   435  
   436  		if pos >= len(inst.Prefix) {
   437  			return instPrefix(src[0], mode) // too long
   438  		}
   439  
   440  		inst.Prefix[pos] = p
   441  	}
   442  
   443  	// Read REX prefix.
   444  	if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 {
   445  		rex = Prefix(src[pos])
   446  		rexIndex = pos
   447  		if pos >= len(inst.Prefix) {
   448  			return instPrefix(src[0], mode) // too long
   449  		}
   450  		inst.Prefix[pos] = rex
   451  		pos++
   452  		if rex&PrefixREXW != 0 {
   453  			dataMode = 64
   454  			if dataSizeIndex >= 0 {
   455  				inst.Prefix[dataSizeIndex] |= PrefixIgnored
   456  			}
   457  		}
   458  	}
   459  
   460  	// Decode instruction stream, interpreting decoding instructions.
   461  	// opshift gives the shift to use when saving the next
   462  	// opcode byte into inst.Opcode.
   463  	opshift = 24
   464  
   465  	// Decode loop, executing decoder program.
   466  	var oldPC, prevPC int
   467  Decode:
   468  	for pc := 1; ; { // TODO uint
   469  		oldPC = prevPC
   470  		prevPC = pc
   471  		if trace {
   472  			println("run", pc)
   473  		}
   474  		x := decoder[pc]
   475  		if decoderCover != nil {
   476  			decoderCover[pc] = true
   477  		}
   478  		pc++
   479  
   480  		// Read and decode ModR/M if needed by opcode.
   481  		switch decodeOp(x) {
   482  		case xCondSlashR, xReadSlashR:
   483  			if haveModrm {
   484  				return Inst{Len: pos}, errInternal
   485  			}
   486  			haveModrm = true
   487  			if pos >= len(src) {
   488  				return truncated(src, mode)
   489  			}
   490  			modrm = int(src[pos])
   491  			pos++
   492  			if opshift >= 0 {
   493  				inst.Opcode |= uint32(modrm) << uint(opshift)
   494  				opshift -= 8
   495  			}
   496  			mod = modrm >> 6
   497  			regop = (modrm >> 3) & 07
   498  			rm = modrm & 07
   499  			if rex&PrefixREXR != 0 {
   500  				rexUsed |= PrefixREXR
   501  				regop |= 8
   502  			}
   503  			if addrMode == 16 {
   504  				// 16-bit modrm form
   505  				if mod != 3 {
   506  					haveMem = true
   507  					mem = addr16[rm]
   508  					if rm == 6 && mod == 0 {
   509  						mem.Base = 0
   510  					}
   511  
   512  					// Consume disp16 if present.
   513  					if mod == 0 && rm == 6 || mod == 2 {
   514  						if pos+2 > len(src) {
   515  							return truncated(src, mode)
   516  						}
   517  						mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
   518  						pos += 2
   519  					}
   520  
   521  					// Consume disp8 if present.
   522  					if mod == 1 {
   523  						if pos >= len(src) {
   524  							return truncated(src, mode)
   525  						}
   526  						mem.Disp = int64(int8(src[pos]))
   527  						pos++
   528  					}
   529  				}
   530  			} else {
   531  				haveMem = mod != 3
   532  
   533  				// 32-bit or 64-bit form
   534  				// Consume SIB encoding if present.
   535  				if rm == 4 && mod != 3 {
   536  					haveSIB = true
   537  					if pos >= len(src) {
   538  						return truncated(src, mode)
   539  					}
   540  					sib = int(src[pos])
   541  					pos++
   542  					if opshift >= 0 {
   543  						inst.Opcode |= uint32(sib) << uint(opshift)
   544  						opshift -= 8
   545  					}
   546  					scale = sib >> 6
   547  					index = (sib >> 3) & 07
   548  					base = sib & 07
   549  					if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 {
   550  						rexUsed |= PrefixREXB
   551  						base |= 8
   552  					}
   553  					if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 {
   554  						rexUsed |= PrefixREXX
   555  						index |= 8
   556  					}
   557  
   558  					mem.Scale = 1 << uint(scale)
   559  					if index == 4 {
   560  						// no mem.Index
   561  					} else {
   562  						mem.Index = baseRegForBits(addrMode) + Reg(index)
   563  					}
   564  					if base&7 == 5 && mod == 0 {
   565  						// no mem.Base
   566  					} else {
   567  						mem.Base = baseRegForBits(addrMode) + Reg(base)
   568  					}
   569  				} else {
   570  					if rex&PrefixREXB != 0 {
   571  						rexUsed |= PrefixREXB
   572  						rm |= 8
   573  					}
   574  					if mod == 0 && rm&7 == 5 || rm&7 == 4 {
   575  						// base omitted
   576  					} else if mod != 3 {
   577  						mem.Base = baseRegForBits(addrMode) + Reg(rm)
   578  					}
   579  				}
   580  
   581  				// Consume disp32 if present.
   582  				if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
   583  					if pos+4 > len(src) {
   584  						return truncated(src, mode)
   585  					}
   586  					dispoff = pos
   587  					displen = 4
   588  					mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
   589  					pos += 4
   590  				}
   591  
   592  				// Consume disp8 if present.
   593  				if mod == 1 {
   594  					if pos >= len(src) {
   595  						return truncated(src, mode)
   596  					}
   597  					dispoff = pos
   598  					displen = 1
   599  					mem.Disp = int64(int8(src[pos]))
   600  					pos++
   601  				}
   602  
   603  				// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
   604  				// See Vol 2A. Table 2-7.
   605  				if mode == 64 && mod == 0 && rm&7 == 5 {
   606  					if addrMode == 32 {
   607  						mem.Base = EIP
   608  					} else {
   609  						mem.Base = RIP
   610  					}
   611  				}
   612  			}
   613  
   614  			if segIndex >= 0 {
   615  				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
   616  			}
   617  		}
   618  
   619  		// Execute single opcode.
   620  		switch decodeOp(x) {
   621  		default:
   622  			println("bad op", x, "at", pc-1, "from", oldPC)
   623  			return Inst{Len: pos}, errInternal
   624  
   625  		case xFail:
   626  			inst.Op = 0
   627  			break Decode
   628  
   629  		case xMatch:
   630  			break Decode
   631  
   632  		case xJump:
   633  			pc = int(decoder[pc])
   634  
   635  		// Conditional branches.
   636  
   637  		case xCondByte:
   638  			if pos >= len(src) {
   639  				return truncated(src, mode)
   640  			}
   641  			b := src[pos]
   642  			n := int(decoder[pc])
   643  			pc++
   644  			for i := 0; i < n; i++ {
   645  				xb, xpc := decoder[pc], int(decoder[pc+1])
   646  				pc += 2
   647  				if b == byte(xb) {
   648  					pc = xpc
   649  					pos++
   650  					if opshift >= 0 {
   651  						inst.Opcode |= uint32(b) << uint(opshift)
   652  						opshift -= 8
   653  					}
   654  					continue Decode
   655  				}
   656  			}
   657  			// xCondByte is the only conditional with a fall through,
   658  			// so that it can be used to pick off special cases before
   659  			// an xCondSlash. If the fallthrough instruction is xFail,
   660  			// advance the position so that the decoded instruction
   661  			// size includes the byte we just compared against.
   662  			if decodeOp(decoder[pc]) == xJump {
   663  				pc = int(decoder[pc+1])
   664  			}
   665  			if decodeOp(decoder[pc]) == xFail {
   666  				pos++
   667  			}
   668  
   669  		case xCondIs64:
   670  			if mode == 64 {
   671  				pc = int(decoder[pc+1])
   672  			} else {
   673  				pc = int(decoder[pc])
   674  			}
   675  
   676  		case xCondIsMem:
   677  			mem := haveMem
   678  			if !haveModrm {
   679  				if pos >= len(src) {
   680  					return instPrefix(src[0], mode) // too long
   681  				}
   682  				mem = src[pos]>>6 != 3
   683  			}
   684  			if mem {
   685  				pc = int(decoder[pc+1])
   686  			} else {
   687  				pc = int(decoder[pc])
   688  			}
   689  
   690  		case xCondDataSize:
   691  			switch dataMode {
   692  			case 16:
   693  				if dataSizeIndex >= 0 {
   694  					inst.Prefix[dataSizeIndex] |= PrefixImplicit
   695  				}
   696  				pc = int(decoder[pc])
   697  			case 32:
   698  				if dataSizeIndex >= 0 {
   699  					inst.Prefix[dataSizeIndex] |= PrefixImplicit
   700  				}
   701  				pc = int(decoder[pc+1])
   702  			case 64:
   703  				rexUsed |= PrefixREXW
   704  				pc = int(decoder[pc+2])
   705  			}
   706  
   707  		case xCondAddrSize:
   708  			switch addrMode {
   709  			case 16:
   710  				if addrSizeIndex >= 0 {
   711  					inst.Prefix[addrSizeIndex] |= PrefixImplicit
   712  				}
   713  				pc = int(decoder[pc])
   714  			case 32:
   715  				if addrSizeIndex >= 0 {
   716  					inst.Prefix[addrSizeIndex] |= PrefixImplicit
   717  				}
   718  				pc = int(decoder[pc+1])
   719  			case 64:
   720  				pc = int(decoder[pc+2])
   721  			}
   722  
   723  		case xCondPrefix:
   724  			// Conditional branch based on presence or absence of prefixes.
   725  			// The conflict cases here are completely undocumented and
   726  			// differ significantly between GNU libopcodes and Intel xed.
   727  			// I have not written assembly code to divine what various CPUs
   728  			// do, but it wouldn't surprise me if they are not consistent either.
   729  			//
   730  			// The basic idea is to switch on the presence of a prefix, so that
   731  			// for example:
   732  			//
   733  			//	xCondPrefix, 4
   734  			//	0xF3, 123,
   735  			//	0xF2, 234,
   736  			//	0x66, 345,
   737  			//	0, 456
   738  			//
   739  			// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
   740  			// is present, 345 if the 66 prefix is present, and 456 otherwise.
   741  			// The prefixes are given in descending order so that the 0 will be last.
   742  			//
   743  			// It is unclear what should happen if multiple conditions are
   744  			// satisfied: what if F2 and F3 are both present, or if 66 and F2
   745  			// are present, or if all three are present? The one chosen becomes
   746  			// part of the opcode and the others do not. Perhaps the answer
   747  			// depends on the specific opcodes in question.
   748  			//
   749  			// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
   750  			// it comes in 16-bit and 32-bit forms based on the 66 prefix,
   751  			// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
   752  			// with the 66 being only an operand size override, and probably
   753  			// F2 66 0F 38 F1 /r should be treated the same.
   754  			// Perhaps that rule is specific to the case of CRC32, since no
   755  			// 66 0F 38 F1 instruction is defined (today) (that we know of).
   756  			// However, both libopcodes and xed seem to generalize this
   757  			// example and choose F2/F3 in preference to 66, and we
   758  			// do the same.
   759  			//
   760  			// Next, what if both F2 and F3 are present? Which wins?
   761  			// The Intel xed rule, and ours, is that the one that occurs last wins.
   762  			// The GNU libopcodes rule, which we implement only in gnuCompat mode,
   763  			// is that F3 beats F2 unless F3 has no special meaning, in which
   764  			// case F3 can be a modifier on an F2 special meaning.
   765  			//
   766  			// Concretely,
   767  			//	66 0F D6 /r is MOVQ
   768  			//	F2 0F D6 /r is MOVDQ2Q
   769  			//	F3 0F D6 /r is MOVQ2DQ.
   770  			//
   771  			//	F2 66 0F D6 /r is 66 + MOVDQ2Q always.
   772  			//	66 F2 0F D6 /r is 66 + MOVDQ2Q always.
   773  			//	F3 66 0F D6 /r is 66 + MOVQ2DQ always.
   774  			//	66 F3 0F D6 /r is 66 + MOVQ2DQ always.
   775  			//	F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
   776  			//	F3 F2 0F D6 /r is F3 + MOVDQ2Q in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
   777  			//	Adding 66 anywhere in the prefix section of the
   778  			//	last two cases does not change the outcome.
   779  			//
   780  			// Finally, what if there is a variant in which 66 is a mandatory
   781  			// prefix rather than an operand size override, but we know of
   782  			// no corresponding F2/F3 form, and we see both F2/F3 and 66.
   783  			// Does F2/F3 still take priority, so that the result is an unknown
   784  			// instruction, or does the 66 take priority, so that the extended
   785  			// 66 instruction should be interpreted as having a REP/REPN prefix?
   786  			// Intel xed does the former and GNU libopcodes does the latter.
   787  			// We side with Intel xed, unless we are trying to match libopcodes
   788  			// more closely during the comparison-based test suite.
   789  			//
   790  			// In 64-bit mode REX.W is another valid prefix to test for, but
   791  			// there is less ambiguity about that. When present, REX.W is
   792  			// always the first entry in the table.
   793  			n := int(decoder[pc])
   794  			pc++
   795  			sawF3 := false
   796  			for j := 0; j < n; j++ {
   797  				prefix := Prefix(decoder[pc+2*j])
   798  				if prefix.IsREX() {
   799  					rexUsed |= prefix
   800  					if rex&prefix == prefix {
   801  						pc = int(decoder[pc+2*j+1])
   802  						continue Decode
   803  					}
   804  					continue
   805  				}
   806  				ok := false
   807  				if prefix == 0 {
   808  					ok = true
   809  				} else if prefix.IsREX() {
   810  					rexUsed |= prefix
   811  					if rex&prefix == prefix {
   812  						ok = true
   813  					}
   814  				} else if prefix == 0xC5 || prefix == 0xC4 {
   815  					if vex == prefix {
   816  						ok = true
   817  					}
   818  				} else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A ||
   819  					prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) {
   820  					var vexM, vexP Prefix
   821  					if vex == 0xC5 {
   822  						vexM = 1 // 2 byte vex always implies 0F
   823  						vexP = inst.Prefix[vexIndex+1]
   824  					} else {
   825  						vexM = inst.Prefix[vexIndex+1]
   826  						vexP = inst.Prefix[vexIndex+2]
   827  					}
   828  					switch prefix {
   829  					case 0x66:
   830  						ok = vexP&3 == 1
   831  					case 0xF3:
   832  						ok = vexP&3 == 2
   833  					case 0xF2:
   834  						ok = vexP&3 == 3
   835  					case 0x0F:
   836  						ok = vexM&3 == 1
   837  					case 0x0F38:
   838  						ok = vexM&3 == 2
   839  					case 0x0F3A:
   840  						ok = vexM&3 == 3
   841  					}
   842  				} else {
   843  					if prefix == 0xF3 {
   844  						sawF3 = true
   845  					}
   846  					switch prefix {
   847  					case PrefixLOCK:
   848  						if lockIndex >= 0 {
   849  							inst.Prefix[lockIndex] |= PrefixImplicit
   850  							ok = true
   851  						}
   852  					case PrefixREP, PrefixREPN:
   853  						if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
   854  							inst.Prefix[repIndex] |= PrefixImplicit
   855  							ok = true
   856  						}
   857  						if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
   858  							// Check to see if earlier prefix F3 is present.
   859  							for i := repIndex - 1; i >= 0; i-- {
   860  								if inst.Prefix[i]&0xFF == prefix {
   861  									inst.Prefix[i] |= PrefixImplicit
   862  									ok = true
   863  								}
   864  							}
   865  						}
   866  						if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
   867  							// Check to see if earlier prefix F2 is present.
   868  							for i := repIndex - 1; i >= 0; i-- {
   869  								if inst.Prefix[i]&0xFF == prefix {
   870  									inst.Prefix[i] |= PrefixImplicit
   871  									ok = true
   872  								}
   873  							}
   874  						}
   875  					case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
   876  						if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
   877  							inst.Prefix[segIndex] |= PrefixImplicit
   878  							ok = true
   879  						}
   880  					case PrefixDataSize:
   881  						// Looking for 66 mandatory prefix.
   882  						// The F2/F3 mandatory prefixes take priority when both are present.
   883  						// If we got this far in the xCondPrefix table and an F2/F3 is present,
   884  						// it means the table didn't have any entry for that prefix. But if 66 has
   885  						// special meaning, perhaps F2/F3 have special meaning that we don't know.
   886  						// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
   887  						// GNU libopcodes allows the 66 to match. We do what Intel xed does
   888  						// except in gnuCompat mode.
   889  						if repIndex >= 0 && !gnuCompat {
   890  							inst.Op = 0
   891  							break Decode
   892  						}
   893  						if dataSizeIndex >= 0 {
   894  							inst.Prefix[dataSizeIndex] |= PrefixImplicit
   895  							ok = true
   896  						}
   897  					case PrefixAddrSize:
   898  						if addrSizeIndex >= 0 {
   899  							inst.Prefix[addrSizeIndex] |= PrefixImplicit
   900  							ok = true
   901  						}
   902  					}
   903  				}
   904  				if ok {
   905  					pc = int(decoder[pc+2*j+1])
   906  					continue Decode
   907  				}
   908  			}
   909  			inst.Op = 0
   910  			break Decode
   911  
   912  		case xCondSlashR:
   913  			pc = int(decoder[pc+regop&7])
   914  
   915  		// Input.
   916  
   917  		case xReadSlashR:
   918  			// done above
   919  
   920  		case xReadIb:
   921  			if pos >= len(src) {
   922  				return truncated(src, mode)
   923  			}
   924  			imm8 = int8(src[pos])
   925  			pos++
   926  
   927  		case xReadIw:
   928  			if pos+2 > len(src) {
   929  				return truncated(src, mode)
   930  			}
   931  			imm = int64(binary.LittleEndian.Uint16(src[pos:]))
   932  			pos += 2
   933  
   934  		case xReadId:
   935  			if pos+4 > len(src) {
   936  				return truncated(src, mode)
   937  			}
   938  			imm = int64(binary.LittleEndian.Uint32(src[pos:]))
   939  			pos += 4
   940  
   941  		case xReadIo:
   942  			if pos+8 > len(src) {
   943  				return truncated(src, mode)
   944  			}
   945  			imm = int64(binary.LittleEndian.Uint64(src[pos:]))
   946  			pos += 8
   947  
   948  		case xReadCb:
   949  			if pos >= len(src) {
   950  				return truncated(src, mode)
   951  			}
   952  			immcpos = pos
   953  			immc = int64(src[pos])
   954  			pos++
   955  
   956  		case xReadCw:
   957  			if pos+2 > len(src) {
   958  				return truncated(src, mode)
   959  			}
   960  			immcpos = pos
   961  			immc = int64(binary.LittleEndian.Uint16(src[pos:]))
   962  			pos += 2
   963  
   964  		case xReadCm:
   965  			immcpos = pos
   966  			if addrMode == 16 {
   967  				if pos+2 > len(src) {
   968  					return truncated(src, mode)
   969  				}
   970  				immc = int64(binary.LittleEndian.Uint16(src[pos:]))
   971  				pos += 2
   972  			} else if addrMode == 32 {
   973  				if pos+4 > len(src) {
   974  					return truncated(src, mode)
   975  				}
   976  				immc = int64(binary.LittleEndian.Uint32(src[pos:]))
   977  				pos += 4
   978  			} else {
   979  				if pos+8 > len(src) {
   980  					return truncated(src, mode)
   981  				}
   982  				immc = int64(binary.LittleEndian.Uint64(src[pos:]))
   983  				pos += 8
   984  			}
   985  		case xReadCd:
   986  			immcpos = pos
   987  			if pos+4 > len(src) {
   988  				return truncated(src, mode)
   989  			}
   990  			immc = int64(binary.LittleEndian.Uint32(src[pos:]))
   991  			pos += 4
   992  
   993  		case xReadCp:
   994  			immcpos = pos
   995  			if pos+6 > len(src) {
   996  				return truncated(src, mode)
   997  			}
   998  			w := binary.LittleEndian.Uint32(src[pos:])
   999  			w2 := binary.LittleEndian.Uint16(src[pos+4:])
  1000  			immc = int64(w2)<<32 | int64(w)
  1001  			pos += 6
  1002  
  1003  		// Output.
  1004  
  1005  		case xSetOp:
  1006  			inst.Op = Op(decoder[pc])
  1007  			pc++
  1008  
  1009  		case xArg1,
  1010  			xArg3,
  1011  			xArgAL,
  1012  			xArgAX,
  1013  			xArgCL,
  1014  			xArgCS,
  1015  			xArgDS,
  1016  			xArgDX,
  1017  			xArgEAX,
  1018  			xArgEDX,
  1019  			xArgES,
  1020  			xArgFS,
  1021  			xArgGS,
  1022  			xArgRAX,
  1023  			xArgRDX,
  1024  			xArgSS,
  1025  			xArgST,
  1026  			xArgXMM0:
  1027  			inst.Args[narg] = fixedArg[x]
  1028  			narg++
  1029  
  1030  		case xArgImm8:
  1031  			inst.Args[narg] = Imm(imm8)
  1032  			narg++
  1033  
  1034  		case xArgImm8u:
  1035  			inst.Args[narg] = Imm(uint8(imm8))
  1036  			narg++
  1037  
  1038  		case xArgImm16:
  1039  			inst.Args[narg] = Imm(int16(imm))
  1040  			narg++
  1041  
  1042  		case xArgImm16u:
  1043  			inst.Args[narg] = Imm(uint16(imm))
  1044  			narg++
  1045  
  1046  		case xArgImm32:
  1047  			inst.Args[narg] = Imm(int32(imm))
  1048  			narg++
  1049  
  1050  		case xArgImm64:
  1051  			inst.Args[narg] = Imm(imm)
  1052  			narg++
  1053  
  1054  		case xArgM,
  1055  			xArgM128,
  1056  			xArgM256,
  1057  			xArgM1428byte,
  1058  			xArgM16,
  1059  			xArgM16and16,
  1060  			xArgM16and32,
  1061  			xArgM16and64,
  1062  			xArgM16colon16,
  1063  			xArgM16colon32,
  1064  			xArgM16colon64,
  1065  			xArgM16int,
  1066  			xArgM2byte,
  1067  			xArgM32,
  1068  			xArgM32and32,
  1069  			xArgM32fp,
  1070  			xArgM32int,
  1071  			xArgM512byte,
  1072  			xArgM64,
  1073  			xArgM64fp,
  1074  			xArgM64int,
  1075  			xArgM8,
  1076  			xArgM80bcd,
  1077  			xArgM80dec,
  1078  			xArgM80fp,
  1079  			xArgM94108byte,
  1080  			xArgMem:
  1081  			if !haveMem {
  1082  				inst.Op = 0
  1083  				break Decode
  1084  			}
  1085  			inst.Args[narg] = mem
  1086  			inst.MemBytes = int(memBytes[decodeOp(x)])
  1087  			if mem.Base == RIP {
  1088  				inst.PCRel = displen
  1089  				inst.PCRelOff = dispoff
  1090  			}
  1091  			narg++
  1092  
  1093  		case xArgPtr16colon16:
  1094  			inst.Args[narg] = Imm(immc >> 16)
  1095  			inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
  1096  			narg += 2
  1097  
  1098  		case xArgPtr16colon32:
  1099  			inst.Args[narg] = Imm(immc >> 32)
  1100  			inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
  1101  			narg += 2
  1102  
  1103  		case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
  1104  			// TODO(rsc): Can address be 64 bits?
  1105  			mem = Mem{Disp: int64(immc)}
  1106  			if segIndex >= 0 {
  1107  				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
  1108  				inst.Prefix[segIndex] |= PrefixImplicit
  1109  			}
  1110  			inst.Args[narg] = mem
  1111  			inst.MemBytes = int(memBytes[decodeOp(x)])
  1112  			if mem.Base == RIP {
  1113  				inst.PCRel = displen
  1114  				inst.PCRelOff = dispoff
  1115  			}
  1116  			narg++
  1117  
  1118  		case xArgYmm1:
  1119  			base := baseReg[x]
  1120  			index := Reg(regop)
  1121  			if inst.Prefix[vexIndex+1]&0x80 == 0 {
  1122  				index += 8
  1123  			}
  1124  			inst.Args[narg] = base + index
  1125  			narg++
  1126  
  1127  		case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
  1128  			base := baseReg[x]
  1129  			index := Reg(regop)
  1130  			if rex != 0 && base == AL && index >= 4 {
  1131  				rexUsed |= PrefixREX
  1132  				index -= 4
  1133  				base = SPB
  1134  			}
  1135  			inst.Args[narg] = base + index
  1136  			narg++
  1137  
  1138  		case xArgMm, xArgMm1, xArgTR0dashTR7:
  1139  			inst.Args[narg] = baseReg[x] + Reg(regop&7)
  1140  			narg++
  1141  
  1142  		case xArgCR0dashCR7:
  1143  			// AMD documents an extension that the LOCK prefix
  1144  			// can be used in place of a REX prefix in order to access
  1145  			// CR8 from 32-bit mode. The LOCK prefix is allowed in
  1146  			// all modes, provided the corresponding CPUID bit is set.
  1147  			if lockIndex >= 0 {
  1148  				inst.Prefix[lockIndex] |= PrefixImplicit
  1149  				regop += 8
  1150  			}
  1151  			inst.Args[narg] = CR0 + Reg(regop)
  1152  			narg++
  1153  
  1154  		case xArgSreg:
  1155  			regop &= 7
  1156  			if regop >= 6 {
  1157  				inst.Op = 0
  1158  				break Decode
  1159  			}
  1160  			inst.Args[narg] = ES + Reg(regop)
  1161  			narg++
  1162  
  1163  		case xArgRmf16, xArgRmf32, xArgRmf64:
  1164  			base := baseReg[x]
  1165  			index := Reg(modrm & 07)
  1166  			if rex&PrefixREXB != 0 {
  1167  				rexUsed |= PrefixREXB
  1168  				index += 8
  1169  			}
  1170  			inst.Args[narg] = base + index
  1171  			narg++
  1172  
  1173  		case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
  1174  			n := inst.Opcode >> uint(opshift+8) & 07
  1175  			base := baseReg[x]
  1176  			index := Reg(n)
  1177  			if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
  1178  				rexUsed |= PrefixREXB
  1179  				index += 8
  1180  			}
  1181  			if rex != 0 && base == AL && index >= 4 {
  1182  				rexUsed |= PrefixREX
  1183  				index -= 4
  1184  				base = SPB
  1185  			}
  1186  			inst.Args[narg] = base + index
  1187  			narg++
  1188  		case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
  1189  			xArgMmM32, xArgMmM64, xArgMm2M64,
  1190  			xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128,
  1191  			xArgYmm2M256:
  1192  			if haveMem {
  1193  				inst.Args[narg] = mem
  1194  				inst.MemBytes = int(memBytes[decodeOp(x)])
  1195  				if mem.Base == RIP {
  1196  					inst.PCRel = displen
  1197  					inst.PCRelOff = dispoff
  1198  				}
  1199  			} else {
  1200  				base := baseReg[x]
  1201  				index := Reg(rm)
  1202  				switch decodeOp(x) {
  1203  				case xArgMmM32, xArgMmM64, xArgMm2M64:
  1204  					// There are only 8 MMX registers, so these ignore the REX.X bit.
  1205  					index &= 7
  1206  				case xArgRM8:
  1207  					if rex != 0 && index >= 4 {
  1208  						rexUsed |= PrefixREX
  1209  						index -= 4
  1210  						base = SPB
  1211  					}
  1212  				case xArgYmm2M256:
  1213  					if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 {
  1214  						index += 8
  1215  					}
  1216  				}
  1217  				inst.Args[narg] = base + index
  1218  			}
  1219  			narg++
  1220  
  1221  		case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
  1222  			if haveMem {
  1223  				inst.Op = 0
  1224  				break Decode
  1225  			}
  1226  			inst.Args[narg] = baseReg[x] + Reg(rm&7)
  1227  			narg++
  1228  
  1229  		case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
  1230  			if haveMem {
  1231  				inst.Op = 0
  1232  				break Decode
  1233  			}
  1234  			inst.Args[narg] = baseReg[x] + Reg(rm)
  1235  			narg++
  1236  
  1237  		case xArgRel8:
  1238  			inst.PCRelOff = immcpos
  1239  			inst.PCRel = 1
  1240  			inst.Args[narg] = Rel(int8(immc))
  1241  			narg++
  1242  
  1243  		case xArgRel16:
  1244  			inst.PCRelOff = immcpos
  1245  			inst.PCRel = 2
  1246  			inst.Args[narg] = Rel(int16(immc))
  1247  			narg++
  1248  
  1249  		case xArgRel32:
  1250  			inst.PCRelOff = immcpos
  1251  			inst.PCRel = 4
  1252  			inst.Args[narg] = Rel(int32(immc))
  1253  			narg++
  1254  		}
  1255  	}
  1256  
  1257  	if inst.Op == 0 {
  1258  		// Invalid instruction.
  1259  		if nprefix > 0 {
  1260  			return instPrefix(src[0], mode) // invalid instruction
  1261  		}
  1262  		return Inst{Len: pos}, ErrUnrecognized
  1263  	}
  1264  
  1265  	// Matched! Hooray!
  1266  
  1267  	// 90 decodes as XCHG EAX, EAX but is NOP.
  1268  	// 66 90 decodes as XCHG AX, AX and is NOP too.
  1269  	// 48 90 decodes as XCHG RAX, RAX and is NOP too.
  1270  	// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
  1271  	// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
  1272  	// It's all too special to handle in the decoding tables, at least for now.
  1273  	if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
  1274  		if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
  1275  			inst.Op = NOP
  1276  			if dataSizeIndex >= 0 {
  1277  				inst.Prefix[dataSizeIndex] &^= PrefixImplicit
  1278  			}
  1279  			inst.Args[0] = nil
  1280  			inst.Args[1] = nil
  1281  		}
  1282  		if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
  1283  			inst.Prefix[repIndex] |= PrefixImplicit
  1284  			inst.Op = PAUSE
  1285  			inst.Args[0] = nil
  1286  			inst.Args[1] = nil
  1287  		} else if gnuCompat {
  1288  			for i := nprefix - 1; i >= 0; i-- {
  1289  				if inst.Prefix[i]&0xFF == 0xF3 {
  1290  					inst.Prefix[i] |= PrefixImplicit
  1291  					inst.Op = PAUSE
  1292  					inst.Args[0] = nil
  1293  					inst.Args[1] = nil
  1294  					break
  1295  				}
  1296  			}
  1297  		}
  1298  	}
  1299  
  1300  	// defaultSeg returns the default segment for an implicit
  1301  	// memory reference: the final override if present, or else DS.
  1302  	defaultSeg := func() Reg {
  1303  		if segIndex >= 0 {
  1304  			inst.Prefix[segIndex] |= PrefixImplicit
  1305  			return prefixToSegment(inst.Prefix[segIndex])
  1306  		}
  1307  		return DS
  1308  	}
  1309  
  1310  	// Add implicit arguments not present in the tables.
  1311  	// Normally we shy away from making implicit arguments explicit,
  1312  	// following the Intel manuals, but adding the arguments seems
  1313  	// the best way to express the effect of the segment override prefixes.
  1314  	// TODO(rsc): Perhaps add these to the tables and
  1315  	// create bytecode instructions for them.
  1316  	usedAddrSize := false
  1317  	switch inst.Op {
  1318  	case INSB, INSW, INSD:
  1319  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1320  		inst.Args[1] = DX
  1321  		usedAddrSize = true
  1322  
  1323  	case OUTSB, OUTSW, OUTSD:
  1324  		inst.Args[0] = DX
  1325  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1326  		usedAddrSize = true
  1327  
  1328  	case MOVSB, MOVSW, MOVSD, MOVSQ:
  1329  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1330  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1331  		usedAddrSize = true
  1332  
  1333  	case CMPSB, CMPSW, CMPSD, CMPSQ:
  1334  		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1335  		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1336  		usedAddrSize = true
  1337  
  1338  	case LODSB, LODSW, LODSD, LODSQ:
  1339  		switch inst.Op {
  1340  		case LODSB:
  1341  			inst.Args[0] = AL
  1342  		case LODSW:
  1343  			inst.Args[0] = AX
  1344  		case LODSD:
  1345  			inst.Args[0] = EAX
  1346  		case LODSQ:
  1347  			inst.Args[0] = RAX
  1348  		}
  1349  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1350  		usedAddrSize = true
  1351  
  1352  	case STOSB, STOSW, STOSD, STOSQ:
  1353  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1354  		switch inst.Op {
  1355  		case STOSB:
  1356  			inst.Args[1] = AL
  1357  		case STOSW:
  1358  			inst.Args[1] = AX
  1359  		case STOSD:
  1360  			inst.Args[1] = EAX
  1361  		case STOSQ:
  1362  			inst.Args[1] = RAX
  1363  		}
  1364  		usedAddrSize = true
  1365  
  1366  	case SCASB, SCASW, SCASD, SCASQ:
  1367  		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1368  		switch inst.Op {
  1369  		case SCASB:
  1370  			inst.Args[0] = AL
  1371  		case SCASW:
  1372  			inst.Args[0] = AX
  1373  		case SCASD:
  1374  			inst.Args[0] = EAX
  1375  		case SCASQ:
  1376  			inst.Args[0] = RAX
  1377  		}
  1378  		usedAddrSize = true
  1379  
  1380  	case XLATB:
  1381  		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
  1382  		usedAddrSize = true
  1383  	}
  1384  
  1385  	// If we used the address size annotation to construct the
  1386  	// argument list, mark that prefix as implicit: it doesn't need
  1387  	// to be shown when printing the instruction.
  1388  	if haveMem || usedAddrSize {
  1389  		if addrSizeIndex >= 0 {
  1390  			inst.Prefix[addrSizeIndex] |= PrefixImplicit
  1391  		}
  1392  	}
  1393  
  1394  	// Similarly, if there's some memory operand, the segment
  1395  	// will be shown there and doesn't need to be shown as an
  1396  	// explicit prefix.
  1397  	if haveMem {
  1398  		if segIndex >= 0 {
  1399  			inst.Prefix[segIndex] |= PrefixImplicit
  1400  		}
  1401  	}
  1402  
  1403  	// Branch predict prefixes are overloaded segment prefixes,
  1404  	// since segment prefixes don't make sense on conditional jumps.
  1405  	// Rewrite final instance to prediction prefix.
  1406  	// The set of instructions to which the prefixes apply (other than the
  1407  	// Jcc conditional jumps) is not 100% clear from the manuals, but
  1408  	// the disassemblers seem to agree about the LOOP and JCXZ instructions,
  1409  	// so we'll follow along.
  1410  	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1411  	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
  1412  	PredictLoop:
  1413  		for i := nprefix - 1; i >= 0; i-- {
  1414  			p := inst.Prefix[i]
  1415  			switch p & 0xFF {
  1416  			case PrefixCS:
  1417  				inst.Prefix[i] = PrefixPN
  1418  				break PredictLoop
  1419  			case PrefixDS:
  1420  				inst.Prefix[i] = PrefixPT
  1421  				break PredictLoop
  1422  			}
  1423  		}
  1424  	}
  1425  
  1426  	// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
  1427  	// A REPN applied to certain control transfers is a BND prefix to bound
  1428  	// the range of possible destinations. There's surprisingly little documentation
  1429  	// about this, so we just do what libopcodes and xed agree on.
  1430  	// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
  1431  	// does not turn into a BND.
  1432  	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1433  	if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
  1434  		for i := nprefix - 1; i >= 0; i-- {
  1435  			p := inst.Prefix[i]
  1436  			if p&^PrefixIgnored == PrefixREPN {
  1437  				inst.Prefix[i] = PrefixBND
  1438  				break
  1439  			}
  1440  		}
  1441  	}
  1442  
  1443  	// The LOCK prefix only applies to certain instructions, and then only
  1444  	// to instances of the instruction with a memory destination.
  1445  	// Other uses of LOCK are invalid and cause a processor exception,
  1446  	// in contrast to the "just ignore it" spirit applied to all other prefixes.
  1447  	// Mark invalid lock prefixes.
  1448  	hasLock := false
  1449  	if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
  1450  		switch inst.Op {
  1451  		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1452  		case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
  1453  			if isMem(inst.Args[0]) {
  1454  				hasLock = true
  1455  				break
  1456  			}
  1457  			fallthrough
  1458  		default:
  1459  			inst.Prefix[lockIndex] |= PrefixInvalid
  1460  		}
  1461  	}
  1462  
  1463  	// In certain cases, all of which require a memory destination,
  1464  	// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
  1465  	// from the Intel Transactional Synchronization Extensions (TSX).
  1466  	//
  1467  	// The specific rules are:
  1468  	// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
  1469  	// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
  1470  	// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
  1471  	if isMem(inst.Args[0]) {
  1472  		if inst.Op == XCHG {
  1473  			hasLock = true
  1474  		}
  1475  
  1476  		for i := len(inst.Prefix) - 1; i >= 0; i-- {
  1477  			p := inst.Prefix[i] &^ PrefixIgnored
  1478  			switch p {
  1479  			case PrefixREPN:
  1480  				if hasLock {
  1481  					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
  1482  				}
  1483  
  1484  			case PrefixREP:
  1485  				if hasLock {
  1486  					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
  1487  				}
  1488  
  1489  				if inst.Op == MOV {
  1490  					op := (inst.Opcode >> 24) &^ 1
  1491  					if op == 0x88 || op == 0xC6 {
  1492  						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
  1493  					}
  1494  				}
  1495  			}
  1496  		}
  1497  	}
  1498  
  1499  	// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
  1500  	if repIndex >= 0 {
  1501  		switch inst.Prefix[repIndex] {
  1502  		case PrefixREP, PrefixREPN:
  1503  			switch inst.Op {
  1504  			// According to the manuals, the REP/REPE prefix applies to all of these,
  1505  			// while the REPN applies only to some of them. However, both libopcodes
  1506  			// and xed show both prefixes explicitly for all instructions, so we do the same.
  1507  			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1508  			case INSB, INSW, INSD,
  1509  				MOVSB, MOVSW, MOVSD, MOVSQ,
  1510  				OUTSB, OUTSW, OUTSD,
  1511  				LODSB, LODSW, LODSD, LODSQ,
  1512  				CMPSB, CMPSW, CMPSD, CMPSQ,
  1513  				SCASB, SCASW, SCASD, SCASQ,
  1514  				STOSB, STOSW, STOSD, STOSQ:
  1515  				// ok
  1516  			default:
  1517  				inst.Prefix[repIndex] |= PrefixIgnored
  1518  			}
  1519  		}
  1520  	}
  1521  
  1522  	// If REX was present, mark implicit if all the 1 bits were consumed.
  1523  	if rexIndex >= 0 {
  1524  		if rexUsed != 0 {
  1525  			rexUsed |= PrefixREX
  1526  		}
  1527  		if rex&^rexUsed == 0 {
  1528  			inst.Prefix[rexIndex] |= PrefixImplicit
  1529  		}
  1530  	}
  1531  
  1532  	inst.DataSize = dataMode
  1533  	inst.AddrSize = addrMode
  1534  	inst.Mode = mode
  1535  	inst.Len = pos
  1536  	return inst, nil
  1537  }
  1538  
// errInternal is a package-level sentinel error carrying the message
// "internal error".
// NOTE(review): its uses lie outside this part of the file; presumably it
// reports decoder states that should be unreachable — confirm against callers.
var errInternal = errors.New("internal error")
  1540  
  1541  // addr16 records the eight 16-bit addressing modes.
  1542  var addr16 = [8]Mem{
  1543  	{Base: BX, Scale: 1, Index: SI},
  1544  	{Base: BX, Scale: 1, Index: DI},
  1545  	{Base: BP, Scale: 1, Index: SI},
  1546  	{Base: BP, Scale: 1, Index: DI},
  1547  	{Base: SI},
  1548  	{Base: DI},
  1549  	{Base: BP},
  1550  	{Base: BX},
  1551  }
  1552  
  1553  // baseRegForBits returns the base register for a given register size in bits.
  1554  func baseRegForBits(bits int) Reg {
  1555  	switch bits {
  1556  	case 8:
  1557  		return AL
  1558  	case 16:
  1559  		return AX
  1560  	case 32:
  1561  		return EAX
  1562  	case 64:
  1563  		return RAX
  1564  	}
  1565  	return 0
  1566  }
  1567  
  1568  // baseReg records the base register for argument types that specify
  1569  // a range of registers indexed by op, regop, or rm.
  1570  var baseReg = [...]Reg{
  1571  	xArgDR0dashDR7: DR0,
  1572  	xArgMm1:        M0,
  1573  	xArgMm2:        M0,
  1574  	xArgMm2M64:     M0,
  1575  	xArgMm:         M0,
  1576  	xArgMmM32:      M0,
  1577  	xArgMmM64:      M0,
  1578  	xArgR16:        AX,
  1579  	xArgR16op:      AX,
  1580  	xArgR32:        EAX,
  1581  	xArgR32M16:     EAX,
  1582  	xArgR32M8:      EAX,
  1583  	xArgR32op:      EAX,
  1584  	xArgR64:        RAX,
  1585  	xArgR64M16:     RAX,
  1586  	xArgR64op:      RAX,
  1587  	xArgR8:         AL,
  1588  	xArgR8op:       AL,
  1589  	xArgRM16:       AX,
  1590  	xArgRM32:       EAX,
  1591  	xArgRM64:       RAX,
  1592  	xArgRM8:        AL,
  1593  	xArgRmf16:      AX,
  1594  	xArgRmf32:      EAX,
  1595  	xArgRmf64:      RAX,
  1596  	xArgSTi:        F0,
  1597  	xArgTR0dashTR7: TR0,
  1598  	xArgXmm1:       X0,
  1599  	xArgYmm1:       X0,
  1600  	xArgXmm2:       X0,
  1601  	xArgXmm2M128:   X0,
  1602  	xArgYmm2M256:   X0,
  1603  	xArgXmm2M16:    X0,
  1604  	xArgXmm2M32:    X0,
  1605  	xArgXmm2M64:    X0,
  1606  	xArgXmm:        X0,
  1607  	xArgXmmM128:    X0,
  1608  	xArgXmmM32:     X0,
  1609  	xArgXmmM64:     X0,
  1610  }
  1611  
  1612  // prefixToSegment returns the segment register
  1613  // corresponding to a particular segment prefix.
  1614  func prefixToSegment(p Prefix) Reg {
  1615  	switch p &^ PrefixImplicit {
  1616  	case PrefixCS:
  1617  		return CS
  1618  	case PrefixDS:
  1619  		return DS
  1620  	case PrefixES:
  1621  		return ES
  1622  	case PrefixFS:
  1623  		return FS
  1624  	case PrefixGS:
  1625  		return GS
  1626  	case PrefixSS:
  1627  		return SS
  1628  	}
  1629  	return 0
  1630  }
  1631  
  1632  // fixedArg records the fixed arguments corresponding to the given bytecodes.
  1633  var fixedArg = [...]Arg{
  1634  	xArg1:    Imm(1),
  1635  	xArg3:    Imm(3),
  1636  	xArgAL:   AL,
  1637  	xArgAX:   AX,
  1638  	xArgDX:   DX,
  1639  	xArgEAX:  EAX,
  1640  	xArgEDX:  EDX,
  1641  	xArgRAX:  RAX,
  1642  	xArgRDX:  RDX,
  1643  	xArgCL:   CL,
  1644  	xArgCS:   CS,
  1645  	xArgDS:   DS,
  1646  	xArgES:   ES,
  1647  	xArgFS:   FS,
  1648  	xArgGS:   GS,
  1649  	xArgSS:   SS,
  1650  	xArgST:   F0,
  1651  	xArgXMM0: X0,
  1652  }
  1653  
  1654  // memBytes records the size of the memory pointed at
  1655  // by a memory argument of the given form.
  1656  var memBytes = [...]int8{
  1657  	xArgM128:       128 / 8,
  1658  	xArgM256:       256 / 8,
  1659  	xArgM16:        16 / 8,
  1660  	xArgM16and16:   (16 + 16) / 8,
  1661  	xArgM16colon16: (16 + 16) / 8,
  1662  	xArgM16colon32: (16 + 32) / 8,
  1663  	xArgM16int:     16 / 8,
  1664  	xArgM2byte:     2,
  1665  	xArgM32:        32 / 8,
  1666  	xArgM32and32:   (32 + 32) / 8,
  1667  	xArgM32fp:      32 / 8,
  1668  	xArgM32int:     32 / 8,
  1669  	xArgM64:        64 / 8,
  1670  	xArgM64fp:      64 / 8,
  1671  	xArgM64int:     64 / 8,
  1672  	xArgMm2M64:     64 / 8,
  1673  	xArgMmM32:      32 / 8,
  1674  	xArgMmM64:      64 / 8,
  1675  	xArgMoffs16:    16 / 8,
  1676  	xArgMoffs32:    32 / 8,
  1677  	xArgMoffs64:    64 / 8,
  1678  	xArgMoffs8:     8 / 8,
  1679  	xArgR32M16:     16 / 8,
  1680  	xArgR32M8:      8 / 8,
  1681  	xArgR64M16:     16 / 8,
  1682  	xArgRM16:       16 / 8,
  1683  	xArgRM32:       32 / 8,
  1684  	xArgRM64:       64 / 8,
  1685  	xArgRM8:        8 / 8,
  1686  	xArgXmm2M128:   128 / 8,
  1687  	xArgYmm2M256:   256 / 8,
  1688  	xArgXmm2M16:    16 / 8,
  1689  	xArgXmm2M32:    32 / 8,
  1690  	xArgXmm2M64:    64 / 8,
  1691  	xArgXmm:        128 / 8,
  1692  	xArgXmmM128:    128 / 8,
  1693  	xArgXmmM32:     32 / 8,
  1694  	xArgXmmM64:     64 / 8,
  1695  }
  1696  
  1697  // isCondJmp records the conditional jumps.
  1698  var isCondJmp = [maxOp + 1]bool{
  1699  	JA:  true,
  1700  	JAE: true,
  1701  	JB:  true,
  1702  	JBE: true,
  1703  	JE:  true,
  1704  	JG:  true,
  1705  	JGE: true,
  1706  	JL:  true,
  1707  	JLE: true,
  1708  	JNE: true,
  1709  	JNO: true,
  1710  	JNP: true,
  1711  	JNS: true,
  1712  	JO:  true,
  1713  	JP:  true,
  1714  	JS:  true,
  1715  }
  1716  
  1717  // isLoop records the loop operators.
  1718  var isLoop = [maxOp + 1]bool{
  1719  	LOOP:   true,
  1720  	LOOPE:  true,
  1721  	LOOPNE: true,
  1722  	JECXZ:  true,
  1723  	JRCXZ:  true,
  1724  }