github.com/akaros/go-akaros@v0.0.0-20181004170632-85005d477eab/src/cmd/internal/rsc.io/x86/x86asm/decode.go (about)

     1  // Copyright 2014 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Table-driven decoding of x86 instructions.
     6  
     7  package x86asm
     8  
     9  import (
    10  	"encoding/binary"
    11  	"errors"
    12  	"fmt"
    13  	"runtime"
    14  )
    15  
    16  // Set trace to true to cause the decoder to print the PC sequence
    17  // of the executed instruction codes. This is typically only useful
    18  // when you are running a test of a single input case.
    19  const trace = false
    20  
// TODO(rsc): We may be able to merge several of the memory operands
// since we don't care about, say, the distinction between m80dec and m80bcd.
// Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.

// A decodeOp is a single instruction in the decoder bytecode program.
//
// The decodeOps correspond to consuming and conditionally branching
// on input bytes, consuming additional fields, and then interpreting
// consumed data as instruction arguments. The names of the xRead and xArg
// operations are taken from the Intel manual conventions, for example
// Volume 2, Section 3.1.1, page 487 of
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
//
// The actual decoding program is generated by ../x86map.
// decode1 reads entries of the decoder table and converts each one to a
// decodeOp before dispatching on it.
type decodeOp uint16
    37  
// Opcodes of the decoder bytecode program. The values are assigned by
// iota in declaration order, starting with xFail at 0.
// NOTE(review): the decoding program is generated elsewhere and presumably
// depends on these values — confirm before reordering or inserting entries.
const (
	// Control flow.
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	// Conditional branches.
	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	// Opcode selection.
	xSetOp // set instruction opcode

	// Input: consume additional instruction bytes.
	xReadSlashR // read /r
	xReadIb     // read ib
	xReadIw     // read iw
	xReadId     // read id
	xReadIo     // read io
	xReadCb     // read cb
	xReadCw     // read cw
	xReadCd     // read cd
	xReadCp     // read cp
	xReadCm     // read cm

	// Arguments: interpret consumed data as instruction arguments.
	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM           // arg r/m
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgReg          // arg reg
	xArgRegM16       // arg reg/m16
	xArgRegM32       // arg reg/m32
	xArgRegM8        // arg reg/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)
   168  
   169  // instPrefix returns an Inst describing just one prefix byte.
   170  // It is only used if there is a prefix followed by an unintelligible
   171  // or invalid instruction byte sequence.
   172  func instPrefix(b byte, mode int) (Inst, error) {
   173  	// When tracing it is useful to see what called instPrefix to report an error.
   174  	if trace {
   175  		_, file, line, _ := runtime.Caller(1)
   176  		fmt.Printf("%s:%d\n", file, line)
   177  	}
   178  	p := Prefix(b)
   179  	switch p {
   180  	case PrefixDataSize:
   181  		if mode == 16 {
   182  			p = PrefixData32
   183  		} else {
   184  			p = PrefixData16
   185  		}
   186  	case PrefixAddrSize:
   187  		if mode == 32 {
   188  			p = PrefixAddr16
   189  		} else {
   190  			p = PrefixAddr32
   191  		}
   192  	}
   193  	// Note: using composite literal with Prefix key confuses 'bundle' tool.
   194  	inst := Inst{Len: 1}
   195  	inst.Prefix = Prefixes{p}
   196  	return inst, nil
   197  }
   198  
   199  // truncated reports a truncated instruction.
   200  // For now we use instPrefix but perhaps later we will return
   201  // a specific error here.
   202  func truncated(src []byte, mode int) (Inst, error) {
   203  	//	return Inst{}, len(src), ErrTruncated
   204  	return instPrefix(src[0], mode) // too long
   205  }
   206  
// These are the errors returned by Decode.
var (
	// ErrInvalidMode is returned when the mode argument is not 16, 32, or 64.
	ErrInvalidMode = errors.New("invalid x86 mode in Decode")
	// ErrTruncated describes an instruction cut short by the end of the input.
	// NOTE(review): the visible part of the decoder reports truncation through
	// instPrefix rather than this error — confirm where it is returned.
	ErrTruncated = errors.New("truncated instruction")
	// ErrUnrecognized describes input that does not decode to a known instruction.
	// NOTE(review): its return site is not visible in this part of the file.
	ErrUnrecognized = errors.New("unrecognized instruction")
)
   213  
// decoderCover records coverage information for which parts
// of the byte code have been executed.
// decode1 allocates it on first use with one entry per decoder table slot
// and sets the entry for every program counter it executes.
// NOTE(review): this is package-level state written on each decode with no
// locking visible here — confirm before relying on concurrent Decode calls.
// TODO(rsc): This is for testing. Only use this if a flag is given.
var decoderCover []bool
   218  
   219  // Decode decodes the leading bytes in src as a single instruction.
   220  // The mode arguments specifies the assumed processor mode:
   221  // 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
   222  func Decode(src []byte, mode int) (inst Inst, err error) {
   223  	return decode1(src, mode, false)
   224  }
   225  
   226  // decode1 is the implementation of Decode but takes an extra
   227  // gnuCompat flag to cause it to change its behavior to mimic
   228  // bugs (or at least unique features) of GNU libopcodes as used
   229  // by objdump. We don't believe that logic is the right thing to do
   230  // in general, but when testing against libopcodes it simplifies the
   231  // comparison if we adjust a few small pieces of logic.
   232  // The affected logic is in the conditional branch for "mandatory" prefixes,
   233  // case xCondPrefix.
   234  func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
   235  	switch mode {
   236  	case 16, 32, 64:
   237  		// ok
   238  		// TODO(rsc): 64-bit mode not tested, probably not working.
   239  	default:
   240  		return Inst{}, ErrInvalidMode
   241  	}
   242  
   243  	// Maximum instruction size is 15 bytes.
   244  	// If we need to read more, report a truncated instruction.
   245  	if len(src) > 15 {
   246  		src = src[:15]
   247  	}
   248  
   249  	var (
   250  		// prefix decoding information
   251  		pos           = 0    // position reading src
   252  		nprefix       = 0    // number of prefixes
   253  		lockIndex     = -1   // index of LOCK prefix in src and inst.Prefix
   254  		repIndex      = -1   // index of REP/REPN prefix in src and inst.Prefix
   255  		segIndex      = -1   // index of Group 2 prefix in src and inst.Prefix
   256  		dataSizeIndex = -1   // index of Group 3 prefix in src and inst.Prefix
   257  		addrSizeIndex = -1   // index of Group 4 prefix in src and inst.Prefix
   258  		rex           Prefix // rex byte if present (or 0)
   259  		rexUsed       Prefix // bits used in rex byte
   260  		rexIndex      = -1   // index of rex byte
   261  
   262  		addrMode = mode // address mode (width in bits)
   263  		dataMode = mode // operand mode (width in bits)
   264  
   265  		// decoded ModR/M fields
   266  		haveModrm bool
   267  		modrm     int
   268  		mod       int
   269  		regop     int
   270  		rm        int
   271  
   272  		// if ModR/M is memory reference, Mem form
   273  		mem     Mem
   274  		haveMem bool
   275  
   276  		// decoded SIB fields
   277  		haveSIB bool
   278  		sib     int
   279  		scale   int
   280  		index   int
   281  		base    int
   282  
   283  		// decoded immediate values
   284  		imm  int64
   285  		imm8 int8
   286  		immc int64
   287  
   288  		// output
   289  		opshift int
   290  		inst    Inst
   291  		narg    int // number of arguments written to inst
   292  	)
   293  
   294  	if mode == 64 {
   295  		dataMode = 32
   296  	}
   297  
   298  	// Prefixes are certainly the most complex and underspecified part of
   299  	// decoding x86 instructions. Although the manuals say things like
   300  	// up to four prefixes, one from each group, nearly everyone seems to
   301  	// agree that in practice as many prefixes as possible, including multiple
   302  	// from a particular group or repetitions of a given prefix, can be used on
   303  	// an instruction, provided the total instruction length including prefixes
   304  	// does not exceed the agreed-upon maximum of 15 bytes.
   305  	// Everyone also agrees that if one of these prefixes is the LOCK prefix
   306  	// and the instruction is not one of the instructions that can be used with
   307  	// the LOCK prefix or if the destination is not a memory operand,
   308  	// then the instruction is invalid and produces the #UD exception.
   309  	// However, that is the end of any semblance of agreement.
   310  	//
   311  	// What happens if prefixes are given that conflict with other prefixes?
   312  	// For example, the memory segment overrides CS, DS, ES, FS, GS, SS
   313  	// conflict with each other: only one segment can be in effect.
   314  	// Disassemblers seem to agree that later prefixes take priority over
   315  	// earlier ones. I have not taken the time to write assembly programs
   316  	// to check to see if the hardware agrees.
   317  	//
   318  	// What happens if prefixes are given that have no meaning for the
   319  	// specific instruction to which they are attached? It depends.
   320  	// If they really have no meaning, they are ignored. However, a future
   321  	// processor may assign a different meaning. As a disassembler, we
   322  	// don't really know whether we're seeing a meaningless prefix or one
   323  	// whose meaning we simply haven't been told yet.
   324  	//
   325  	// Combining the two questions, what happens when conflicting
   326  	// extension prefixes are given? No one seems to know for sure.
   327  	// For example, MOVQ is 66 0F D6 /r, MOVDQ2Q is F2 0F D6 /r,
   328  	// and MOVQ2DQ is F3 0F D6 /r. What is '66 F2 F3 0F D6 /r'?
   329  	// Which prefix wins? See the xCondPrefix case for more.
   330  	//
   331  	// Writing assembly test cases to divine which interpretation the
   332  	// CPU uses might clarify the situation, but more likely it would
   333  	// make the situation even less clear.
   334  
   335  	// Read non-REX prefixes.
   336  ReadPrefixes:
   337  	for ; pos < len(src); pos++ {
   338  		p := Prefix(src[pos])
   339  		switch p {
   340  		default:
   341  			nprefix = pos
   342  			break ReadPrefixes
   343  
   344  		// Group 1 - lock and repeat prefixes
   345  		// According to Intel, there should only be one from this set,
   346  		// but according to AMD both can be present.
   347  		case 0xF0:
   348  			if lockIndex >= 0 {
   349  				inst.Prefix[lockIndex] |= PrefixIgnored
   350  			}
   351  			lockIndex = pos
   352  		case 0xF2, 0xF3:
   353  			if repIndex >= 0 {
   354  				inst.Prefix[repIndex] |= PrefixIgnored
   355  			}
   356  			repIndex = pos
   357  
   358  		// Group 2 - segment override / branch hints
   359  		case 0x26, 0x2E, 0x36, 0x3E:
   360  			if mode == 64 {
   361  				p |= PrefixIgnored
   362  				break
   363  			}
   364  			fallthrough
   365  		case 0x64, 0x65:
   366  			if segIndex >= 0 {
   367  				inst.Prefix[segIndex] |= PrefixIgnored
   368  			}
   369  			segIndex = pos
   370  
   371  		// Group 3 - operand size override
   372  		case 0x66:
   373  			if mode == 16 {
   374  				dataMode = 32
   375  				p = PrefixData32
   376  			} else {
   377  				dataMode = 16
   378  				p = PrefixData16
   379  			}
   380  			if dataSizeIndex >= 0 {
   381  				inst.Prefix[dataSizeIndex] |= PrefixIgnored
   382  			}
   383  			dataSizeIndex = pos
   384  
   385  		// Group 4 - address size override
   386  		case 0x67:
   387  			if mode == 32 {
   388  				addrMode = 16
   389  				p = PrefixAddr16
   390  			} else {
   391  				addrMode = 32
   392  				p = PrefixAddr32
   393  			}
   394  			if addrSizeIndex >= 0 {
   395  				inst.Prefix[addrSizeIndex] |= PrefixIgnored
   396  			}
   397  			addrSizeIndex = pos
   398  		}
   399  
   400  		if pos >= len(inst.Prefix) {
   401  			return instPrefix(src[0], mode) // too long
   402  		}
   403  
   404  		inst.Prefix[pos] = p
   405  	}
   406  
   407  	// Read REX prefix.
   408  	if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() {
   409  		rex = Prefix(src[pos])
   410  		rexIndex = pos
   411  		if pos >= len(inst.Prefix) {
   412  			return instPrefix(src[0], mode) // too long
   413  		}
   414  		inst.Prefix[pos] = rex
   415  		pos++
   416  		if rex&PrefixREXW != 0 {
   417  			dataMode = 64
   418  			if dataSizeIndex >= 0 {
   419  				inst.Prefix[dataSizeIndex] |= PrefixIgnored
   420  			}
   421  		}
   422  	}
   423  
   424  	// Decode instruction stream, interpreting decoding instructions.
   425  	// opshift gives the shift to use when saving the next
   426  	// opcode byte into inst.Opcode.
   427  	opshift = 24
   428  	if decoderCover == nil {
   429  		decoderCover = make([]bool, len(decoder))
   430  	}
   431  
   432  	// Decode loop, executing decoder program.
   433  	var oldPC, prevPC int
   434  Decode:
   435  	for pc := 1; ; { // TODO uint
   436  		oldPC = prevPC
   437  		prevPC = pc
   438  		if trace {
   439  			println("run", pc)
   440  		}
   441  		x := decoder[pc]
   442  		decoderCover[pc] = true
   443  		pc++
   444  
   445  		// Read and decode ModR/M if needed by opcode.
   446  		switch decodeOp(x) {
   447  		case xCondSlashR, xReadSlashR:
   448  			if haveModrm {
   449  				return Inst{Len: pos}, errInternal
   450  			}
   451  			haveModrm = true
   452  			if pos >= len(src) {
   453  				return truncated(src, mode)
   454  			}
   455  			modrm = int(src[pos])
   456  			pos++
   457  			if opshift >= 0 {
   458  				inst.Opcode |= uint32(modrm) << uint(opshift)
   459  				opshift -= 8
   460  			}
   461  			mod = modrm >> 6
   462  			regop = (modrm >> 3) & 07
   463  			rm = modrm & 07
   464  			if rex&PrefixREXR != 0 {
   465  				rexUsed |= PrefixREXR
   466  				regop |= 8
   467  			}
   468  			if addrMode == 16 {
   469  				// 16-bit modrm form
   470  				if mod != 3 {
   471  					haveMem = true
   472  					mem = addr16[rm]
   473  					if rm == 6 && mod == 0 {
   474  						mem.Base = 0
   475  					}
   476  
   477  					// Consume disp16 if present.
   478  					if mod == 0 && rm == 6 || mod == 2 {
   479  						if pos+2 > len(src) {
   480  							return truncated(src, mode)
   481  						}
   482  						mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
   483  						pos += 2
   484  					}
   485  
   486  					// Consume disp8 if present.
   487  					if mod == 1 {
   488  						if pos >= len(src) {
   489  							return truncated(src, mode)
   490  						}
   491  						mem.Disp = int64(int8(src[pos]))
   492  						pos++
   493  					}
   494  				}
   495  			} else {
   496  				haveMem = mod != 3
   497  
   498  				// 32-bit or 64-bit form
   499  				// Consume SIB encoding if present.
   500  				if rm == 4 && mod != 3 {
   501  					haveSIB = true
   502  					if pos >= len(src) {
   503  						return truncated(src, mode)
   504  					}
   505  					sib = int(src[pos])
   506  					pos++
   507  					if opshift >= 0 {
   508  						inst.Opcode |= uint32(sib) << uint(opshift)
   509  						opshift -= 8
   510  					}
   511  					scale = sib >> 6
   512  					index = (sib >> 3) & 07
   513  					base = sib & 07
   514  					if rex&PrefixREXB != 0 {
   515  						rexUsed |= PrefixREXB
   516  						base |= 8
   517  					}
   518  					if rex&PrefixREXX != 0 {
   519  						rexUsed |= PrefixREXX
   520  						index |= 8
   521  					}
   522  
   523  					mem.Scale = 1 << uint(scale)
   524  					if index == 4 {
   525  						// no mem.Index
   526  					} else {
   527  						mem.Index = baseRegForBits(addrMode) + Reg(index)
   528  					}
   529  					if base&7 == 5 && mod == 0 {
   530  						// no mem.Base
   531  					} else {
   532  						mem.Base = baseRegForBits(addrMode) + Reg(base)
   533  					}
   534  				} else {
   535  					if rex&PrefixREXB != 0 {
   536  						rexUsed |= PrefixREXB
   537  						rm |= 8
   538  					}
   539  					if mod == 0 && rm&7 == 5 || rm&7 == 4 {
   540  						// base omitted
   541  					} else if mod != 3 {
   542  						mem.Base = baseRegForBits(addrMode) + Reg(rm)
   543  					}
   544  				}
   545  
   546  				// Consume disp32 if present.
   547  				if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
   548  					if pos+4 > len(src) {
   549  						return truncated(src, mode)
   550  					}
   551  					mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
   552  					pos += 4
   553  				}
   554  
   555  				// Consume disp8 if present.
   556  				if mod == 1 {
   557  					if pos >= len(src) {
   558  						return truncated(src, mode)
   559  					}
   560  					mem.Disp = int64(int8(src[pos]))
   561  					pos++
   562  				}
   563  
   564  				// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
   565  				// See Vol 2A. Table 2-7.
   566  				if mode == 64 && mod == 0 && rm&7 == 5 {
   567  					if addrMode == 32 {
   568  						mem.Base = EIP
   569  					} else {
   570  						mem.Base = RIP
   571  					}
   572  				}
   573  			}
   574  
   575  			if segIndex >= 0 {
   576  				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
   577  			}
   578  		}
   579  
   580  		// Execute single opcode.
   581  		switch decodeOp(x) {
   582  		default:
   583  			println("bad op", x, "at", pc-1, "from", oldPC)
   584  			return Inst{Len: pos}, errInternal
   585  
   586  		case xFail:
   587  			inst.Op = 0
   588  			break Decode
   589  
   590  		case xMatch:
   591  			break Decode
   592  
   593  		case xJump:
   594  			pc = int(decoder[pc])
   595  
   596  		// Conditional branches.
   597  
   598  		case xCondByte:
   599  			if pos >= len(src) {
   600  				return truncated(src, mode)
   601  			}
   602  			b := src[pos]
   603  			n := int(decoder[pc])
   604  			pc++
   605  			for i := 0; i < n; i++ {
   606  				xb, xpc := decoder[pc], int(decoder[pc+1])
   607  				pc += 2
   608  				if b == byte(xb) {
   609  					pc = xpc
   610  					pos++
   611  					if opshift >= 0 {
   612  						inst.Opcode |= uint32(b) << uint(opshift)
   613  						opshift -= 8
   614  					}
   615  					continue Decode
   616  				}
   617  			}
   618  			// xCondByte is the only conditional with a fall through,
   619  			// so that it can be used to pick off special cases before
   620  			// an xCondSlash. If the fallthrough instruction is xFail,
   621  			// advance the position so that the decoded instruction
   622  			// size includes the byte we just compared against.
   623  			if decodeOp(decoder[pc]) == xJump {
   624  				pc = int(decoder[pc+1])
   625  			}
   626  			if decodeOp(decoder[pc]) == xFail {
   627  				pos++
   628  			}
   629  
   630  		case xCondIs64:
   631  			if mode == 64 {
   632  				pc = int(decoder[pc+1])
   633  			} else {
   634  				pc = int(decoder[pc])
   635  			}
   636  
   637  		case xCondIsMem:
   638  			mem := haveMem
   639  			if !haveModrm {
   640  				if pos >= len(src) {
   641  					return instPrefix(src[0], mode) // too long
   642  				}
   643  				mem = src[pos]>>6 != 3
   644  			}
   645  			if mem {
   646  				pc = int(decoder[pc+1])
   647  			} else {
   648  				pc = int(decoder[pc])
   649  			}
   650  
   651  		case xCondDataSize:
   652  			switch dataMode {
   653  			case 16:
   654  				if dataSizeIndex >= 0 {
   655  					inst.Prefix[dataSizeIndex] |= PrefixImplicit
   656  				}
   657  				pc = int(decoder[pc])
   658  			case 32:
   659  				if dataSizeIndex >= 0 {
   660  					inst.Prefix[dataSizeIndex] |= PrefixImplicit
   661  				}
   662  				pc = int(decoder[pc+1])
   663  			case 64:
   664  				rexUsed |= PrefixREXW
   665  				pc = int(decoder[pc+2])
   666  			}
   667  
   668  		case xCondAddrSize:
   669  			switch addrMode {
   670  			case 16:
   671  				if addrSizeIndex >= 0 {
   672  					inst.Prefix[addrSizeIndex] |= PrefixImplicit
   673  				}
   674  				pc = int(decoder[pc])
   675  			case 32:
   676  				if addrSizeIndex >= 0 {
   677  					inst.Prefix[addrSizeIndex] |= PrefixImplicit
   678  				}
   679  				pc = int(decoder[pc+1])
   680  			case 64:
   681  				pc = int(decoder[pc+2])
   682  			}
   683  
   684  		case xCondPrefix:
   685  			// Conditional branch based on presence or absence of prefixes.
   686  			// The conflict cases here are completely undocumented and
   687  			// differ significantly between GNU libopcodes and Intel xed.
   688  			// I have not written assembly code to divine what various CPUs
   689  			// do, but it wouldn't surprise me if they are not consistent either.
   690  			//
   691  			// The basic idea is to switch on the presence of a prefix, so that
   692  			// for example:
   693  			//
   694  			//	xCondPrefix, 4
   695  			//	0xF3, 123,
   696  			//	0xF2, 234,
   697  			//	0x66, 345,
   698  			//	0, 456
   699  			//
   700  			// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
   701  			// is present, 345 if the 66 prefix is present, and 456 otherwise.
   702  			// The prefixes are given in descending order so that the 0 will be last.
   703  			//
   704  			// It is unclear what should happen if multiple conditions are
   705  			// satisfied: what if F2 and F3 are both present, or if 66 and F2
   706  			// are present, or if all three are present? The one chosen becomes
   707  			// part of the opcode and the others do not. Perhaps the answer
   708  			// depends on the specific opcodes in question.
   709  			//
   710  			// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
   711  			// it comes in 16-bit and 32-bit forms based on the 66 prefix,
   712  			// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
   713  			// with the 66 being only an operand size override, and probably
   714  			// F2 66 0F 38 F1 /r should be treated the same.
   715  			// Perhaps that rule is specific to the case of CRC32, since no
   716  			// 66 0F 38 F1 instruction is defined (today) (that we know of).
   717  			// However, both libopcodes and xed seem to generalize this
   718  			// example and choose F2/F3 in preference to 66, and we
   719  			// do the same.
   720  			//
   721  			// Next, what if both F2 and F3 are present? Which wins?
   722  			// The Intel xed rule, and ours, is that the one that occurs last wins.
   723  			// The GNU libopcodes rule, which we implement only in gnuCompat mode,
   724  			// is that F3 beats F2 unless F3 has no special meaning, in which
   725  			// case F3 can be a modifier on an F2 special meaning.
   726  			//
   727  			// Concretely,
   728  			//	66 0F D6 /r is MOVQ
   729  			//	F2 0F D6 /r is MOVDQ2Q
   730  			//	F3 0F D6 /r is MOVQ2DQ.
   731  			//
   732  			//	F2 66 0F D6 /r is 66 + MOVDQ2Q always.
   733  			//	66 F2 0F D6 /r is 66 + MOVDQ2Q always.
   734  			//	F3 66 0F D6 /r is 66 + MOVQ2DQ always.
   735  			//	66 F3 0F D6 /r is 66 + MOVQ2DQ always.
   736  			//	F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
   737  			//	F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
   738  			//	Adding 66 anywhere in the prefix section of the
   739  			//	last two cases does not change the outcome.
   740  			//
   741  			// Finally, what if there is a variant in which 66 is a mandatory
   742  			// prefix rather than an operand size override, but we know of
   743  			// no corresponding F2/F3 form, and we see both F2/F3 and 66.
   744  			// Does F2/F3 still take priority, so that the result is an unknown
   745  			// instruction, or does the 66 take priority, so that the extended
   746  			// 66 instruction should be interpreted as having a REP/REPN prefix?
   747  			// Intel xed does the former and GNU libopcodes does the latter.
   748  			// We side with Intel xed, unless we are trying to match libopcodes
   749  			// more closely during the comparison-based test suite.
   750  			//
   751  			// In 64-bit mode REX.W is another valid prefix to test for, but
   752  			// there is less ambiguity about that. When present, REX.W is
   753  			// always the first entry in the table.
   754  			n := int(decoder[pc])
   755  			pc++
   756  			sawF3 := false
   757  			for j := 0; j < n; j++ {
   758  				prefix := Prefix(decoder[pc+2*j])
   759  				if prefix.IsREX() {
   760  					rexUsed |= prefix
   761  					if rex&prefix == prefix {
   762  						pc = int(decoder[pc+2*j+1])
   763  						continue Decode
   764  					}
   765  					continue
   766  				}
   767  				ok := false
   768  				if prefix == 0 {
   769  					ok = true
   770  				} else if prefix.IsREX() {
   771  					rexUsed |= prefix
   772  					if rex&prefix == prefix {
   773  						ok = true
   774  					}
   775  				} else {
   776  					if prefix == 0xF3 {
   777  						sawF3 = true
   778  					}
   779  					switch prefix {
   780  					case PrefixLOCK:
   781  						if lockIndex >= 0 {
   782  							inst.Prefix[lockIndex] |= PrefixImplicit
   783  							ok = true
   784  						}
   785  					case PrefixREP, PrefixREPN:
   786  						if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
   787  							inst.Prefix[repIndex] |= PrefixImplicit
   788  							ok = true
   789  						}
   790  						if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
   791  							// Check to see if earlier prefix F3 is present.
   792  							for i := repIndex - 1; i >= 0; i-- {
   793  								if inst.Prefix[i]&0xFF == prefix {
   794  									inst.Prefix[i] |= PrefixImplicit
   795  									ok = true
   796  								}
   797  							}
   798  						}
   799  						if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
   800  							// Check to see if earlier prefix F2 is present.
   801  							for i := repIndex - 1; i >= 0; i-- {
   802  								if inst.Prefix[i]&0xFF == prefix {
   803  									inst.Prefix[i] |= PrefixImplicit
   804  									ok = true
   805  								}
   806  							}
   807  						}
   808  					case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
   809  						if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
   810  							inst.Prefix[segIndex] |= PrefixImplicit
   811  							ok = true
   812  						}
   813  					case PrefixDataSize:
   814  						// Looking for 66 mandatory prefix.
   815  						// The F2/F3 mandatory prefixes take priority when both are present.
   816  						// If we got this far in the xCondPrefix table and an F2/F3 is present,
   817  						// it means the table didn't have any entry for that prefix. But if 66 has
   818  						// special meaning, perhaps F2/F3 have special meaning that we don't know.
   819  						// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
   820  						// GNU libopcodes allows the 66 to match. We do what Intel xed does
   821  						// except in gnuCompat mode.
   822  						if repIndex >= 0 && !gnuCompat {
   823  							inst.Op = 0
   824  							break Decode
   825  						}
   826  						if dataSizeIndex >= 0 {
   827  							inst.Prefix[dataSizeIndex] |= PrefixImplicit
   828  							ok = true
   829  						}
   830  					case PrefixAddrSize:
   831  						if addrSizeIndex >= 0 {
   832  							inst.Prefix[addrSizeIndex] |= PrefixImplicit
   833  							ok = true
   834  						}
   835  					}
   836  				}
   837  				if ok {
   838  					pc = int(decoder[pc+2*j+1])
   839  					continue Decode
   840  				}
   841  			}
   842  			inst.Op = 0
   843  			break Decode
   844  
   845  		case xCondSlashR:
   846  			pc = int(decoder[pc+regop&7])
   847  
   848  		// Input.
   849  
   850  		case xReadSlashR:
   851  			// done above
   852  
   853  		case xReadIb:
   854  			if pos >= len(src) {
   855  				return truncated(src, mode)
   856  			}
   857  			imm8 = int8(src[pos])
   858  			pos++
   859  
   860  		case xReadIw:
   861  			if pos+2 > len(src) {
   862  				return truncated(src, mode)
   863  			}
   864  			imm = int64(binary.LittleEndian.Uint16(src[pos:]))
   865  			pos += 2
   866  
   867  		case xReadId:
   868  			if pos+4 > len(src) {
   869  				return truncated(src, mode)
   870  			}
   871  			imm = int64(binary.LittleEndian.Uint32(src[pos:]))
   872  			pos += 4
   873  
   874  		case xReadIo:
   875  			if pos+8 > len(src) {
   876  				return truncated(src, mode)
   877  			}
   878  			imm = int64(binary.LittleEndian.Uint64(src[pos:]))
   879  			pos += 8
   880  
   881  		case xReadCb:
   882  			if pos >= len(src) {
   883  				return truncated(src, mode)
   884  			}
   885  			immc = int64(src[pos])
   886  			pos++
   887  
   888  		case xReadCw:
   889  			if pos+2 > len(src) {
   890  				return truncated(src, mode)
   891  			}
   892  			immc = int64(binary.LittleEndian.Uint16(src[pos:]))
   893  			pos += 2
   894  
   895  		case xReadCm:
   896  			if addrMode == 16 {
   897  				if pos+2 > len(src) {
   898  					return truncated(src, mode)
   899  				}
   900  				immc = int64(binary.LittleEndian.Uint16(src[pos:]))
   901  				pos += 2
   902  			} else if addrMode == 32 {
   903  				if pos+4 > len(src) {
   904  					return truncated(src, mode)
   905  				}
   906  				immc = int64(binary.LittleEndian.Uint32(src[pos:]))
   907  				pos += 4
   908  			} else {
   909  				if pos+8 > len(src) {
   910  					return truncated(src, mode)
   911  				}
   912  				immc = int64(binary.LittleEndian.Uint64(src[pos:]))
   913  				pos += 8
   914  			}
   915  		case xReadCd:
   916  			if pos+4 > len(src) {
   917  				return truncated(src, mode)
   918  			}
   919  			immc = int64(binary.LittleEndian.Uint32(src[pos:]))
   920  			pos += 4
   921  
   922  		case xReadCp:
   923  			if pos+6 > len(src) {
   924  				return truncated(src, mode)
   925  			}
   926  			w := binary.LittleEndian.Uint32(src[pos:])
   927  			w2 := binary.LittleEndian.Uint16(src[pos+4:])
   928  			immc = int64(w2)<<32 | int64(w)
   929  			pos += 6
   930  
   931  		// Output.
   932  
   933  		case xSetOp:
   934  			inst.Op = Op(decoder[pc])
   935  			pc++
   936  
   937  		case xArg1,
   938  			xArg3,
   939  			xArgAL,
   940  			xArgAX,
   941  			xArgCL,
   942  			xArgCS,
   943  			xArgDS,
   944  			xArgDX,
   945  			xArgEAX,
   946  			xArgEDX,
   947  			xArgES,
   948  			xArgFS,
   949  			xArgGS,
   950  			xArgRAX,
   951  			xArgRDX,
   952  			xArgSS,
   953  			xArgST,
   954  			xArgXMM0:
   955  			inst.Args[narg] = fixedArg[x]
   956  			narg++
   957  
   958  		case xArgImm8:
   959  			inst.Args[narg] = Imm(imm8)
   960  			narg++
   961  
   962  		case xArgImm8u:
   963  			inst.Args[narg] = Imm(uint8(imm8))
   964  			narg++
   965  
   966  		case xArgImm16:
   967  			inst.Args[narg] = Imm(int16(imm))
   968  			narg++
   969  
   970  		case xArgImm16u:
   971  			inst.Args[narg] = Imm(uint16(imm))
   972  			narg++
   973  
   974  		case xArgImm32:
   975  			inst.Args[narg] = Imm(int32(imm))
   976  			narg++
   977  
   978  		case xArgImm64:
   979  			inst.Args[narg] = Imm(imm)
   980  			narg++
   981  
   982  		case xArgM,
   983  			xArgM128,
   984  			xArgM1428byte,
   985  			xArgM16,
   986  			xArgM16and16,
   987  			xArgM16and32,
   988  			xArgM16and64,
   989  			xArgM16colon16,
   990  			xArgM16colon32,
   991  			xArgM16colon64,
   992  			xArgM16int,
   993  			xArgM2byte,
   994  			xArgM32,
   995  			xArgM32and32,
   996  			xArgM32fp,
   997  			xArgM32int,
   998  			xArgM512byte,
   999  			xArgM64,
  1000  			xArgM64fp,
  1001  			xArgM64int,
  1002  			xArgM8,
  1003  			xArgM80bcd,
  1004  			xArgM80dec,
  1005  			xArgM80fp,
  1006  			xArgM94108byte,
  1007  			xArgMem:
  1008  			if !haveMem {
  1009  				inst.Op = 0
  1010  				break Decode
  1011  			}
  1012  			inst.Args[narg] = mem
  1013  			inst.MemBytes = int(memBytes[decodeOp(x)])
  1014  			narg++
  1015  
  1016  		case xArgPtr16colon16:
  1017  			inst.Args[narg] = Imm(immc >> 16)
  1018  			inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
  1019  			narg += 2
  1020  
  1021  		case xArgPtr16colon32:
  1022  			inst.Args[narg] = Imm(immc >> 32)
  1023  			inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
  1024  			narg += 2
  1025  
  1026  		case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
  1027  			// TODO(rsc): Can address be 64 bits?
  1028  			mem = Mem{Disp: int64(immc)}
  1029  			if segIndex >= 0 {
  1030  				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
  1031  				inst.Prefix[segIndex] |= PrefixImplicit
  1032  			}
  1033  			inst.Args[narg] = mem
  1034  			inst.MemBytes = int(memBytes[decodeOp(x)])
  1035  			narg++
  1036  
  1037  		case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
  1038  			base := baseReg[x]
  1039  			index := Reg(regop)
  1040  			if rex != 0 && base == AL && index >= 4 {
  1041  				rexUsed |= PrefixREX
  1042  				index -= 4
  1043  				base = SPB
  1044  			}
  1045  			inst.Args[narg] = base + index
  1046  			narg++
  1047  
  1048  		case xArgMm, xArgMm1, xArgTR0dashTR7:
  1049  			inst.Args[narg] = baseReg[x] + Reg(regop&7)
  1050  			narg++
  1051  
  1052  		case xArgCR0dashCR7:
  1053  			// AMD documents an extension that the LOCK prefix
  1054  			// can be used in place of a REX prefix in order to access
  1055  			// CR8 from 32-bit mode. The LOCK prefix is allowed in
  1056  			// all modes, provided the corresponding CPUID bit is set.
  1057  			if lockIndex >= 0 {
  1058  				inst.Prefix[lockIndex] |= PrefixImplicit
  1059  				regop += 8
  1060  			}
  1061  			inst.Args[narg] = CR0 + Reg(regop)
  1062  			narg++
  1063  
  1064  		case xArgSreg:
  1065  			regop &= 7
  1066  			if regop >= 6 {
  1067  				inst.Op = 0
  1068  				break Decode
  1069  			}
  1070  			inst.Args[narg] = ES + Reg(regop)
  1071  			narg++
  1072  
  1073  		case xArgRmf16, xArgRmf32, xArgRmf64:
  1074  			base := baseReg[x]
  1075  			index := Reg(modrm & 07)
  1076  			if rex&PrefixREXB != 0 {
  1077  				rexUsed |= PrefixREXB
  1078  				index += 8
  1079  			}
  1080  			inst.Args[narg] = base + index
  1081  			narg++
  1082  
  1083  		case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
  1084  			n := inst.Opcode >> uint(opshift+8) & 07
  1085  			base := baseReg[x]
  1086  			index := Reg(n)
  1087  			if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
  1088  				rexUsed |= PrefixREXB
  1089  				index += 8
  1090  			}
  1091  			if rex != 0 && base == AL && index >= 4 {
  1092  				rexUsed |= PrefixREX
  1093  				index -= 4
  1094  				base = SPB
  1095  			}
  1096  			inst.Args[narg] = base + index
  1097  			narg++
  1098  
  1099  		case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
  1100  			xArgMmM32, xArgMmM64, xArgMm2M64,
  1101  			xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128:
  1102  			if haveMem {
  1103  				inst.Args[narg] = mem
  1104  				inst.MemBytes = int(memBytes[decodeOp(x)])
  1105  			} else {
  1106  				base := baseReg[x]
  1107  				index := Reg(rm)
  1108  				switch decodeOp(x) {
  1109  				case xArgMmM32, xArgMmM64, xArgMm2M64:
  1110  					// There are only 8 MMX registers, so these ignore the REX.X bit.
  1111  					index &= 7
  1112  				case xArgRM8:
  1113  					if rex != 0 && index >= 4 {
  1114  						rexUsed |= PrefixREX
  1115  						index -= 4
  1116  						base = SPB
  1117  					}
  1118  				}
  1119  				inst.Args[narg] = base + index
  1120  			}
  1121  			narg++
  1122  
  1123  		case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
  1124  			if haveMem {
  1125  				inst.Op = 0
  1126  				break Decode
  1127  			}
  1128  			inst.Args[narg] = baseReg[x] + Reg(rm&7)
  1129  			narg++
  1130  
  1131  		case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
  1132  			if haveMem {
  1133  				inst.Op = 0
  1134  				break Decode
  1135  			}
  1136  			inst.Args[narg] = baseReg[x] + Reg(rm)
  1137  			narg++
  1138  
  1139  		case xArgRel8:
  1140  			inst.Args[narg] = Rel(int8(immc))
  1141  			narg++
  1142  
  1143  		case xArgRel16:
  1144  			inst.Args[narg] = Rel(int16(immc))
  1145  			narg++
  1146  
  1147  		case xArgRel32:
  1148  			inst.Args[narg] = Rel(int32(immc))
  1149  			narg++
  1150  		}
  1151  	}
  1152  
  1153  	if inst.Op == 0 {
  1154  		// Invalid instruction.
  1155  		if nprefix > 0 {
  1156  			return instPrefix(src[0], mode) // invalid instruction
  1157  		}
  1158  		return Inst{Len: pos}, ErrUnrecognized
  1159  	}
  1160  
  1161  	// Matched! Hooray!
  1162  
  1163  	// 90 decodes as XCHG EAX, EAX but is NOP.
  1164  	// 66 90 decodes as XCHG AX, AX and is NOP too.
  1165  	// 48 90 decodes as XCHG RAX, RAX and is NOP too.
  1166  	// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
  1167  	// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
  1168  	// It's all too special to handle in the decoding tables, at least for now.
  1169  	if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
  1170  		if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
  1171  			inst.Op = NOP
  1172  			if dataSizeIndex >= 0 {
  1173  				inst.Prefix[dataSizeIndex] &^= PrefixImplicit
  1174  			}
  1175  			inst.Args[0] = nil
  1176  			inst.Args[1] = nil
  1177  		}
  1178  		if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
  1179  			inst.Prefix[repIndex] |= PrefixImplicit
  1180  			inst.Op = PAUSE
  1181  			inst.Args[0] = nil
  1182  			inst.Args[1] = nil
  1183  		} else if gnuCompat {
  1184  			for i := nprefix - 1; i >= 0; i-- {
  1185  				if inst.Prefix[i]&0xFF == 0xF3 {
  1186  					inst.Prefix[i] |= PrefixImplicit
  1187  					inst.Op = PAUSE
  1188  					inst.Args[0] = nil
  1189  					inst.Args[1] = nil
  1190  					break
  1191  				}
  1192  			}
  1193  		}
  1194  	}
  1195  
  1196  	// defaultSeg returns the default segment for an implicit
  1197  	// memory reference: the final override if present, or else DS.
  1198  	defaultSeg := func() Reg {
  1199  		if segIndex >= 0 {
  1200  			inst.Prefix[segIndex] |= PrefixImplicit
  1201  			return prefixToSegment(inst.Prefix[segIndex])
  1202  		}
  1203  		return DS
  1204  	}
  1205  
  1206  	// Add implicit arguments not present in the tables.
  1207  	// Normally we shy away from making implicit arguments explicit,
  1208  	// following the Intel manuals, but adding the arguments seems
  1209  	// the best way to express the effect of the segment override prefixes.
  1210  	// TODO(rsc): Perhaps add these to the tables and
  1211  	// create bytecode instructions for them.
  1212  	usedAddrSize := false
  1213  	switch inst.Op {
  1214  	case INSB, INSW, INSD:
  1215  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1216  		inst.Args[1] = DX
  1217  		usedAddrSize = true
  1218  
  1219  	case OUTSB, OUTSW, OUTSD:
  1220  		inst.Args[0] = DX
  1221  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1222  		usedAddrSize = true
  1223  
  1224  	case MOVSB, MOVSW, MOVSD, MOVSQ:
  1225  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1226  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1227  		usedAddrSize = true
  1228  
  1229  	case CMPSB, CMPSW, CMPSD, CMPSQ:
  1230  		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1231  		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1232  		usedAddrSize = true
  1233  
  1234  	case LODSB, LODSW, LODSD, LODSQ:
  1235  		switch inst.Op {
  1236  		case LODSB:
  1237  			inst.Args[0] = AL
  1238  		case LODSW:
  1239  			inst.Args[0] = AX
  1240  		case LODSD:
  1241  			inst.Args[0] = EAX
  1242  		case LODSQ:
  1243  			inst.Args[0] = RAX
  1244  		}
  1245  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1246  		usedAddrSize = true
  1247  
  1248  	case STOSB, STOSW, STOSD, STOSQ:
  1249  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1250  		switch inst.Op {
  1251  		case STOSB:
  1252  			inst.Args[1] = AL
  1253  		case STOSW:
  1254  			inst.Args[1] = AX
  1255  		case STOSD:
  1256  			inst.Args[1] = EAX
  1257  		case STOSQ:
  1258  			inst.Args[1] = RAX
  1259  		}
  1260  		usedAddrSize = true
  1261  
  1262  	case SCASB, SCASW, SCASD, SCASQ:
  1263  		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1264  		switch inst.Op {
  1265  		case SCASB:
  1266  			inst.Args[0] = AL
  1267  		case SCASW:
  1268  			inst.Args[0] = AX
  1269  		case SCASD:
  1270  			inst.Args[0] = EAX
  1271  		case SCASQ:
  1272  			inst.Args[0] = RAX
  1273  		}
  1274  		usedAddrSize = true
  1275  
  1276  	case XLATB:
  1277  		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
  1278  		usedAddrSize = true
  1279  	}
  1280  
  1281  	// If we used the address size annotation to construct the
  1282  	// argument list, mark that prefix as implicit: it doesn't need
  1283  	// to be shown when printing the instruction.
  1284  	if haveMem || usedAddrSize {
  1285  		if addrSizeIndex >= 0 {
  1286  			inst.Prefix[addrSizeIndex] |= PrefixImplicit
  1287  		}
  1288  	}
  1289  
  1290  	// Similarly, if there's some memory operand, the segment
  1291  	// will be shown there and doesn't need to be shown as an
  1292  	// explicit prefix.
  1293  	if haveMem {
  1294  		if segIndex >= 0 {
  1295  			inst.Prefix[segIndex] |= PrefixImplicit
  1296  		}
  1297  	}
  1298  
  1299  	// Branch predict prefixes are overloaded segment prefixes,
  1300  	// since segment prefixes don't make sense on conditional jumps.
  1301  	// Rewrite final instance to prediction prefix.
  1302  	// The set of instructions to which the prefixes apply (other than the
  1303  	// Jcc conditional jumps) is not 100% clear from the manuals, but
  1304  	// the disassemblers seem to agree about the LOOP and JCXZ instructions,
  1305  	// so we'll follow along.
  1306  	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1307  	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
  1308  	PredictLoop:
  1309  		for i := nprefix - 1; i >= 0; i-- {
  1310  			p := inst.Prefix[i]
  1311  			switch p & 0xFF {
  1312  			case PrefixCS:
  1313  				inst.Prefix[i] = PrefixPN
  1314  				break PredictLoop
  1315  			case PrefixDS:
  1316  				inst.Prefix[i] = PrefixPT
  1317  				break PredictLoop
  1318  			}
  1319  		}
  1320  	}
  1321  
  1322  	// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
  1323  	// A REPN applied to certain control transfers is a BND prefix to bound
  1324  	// the range of possible destinations. There's surprisingly little documentation
  1325  	// about this, so we just do what libopcodes and xed agree on.
  1326  	// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
  1327  	// does not turn into a BND.
  1328  	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1329  	if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
  1330  		for i := nprefix - 1; i >= 0; i-- {
  1331  			p := inst.Prefix[i]
  1332  			if p&^PrefixIgnored == PrefixREPN {
  1333  				inst.Prefix[i] = PrefixBND
  1334  				break
  1335  			}
  1336  		}
  1337  	}
  1338  
  1339  	// The LOCK prefix only applies to certain instructions, and then only
  1340  	// to instances of the instruction with a memory destination.
  1341  	// Other uses of LOCK are invalid and cause a processor exception,
  1342  	// in contrast to the "just ignore it" spirit applied to all other prefixes.
  1343  	// Mark invalid lock prefixes.
  1344  	hasLock := false
  1345  	if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
  1346  		switch inst.Op {
  1347  		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1348  		case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
  1349  			if isMem(inst.Args[0]) {
  1350  				hasLock = true
  1351  				break
  1352  			}
  1353  			fallthrough
  1354  		default:
  1355  			inst.Prefix[lockIndex] |= PrefixInvalid
  1356  		}
  1357  	}
  1358  
  1359  	// In certain cases, all of which require a memory destination,
  1360  	// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
  1361  	// from the Intel Transactional Synchronization Extensions (TSX).
  1362  	//
  1363  	// The specific rules are:
  1364  	// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
  1365  	// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
  1366  	// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
  1367  	if isMem(inst.Args[0]) {
  1368  		if inst.Op == XCHG {
  1369  			hasLock = true
  1370  		}
  1371  
  1372  		for i := len(inst.Prefix) - 1; i >= 0; i-- {
  1373  			p := inst.Prefix[i] &^ PrefixIgnored
  1374  			switch p {
  1375  			case PrefixREPN:
  1376  				if hasLock {
  1377  					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
  1378  				}
  1379  
  1380  			case PrefixREP:
  1381  				if hasLock {
  1382  					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
  1383  				}
  1384  
  1385  				if inst.Op == MOV {
  1386  					op := (inst.Opcode >> 24) &^ 1
  1387  					if op == 0x88 || op == 0xC6 {
  1388  						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
  1389  					}
  1390  				}
  1391  			}
  1392  		}
  1393  	}
  1394  
  1395  	// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
  1396  	if repIndex >= 0 {
  1397  		switch inst.Prefix[repIndex] {
  1398  		case PrefixREP, PrefixREPN:
  1399  			switch inst.Op {
  1400  			// According to the manuals, the REP/REPE prefix applies to all of these,
  1401  			// while the REPN applies only to some of them. However, both libopcodes
  1402  			// and xed show both prefixes explicitly for all instructions, so we do the same.
  1403  			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1404  			case INSB, INSW, INSD,
  1405  				MOVSB, MOVSW, MOVSD, MOVSQ,
  1406  				OUTSB, OUTSW, OUTSD,
  1407  				LODSB, LODSW, LODSD, LODSQ,
  1408  				CMPSB, CMPSW, CMPSD, CMPSQ,
  1409  				SCASB, SCASW, SCASD, SCASQ,
  1410  				STOSB, STOSW, STOSD, STOSQ:
  1411  				// ok
  1412  			default:
  1413  				inst.Prefix[repIndex] |= PrefixIgnored
  1414  			}
  1415  		}
  1416  	}
  1417  
  1418  	// If REX was present, mark implicit if all the 1 bits were consumed.
  1419  	if rexIndex >= 0 {
  1420  		if rexUsed != 0 {
  1421  			rexUsed |= PrefixREX
  1422  		}
  1423  		if rex&^rexUsed == 0 {
  1424  			inst.Prefix[rexIndex] |= PrefixImplicit
  1425  		}
  1426  	}
  1427  
  1428  	inst.DataSize = dataMode
  1429  	inst.AddrSize = addrMode
  1430  	inst.Mode = mode
  1431  	inst.Len = pos
  1432  	return inst, nil
  1433  }
  1434  
  1435  var errInternal = errors.New("internal error") // sentinel error; NOTE(review): its uses are outside this excerpt — presumably signals a decoder bug, confirm
  1436  
  1437  // addr16 records the eight 16-bit addressing modes as Mem templates:
        // four base+index pairs (Scale 1) followed by four base-only forms.
        // NOTE(review): the lookup is outside this excerpt; presumably the array
        // is indexed by the ModR/M r/m field — confirm against the caller.
  1438  var addr16 = [8]Mem{
  1439  	{Base: BX, Scale: 1, Index: SI},
  1440  	{Base: BX, Scale: 1, Index: DI},
  1441  	{Base: BP, Scale: 1, Index: SI},
  1442  	{Base: BP, Scale: 1, Index: DI},
  1443  	{Base: SI},
  1444  	{Base: DI},
  1445  	{Base: BP},
  1446  	{Base: BX},
  1447  }
  1448  
  1449  // baseRegForBits returns the accumulator register of the given width
        // in bits: AL for 8, AX for 16, EAX for 32, and RAX for 64.
        // Callers offset the result to pick another register of the same width,
        // as in baseRegForBits(addrMode) + DI - AX.
        // Any other width returns 0.
  1450  func baseRegForBits(bits int) Reg {
  1451  	switch bits {
  1452  	case 8:
  1453  		return AL
  1454  	case 16:
  1455  		return AX
  1456  	case 32:
  1457  		return EAX
  1458  	case 64:
  1459  		return RAX
  1460  	}
  1461  	return 0 // unsupported width
  1462  }
  1463  
  1464  // baseReg records the base register for argument types that specify
  1465  // a range of registers indexed by op, regop, or rm.
        // The table is indexed by decodeOp value; the decoder adds the selected
        // register index to the entry (base + index) to form the argument.
        // Bytecodes not listed here map to the zero Reg.
  1466  var baseReg = [...]Reg{
  1467  	xArgDR0dashDR7: DR0,
  1468  	xArgMm1:        M0,
  1469  	xArgMm2:        M0,
  1470  	xArgMm2M64:     M0,
  1471  	xArgMm:         M0,
  1472  	xArgMmM32:      M0,
  1473  	xArgMmM64:      M0,
  1474  	xArgR16:        AX,
  1475  	xArgR16op:      AX,
  1476  	xArgR32:        EAX,
  1477  	xArgR32M16:     EAX,
  1478  	xArgR32M8:      EAX,
  1479  	xArgR32op:      EAX,
  1480  	xArgR64:        RAX,
  1481  	xArgR64M16:     RAX,
  1482  	xArgR64op:      RAX,
  1483  	xArgR8:         AL,
  1484  	xArgR8op:       AL,
  1485  	xArgRM16:       AX,
  1486  	xArgRM32:       EAX,
  1487  	xArgRM64:       RAX,
  1488  	xArgRM8:        AL,
  1489  	xArgRmf16:      AX,
  1490  	xArgRmf32:      EAX,
  1491  	xArgRmf64:      RAX,
  1492  	xArgSTi:        F0,
  1493  	xArgTR0dashTR7: TR0,
  1494  	xArgXmm1:       X0,
  1495  	xArgXmm2:       X0,
  1496  	xArgXmm2M128:   X0,
  1497  	xArgXmm2M16:    X0,
  1498  	xArgXmm2M32:    X0,
  1499  	xArgXmm2M64:    X0,
  1500  	xArgXmm:        X0,
  1501  	xArgXmmM128:    X0,
  1502  	xArgXmmM32:     X0,
  1503  	xArgXmmM64:     X0,
  1504  }
  1505  
  1506  // prefixToSegment returns the segment register
  1507  // corresponding to a particular segment prefix.
        // The PrefixImplicit flag is cleared before matching, so a prefix that
        // has already been marked implicit still resolves to its segment.
        // Any value that is not one of the six segment prefixes yields 0.
  1508  func prefixToSegment(p Prefix) Reg {
  1509  	switch p &^ PrefixImplicit {
  1510  	case PrefixCS:
  1511  		return CS
  1512  	case PrefixDS:
  1513  		return DS
  1514  	case PrefixES:
  1515  		return ES
  1516  	case PrefixFS:
  1517  		return FS
  1518  	case PrefixGS:
  1519  		return GS
  1520  	case PrefixSS:
  1521  		return SS
  1522  	}
  1523  	return 0 // not a segment prefix
  1524  }
  1525  
  1526  // fixedArg records the fixed arguments corresponding to the given bytecodes.
        // It is indexed by decodeOp value; the decoder copies the entry directly
        // into the instruction's argument list.
        // Bytecodes not listed here hold the zero Arg.
  1527  var fixedArg = [...]Arg{
  1528  	xArg1:    Imm(1),
  1529  	xArg3:    Imm(3),
  1530  	xArgAL:   AL,
  1531  	xArgAX:   AX,
  1532  	xArgDX:   DX,
  1533  	xArgEAX:  EAX,
  1534  	xArgEDX:  EDX,
  1535  	xArgRAX:  RAX,
  1536  	xArgRDX:  RDX,
  1537  	xArgCL:   CL,
  1538  	xArgCS:   CS,
  1539  	xArgDS:   DS,
  1540  	xArgES:   ES,
  1541  	xArgFS:   FS,
  1542  	xArgGS:   GS,
  1543  	xArgSS:   SS,
  1544  	xArgST:   F0,
  1545  	xArgXMM0: X0,
  1546  }
  1547  
  1548  // memBytes records the size of the memory pointed at
  1549  // by a memory argument of the given form.
        // Sizes are in bytes (written as bits / 8 where the form names a bit width).
        // The table is indexed by decodeOp value and its entries are stored into
        // inst.MemBytes; forms not listed here record a size of 0.
  1550  var memBytes = [...]int8{
  1551  	xArgM128:       128 / 8,
  1552  	xArgM16:        16 / 8,
  1553  	xArgM16and16:   (16 + 16) / 8,
  1554  	xArgM16colon16: (16 + 16) / 8,
  1555  	xArgM16colon32: (16 + 32) / 8,
  1556  	xArgM16int:     16 / 8,
  1557  	xArgM2byte:     2,
  1558  	xArgM32:        32 / 8,
  1559  	xArgM32and32:   (32 + 32) / 8,
  1560  	xArgM32fp:      32 / 8,
  1561  	xArgM32int:     32 / 8,
  1562  	xArgM64:        64 / 8,
  1563  	xArgM64fp:      64 / 8,
  1564  	xArgM64int:     64 / 8,
  1565  	xArgMm2M64:     64 / 8,
  1566  	xArgMmM32:      32 / 8,
  1567  	xArgMmM64:      64 / 8,
  1568  	xArgMoffs16:    16 / 8,
  1569  	xArgMoffs32:    32 / 8,
  1570  	xArgMoffs64:    64 / 8,
  1571  	xArgMoffs8:     8 / 8,
  1572  	xArgR32M16:     16 / 8,
  1573  	xArgR32M8:      8 / 8,
  1574  	xArgR64M16:     16 / 8,
  1575  	xArgRM16:       16 / 8,
  1576  	xArgRM32:       32 / 8,
  1577  	xArgRM64:       64 / 8,
  1578  	xArgRM8:        8 / 8,
  1579  	xArgXmm2M128:   128 / 8,
  1580  	xArgXmm2M16:    16 / 8,
  1581  	xArgXmm2M32:    32 / 8,
  1582  	xArgXmm2M64:    64 / 8,
  1583  	xArgXmm:        128 / 8,
  1584  	xArgXmmM128:    128 / 8,
  1585  	xArgXmmM32:     32 / 8,
  1586  	xArgXmmM64:     64 / 8,
  1587  }
  1588  
  1589  // isCondJmp records the conditional jumps.
        // It is indexed by Op and is true only for the opcodes listed below.
        // The decoder consults it when rewriting CS/DS prefixes into
        // branch-prediction prefixes and when rewriting REPN into BND.
  1590  var isCondJmp = [maxOp + 1]bool{
  1591  	JA:  true,
  1592  	JAE: true,
  1593  	JB:  true,
  1594  	JBE: true,
  1595  	JE:  true,
  1596  	JG:  true,
  1597  	JGE: true,
  1598  	JL:  true,
  1599  	JLE: true,
  1600  	JNE: true,
  1601  	JNO: true,
  1602  	JNP: true,
  1603  	JNS: true,
  1604  	JO:  true,
  1605  	JP:  true,
  1606  	JS:  true,
  1607  }
  1608  
  1609  // isLoop records the loop operators.
        // It is indexed by Op. JECXZ and JRCXZ are listed alongside LOOP, LOOPE,
        // and LOOPNE, but JCXZ is not.
        // NOTE(review): the decoder's branch-prediction check tests JCXZ, JECXZ,
        // and JRCXZ explicitly in addition to isLoop — confirm whether JCXZ
        // should be listed here for other users of this table.
  1610  var isLoop = [maxOp + 1]bool{
  1611  	LOOP:   true,
  1612  	LOOPE:  true,
  1613  	LOOPNE: true,
  1614  	JECXZ:  true,
  1615  	JRCXZ:  true,
  1616  }