github.com/tencent/goom@v1.0.1/internal/arch/x86asm/decode.go

github.com/tencent/goom@v1.0.1/internal/arch/x86asm/decode.go (about)

     1  // Copyright 2014 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Table-driven decoding of x86 instructions.
     6  
     7  package x86asm
     8  
     9  import (
    10  	"encoding/binary"
    11  	"errors"
    12  	"fmt"
    13  	"runtime"
    14  )
    15  
// Set trace to true to cause the decoder to print the PC sequence
// of the executed instruction codes. This is typically only useful
// when you are running a test of a single input case.
//
// When it is true, decode1 prints "run" followed by the decoder program
// counter for every bytecode step it executes, and instPrefix prints the
// file and line of its caller.
const trace = false
    20  
    21  // A decodeOp is a single instruction in the decoder bytecode program.
    22  //
    23  // The decodeOps correspond to consuming and conditionally branching
    24  // on input bytes, consuming additional fields, and then interpreting
    25  // consumed data as instruction arguments. The names of the xRead and xArg
    26  // operations are taken from the Intel manual conventions, for example
    27  // Volume 2, Section 3.1.1, page 487 of
    28  // http://www.intel.com/content/dam/www/public/us/en/documents
    29  // /manuals/64-ia-32-architectures-software-developer-manual-325462.pdf
    30  //
    31  // The actual decoding program is generated by ../x86map.
    32  //
    33  // TODO(rsc): We may be able to merge various of the memory operands
    34  // since we don't care about, say, the distinction between m80dec and m80bcd.
    35  // Similarly, mm and mm1 have identical meaning, as do xmm and xmm1.
    36  
// decodeOp is the numeric code of one operation in the decoder
// bytecode program that decode1 interprets.
type decodeOp uint16

// The decodeOp values are assigned positionally with iota, so inserting,
// removing, or reordering entries renumbers every entry that follows.
// NOTE(review): the generated decoder table presumably stores these numeric
// values, so it would have to be regenerated after such a change — confirm.
const (
	// Control flow.
	xFail  decodeOp = iota // invalid instruction (return)
	xMatch                 // completed match
	xJump                  // jump to pc

	// Conditional branches.
	xCondByte     // switch on instruction byte value
	xCondSlashR   // read and switch on instruction /r value
	xCondPrefix   // switch on presence of instruction prefix
	xCondIs64     // switch on 64-bit processor mode
	xCondDataSize // switch on operand size
	xCondAddrSize // switch on address size
	xCondIsMem    // switch on memory vs register argument

	xSetOp // set instruction opcode

	// Input: consume further bytes of the instruction.
	xReadSlashR // read /r
	xReadIb     // read ib (1 byte)
	xReadIw     // read iw (2 bytes, little-endian)
	xReadID     // read id (4 bytes, little-endian)
	xReadIo     // read io (8 bytes, little-endian)
	xReadCb     // read cb (1 byte)
	xReadCw     // read cw (2 bytes, little-endian)
	xReadCd     // read cd (4 bytes, little-endian)
	xReadCp     // read cp (6 bytes)
	xReadCm     // read cm (2, 4, or 8 bytes depending on the address size)

	// Arguments: interpret the consumed data as an instruction argument.
	xArg1            // arg 1
	xArg3            // arg 3
	xArgAL           // arg AL
	xArgAX           // arg AX
	xArgCL           // arg CL
	xArgCR0dashCR7   // arg CR0-CR7
	xArgCS           // arg CS
	xArgDR0dashDR7   // arg DR0-DR7
	xArgDS           // arg DS
	xArgDX           // arg DX
	xArgEAX          // arg EAX
	xArgEDX          // arg EDX
	xArgES           // arg ES
	xArgFS           // arg FS
	xArgGS           // arg GS
	xArgImm16        // arg imm16
	xArgImm32        // arg imm32
	xArgImm64        // arg imm64
	xArgImm8         // arg imm8
	xArgImm8u        // arg imm8 but record as unsigned
	xArgImm16u       // arg imm16 but record as unsigned
	xArgM            // arg m
	xArgM128         // arg m128
	xArgM256         // arg m256
	xArgM1428byte    // arg m14/28byte
	xArgM16          // arg m16
	xArgM16and16     // arg m16&16
	xArgM16and32     // arg m16&32
	xArgM16and64     // arg m16&64
	xArgM16colon16   // arg m16:16
	xArgM16colon32   // arg m16:32
	xArgM16colon64   // arg m16:64
	xArgM16int       // arg m16int
	xArgM2byte       // arg m2byte
	xArgM32          // arg m32
	xArgM32and32     // arg m32&32
	xArgM32fp        // arg m32fp
	xArgM32int       // arg m32int
	xArgM512byte     // arg m512byte
	xArgM64          // arg m64
	xArgM64fp        // arg m64fp
	xArgM64int       // arg m64int
	xArgM8           // arg m8
	xArgM80bcd       // arg m80bcd
	xArgM80dec       // arg m80dec
	xArgM80fp        // arg m80fp
	xArgM94108byte   // arg m94/108byte
	xArgMm           // arg mm
	xArgMm1          // arg mm1
	xArgMm2          // arg mm2
	xArgMm2M64       // arg mm2/m64
	xArgMmM32        // arg mm/m32
	xArgMmM64        // arg mm/m64
	xArgMem          // arg mem
	xArgMoffs16      // arg moffs16
	xArgMoffs32      // arg moffs32
	xArgMoffs64      // arg moffs64
	xArgMoffs8       // arg moffs8
	xArgPtr16colon16 // arg ptr16:16
	xArgPtr16colon32 // arg ptr16:32
	xArgR16          // arg r16
	xArgR16op        // arg r16 with +rw in opcode
	xArgR32          // arg r32
	xArgR32M16       // arg r32/m16
	xArgR32M8        // arg r32/m8
	xArgR32op        // arg r32 with +rd in opcode
	xArgR64          // arg r64
	xArgR64M16       // arg r64/m16
	xArgR64op        // arg r64 with +rd in opcode
	xArgR8           // arg r8
	xArgR8op         // arg r8 with +rb in opcode
	xArgRAX          // arg RAX
	xArgRDX          // arg RDX
	xArgRM16         // arg r/m16
	xArgRM32         // arg r/m32
	xArgRM64         // arg r/m64
	xArgRM8          // arg r/m8
	xArgRel16        // arg rel16
	xArgRel32        // arg rel32
	xArgRel8         // arg rel8
	xArgSS           // arg SS
	xArgST           // arg ST, aka ST(0)
	xArgSTi          // arg ST(i) with +i in opcode
	xArgSreg         // arg Sreg
	xArgTR0dashTR7   // arg TR0-TR7
	xArgXmm          // arg xmm
	xArgXMM0         // arg <XMM0>
	xArgXmm1         // arg xmm1
	xArgXmm2         // arg xmm2
	xArgXmm2M128     // arg xmm2/m128
	xArgYmm2M256     // arg ymm2/m256
	xArgXmm2M16      // arg xmm2/m16
	xArgXmm2M32      // arg xmm2/m32
	xArgXmm2M64      // arg xmm2/m64
	xArgXmmM128      // arg xmm/m128
	xArgXmmM32       // arg xmm/m32
	xArgXmmM64       // arg xmm/m64
	xArgYmm1         // arg ymm1
	xArgRmf16        // arg r/m16 but force mod=3
	xArgRmf32        // arg r/m32 but force mod=3
	xArgRmf64        // arg r/m64 but force mod=3
)
   167  
   168  // instPrefix returns an Inst describing just one prefix byte.
   169  // It is only used if there is a prefix followed by an unintelligible
   170  // or invalid instruction byte sequence.
   171  func instPrefix(b byte, mode int) (Inst, error) {
   172  	// When tracing it is useful to see what called instPrefix to report an error.
   173  	if trace {
   174  		_, file, line, _ := runtime.Caller(1)
   175  		fmt.Printf("%s:%d\n", file, line)
   176  	}
   177  
   178  	p := Prefix(b)
   179  	switch p {
   180  	case PrefixDataSize:
   181  		if mode == 16 {
   182  			p = PrefixData32
   183  		} else {
   184  			p = PrefixData16
   185  		}
   186  	case PrefixAddrSize:
   187  		if mode == 32 {
   188  			p = PrefixAddr16
   189  		} else {
   190  			p = PrefixAddr32
   191  		}
   192  	}
   193  	// Note: using composite literal with Prefix key confuses 'bundle' tool.
   194  	inst := Inst{Len: 1}
   195  	inst.Prefix = Prefixes{p}
   196  
   197  	return inst, nil
   198  }
   199  
   200  // truncated reports a truncated instruction.
   201  // For now we use instPrefix but perhaps later we will return
   202  // a specific error here.
   203  func truncated(src []byte, mode int) (Inst, error) {
   204  	if len(src) == 0 {
   205  		return Inst{}, ErrTruncated
   206  	}
   207  
   208  	return instPrefix(src[0], mode) // too long
   209  }
   210  
// These are the errors returned by Decode.
//
// ErrInvalidMode is returned when the mode argument is not 16, 32, or 64.
// ErrTruncated is returned by the truncated helper when src is empty;
// non-empty input that ends early is reported through instPrefix instead.
// NOTE(review): ErrUnrecognized is not referenced in the part of the
// decoder visible here — presumably it reports bytes that match no known
// instruction; confirm against the rest of decode1.
var (
	ErrInvalidMode  = errors.New("invalid x86 mode in Decode")
	ErrTruncated    = errors.New("truncated instruction")
	ErrUnrecognized = errors.New("unrecognized instruction")
)
   217  
// decoderCover records coverage information for which parts
// of the byte code have been executed.
//
// It is declared without an initializer, so it starts out nil and nothing
// is recorded. When it is non-nil, decode1 sets decoderCover[pc] to true
// for every decoder program counter it executes, so the slice must be
// long enough to be indexed by each such pc.
var decoderCover []bool
   221  
   222  // Decode decodes the leading bytes in src as a single instruction.
   223  // The mode arguments specifies the assumed processor mode:
   224  // 16, 32, or 64 for 16-, 32-, and 64-bit execution modes.
   225  func Decode(src []byte, mode int) (inst Inst, err error) {
   226  	return decode1(src, mode, false)
   227  }
   228  
   229  // decode1 is the implementation of Decode but takes an extra
   230  // gnuCompat flag to cause it to change its behavior to mimic
   231  // bugs (or at least unique features) of GNU libopcodes as used
   232  // by objdump. We don't believe that logic is the right thing to do
   233  // in general, but when testing against libopcodes it simplifies the
   234  // comparison if we adjust a few small pieces of logic.
   235  // The affected logic is in the conditional branch for "mandatory" prefixes,
   236  // case xCondPrefix.
   237  // nolint
   238  func decode1(src []byte, mode int, gnuCompat bool) (Inst, error) {
   239  	switch mode {
   240  	case 16, 32, 64:
   241  		// ok
   242  		// TODO(rsc): 64-bit mode not tested, probably not working.
   243  	default:
   244  		return Inst{}, ErrInvalidMode
   245  	}
   246  
	// Maximum instruction size is 15 bytes.
	// If decoding needs to read more than that, report a truncated instruction.
   249  	if len(src) > 15 {
   250  		src = src[:15]
   251  	}
   252  
   253  	var (
   254  		// prefix decoding information
   255  		pos           = 0    // position reading src
   256  		nprefix       = 0    // number of prefixes
   257  		lockIndex     = -1   // index of LOCK prefix in src and inst.Prefix
   258  		repIndex      = -1   // index of REP/REPN prefix in src and inst.Prefix
   259  		segIndex      = -1   // index of Group 2 prefix in src and inst.Prefix
   260  		dataSizeIndex = -1   // index of Group 3 prefix in src and inst.Prefix
   261  		addrSizeIndex = -1   // index of Group 4 prefix in src and inst.Prefix
   262  		rex           Prefix // rex byte if present (or 0)
   263  		rexUsed       Prefix // bits used in rex byte
   264  		rexIndex      = -1   // index of rex byte
   265  		vex           Prefix // use vex encoding
   266  		vexIndex      = -1   // index of vex prefix
   267  
   268  		addrMode = mode // address mode (width in bits)
   269  		dataMode = mode // operand mode (width in bits)
   270  
   271  		// decoded ModR/M fields
   272  		haveModrm bool
   273  		modrm     int
   274  		mod       int
   275  		regop     int
   276  		rm        int
   277  
   278  		// if ModR/M is memory reference, Mem form
   279  		mem     Mem
   280  		haveMem bool
   281  
   282  		// decoded SIB fields
   283  		haveSIB bool
   284  		sib     int
   285  		scale   int
   286  		index   int
   287  		base    int
   288  		displen int
   289  		dispoff int
   290  
   291  		// decoded immediate values
   292  		imm     int64
   293  		imm8    int8
   294  		immc    int64
   295  		immcpos int
   296  
   297  		// output
   298  		opshift int
   299  		inst    Inst
   300  		narg    int // number of arguments written to inst
   301  	)
   302  
   303  	if mode == 64 {
   304  		dataMode = 32
   305  	}
   306  
   307  	// Read non-REX prefixes.
   308  ReadPrefixes:
   309  	for ; pos < len(src); pos++ {
   310  		p := Prefix(src[pos])
   311  		switch p {
   312  		default:
   313  			nprefix = pos
   314  			break ReadPrefixes
   315  
   316  		// Group 1 - lock and repeat prefixes
   317  		// According to Intel, there should only be one from this set,
   318  		// but according to AMD both can be present.
   319  		case 0xF0:
   320  			if lockIndex >= 0 {
   321  				inst.Prefix[lockIndex] |= PrefixIgnored
   322  			}
   323  			lockIndex = pos
   324  		case 0xF2, 0xF3:
   325  			if repIndex >= 0 {
   326  				inst.Prefix[repIndex] |= PrefixIgnored
   327  			}
   328  			repIndex = pos
   329  
   330  		// Group 2 - segment override / branch hints
   331  		case 0x26, 0x2E, 0x36, 0x3E:
   332  			if mode == 64 {
   333  				p |= PrefixIgnored
   334  				break
   335  			}
   336  			fallthrough
   337  		case 0x64, 0x65:
   338  			if segIndex >= 0 {
   339  				inst.Prefix[segIndex] |= PrefixIgnored
   340  			}
   341  			segIndex = pos
   342  
   343  		// Group 3 - operand size override
   344  		case 0x66:
   345  			if mode == 16 {
   346  				dataMode = 32
   347  				p = PrefixData32
   348  			} else {
   349  				dataMode = 16
   350  				p = PrefixData16
   351  			}
   352  			if dataSizeIndex >= 0 {
   353  				inst.Prefix[dataSizeIndex] |= PrefixIgnored
   354  			}
   355  			dataSizeIndex = pos
   356  
   357  		// Group 4 - address size override
   358  		case 0x67:
   359  			if mode == 32 {
   360  				addrMode = 16
   361  				p = PrefixAddr16
   362  			} else {
   363  				addrMode = 32
   364  				p = PrefixAddr32
   365  			}
   366  			if addrSizeIndex >= 0 {
   367  				inst.Prefix[addrSizeIndex] |= PrefixIgnored
   368  			}
   369  			addrSizeIndex = pos
   370  
   371  		//Group 5 - Vex encoding
   372  		case 0xC5:
   373  			if pos == 0 && pos+1 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
   374  				vex = p
   375  				vexIndex = pos
   376  				inst.Prefix[pos] = p
   377  				inst.Prefix[pos+1] = Prefix(src[pos+1])
   378  				pos++
   379  				continue
   380  			} else {
   381  				nprefix = pos
   382  				break ReadPrefixes
   383  			}
   384  		case 0xC4:
   385  			if pos == 0 && pos+2 < len(src) && (mode == 64 || (mode == 32 && src[pos+1]&0xc0 == 0xc0)) {
   386  				vex = p
   387  				vexIndex = pos
   388  				inst.Prefix[pos] = p
   389  				inst.Prefix[pos+1] = Prefix(src[pos+1])
   390  				inst.Prefix[pos+2] = Prefix(src[pos+2])
   391  				pos += 2
   392  				continue
   393  			} else {
   394  				nprefix = pos
   395  				break ReadPrefixes
   396  			}
   397  		}
   398  
   399  		if pos >= len(inst.Prefix) {
   400  			return instPrefix(src[0], mode) // too long
   401  		}
   402  
   403  		inst.Prefix[pos] = p
   404  	}
   405  
   406  	// Read REX prefix.
   407  	if pos < len(src) && mode == 64 && Prefix(src[pos]).IsREX() && vex == 0 {
   408  		rex = Prefix(src[pos])
   409  
   410  		rexIndex = pos
   411  
   412  		if pos >= len(inst.Prefix) {
   413  			return instPrefix(src[0], mode) // too long
   414  		}
   415  
   416  		inst.Prefix[pos] = rex
   417  
   418  		pos++
   419  
   420  		if rex&PrefixREXW != 0 {
   421  			dataMode = 64
   422  
   423  			if dataSizeIndex >= 0 {
   424  				inst.Prefix[dataSizeIndex] |= PrefixIgnored
   425  			}
   426  		}
   427  	}
   428  
   429  	// Decode instruction stream, interpreting decoding instructions.
   430  	// opshift gives the shift to use when saving the next
   431  	// opcode byte into inst.Opcode.
   432  	opshift = 24
   433  
   434  	// Decode loop, executing decoder program.
   435  	var oldPC, prevPC int
   436  Decode:
   437  	for pc := 1; ; { // TODO uint
   438  		oldPC = prevPC
   439  		prevPC = pc
   440  		if trace {
   441  			println("run", pc)
   442  		}
   443  		x := decoder[pc]
   444  		if decoderCover != nil {
   445  			decoderCover[pc] = true
   446  		}
   447  		pc++
   448  
   449  		// Read and decode ModR/M if needed by opcode.
   450  		switch decodeOp(x) {
   451  		case xCondSlashR, xReadSlashR:
   452  			if haveModrm {
   453  				return Inst{Len: pos}, errInternal
   454  			}
   455  			haveModrm = true
   456  			if pos >= len(src) {
   457  				return truncated(src, mode)
   458  			}
   459  			modrm = int(src[pos])
   460  			pos++
   461  			if opshift >= 0 {
   462  				inst.Opcode |= uint32(modrm) << uint(opshift)
   463  				opshift -= 8
   464  			}
   465  			mod = modrm >> 6
   466  			regop = (modrm >> 3) & 07
   467  			rm = modrm & 07
   468  			if rex&PrefixREXR != 0 {
   469  				rexUsed |= PrefixREXR
   470  				regop |= 8
   471  			}
   472  			if addrMode == 16 {
   473  				// 16-bit modrm form
   474  				if mod != 3 {
   475  					haveMem = true
   476  					mem = addr16[rm]
   477  					if rm == 6 && mod == 0 {
   478  						mem.Base = 0
   479  					}
   480  
   481  					// Consume disp16 if present.
   482  					if mod == 0 && rm == 6 || mod == 2 {
   483  						if pos+2 > len(src) {
   484  							return truncated(src, mode)
   485  						}
   486  						mem.Disp = int64(binary.LittleEndian.Uint16(src[pos:]))
   487  						pos += 2
   488  					}
   489  
   490  					// Consume disp8 if present.
   491  					if mod == 1 {
   492  						if pos >= len(src) {
   493  							return truncated(src, mode)
   494  						}
   495  						mem.Disp = int64(int8(src[pos]))
   496  						pos++
   497  					}
   498  				}
   499  			} else {
   500  				haveMem = mod != 3
   501  
   502  				// 32-bit or 64-bit form
   503  				// Consume SIB encoding if present.
   504  				if rm == 4 && mod != 3 {
   505  					haveSIB = true
   506  					if pos >= len(src) {
   507  						return truncated(src, mode)
   508  					}
   509  					sib = int(src[pos])
   510  					pos++
   511  					if opshift >= 0 {
   512  						inst.Opcode |= uint32(sib) << uint(opshift)
   513  						opshift -= 8
   514  					}
   515  					scale = sib >> 6
   516  					index = (sib >> 3) & 07
   517  					base = sib & 07
   518  					if rex&PrefixREXB != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x20 == 0 {
   519  						rexUsed |= PrefixREXB
   520  						base |= 8
   521  					}
   522  					if rex&PrefixREXX != 0 || vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0 {
   523  						rexUsed |= PrefixREXX
   524  						index |= 8
   525  					}
   526  
   527  					mem.Scale = 1 << uint(scale)
   528  					if index == 4 {
   529  						// no mem.Index
   530  					} else {
   531  						mem.Index = baseRegForBits(addrMode) + Reg(index)
   532  					}
   533  					if base&7 == 5 && mod == 0 {
   534  						// no mem.Base
   535  					} else {
   536  						mem.Base = baseRegForBits(addrMode) + Reg(base)
   537  					}
   538  				} else {
   539  					if rex&PrefixREXB != 0 {
   540  						rexUsed |= PrefixREXB
   541  						rm |= 8
   542  					}
   543  					if mod == 0 && rm&7 == 5 || rm&7 == 4 {
   544  						// base omitted
   545  					} else if mod != 3 {
   546  						mem.Base = baseRegForBits(addrMode) + Reg(rm)
   547  					}
   548  				}
   549  
   550  				// Consume disp32 if present.
   551  				if mod == 0 && (rm&7 == 5 || haveSIB && base&7 == 5) || mod == 2 {
   552  					if pos+4 > len(src) {
   553  						return truncated(src, mode)
   554  					}
   555  					dispoff = pos
   556  					displen = 4
   557  					mem.Disp = int64(binary.LittleEndian.Uint32(src[pos:]))
   558  					pos += 4
   559  				}
   560  
   561  				// Consume disp8 if present.
   562  				if mod == 1 {
   563  					if pos >= len(src) {
   564  						return truncated(src, mode)
   565  					}
   566  					dispoff = pos
   567  					displen = 1
   568  					mem.Disp = int64(int8(src[pos]))
   569  					pos++
   570  				}
   571  
   572  				// In 64-bit, mod=0 rm=5 is PC-relative instead of just disp.
   573  				// See Vol 2A. Table 2-7.
   574  				if mode == 64 && mod == 0 && rm&7 == 5 {
   575  					if addrMode == 32 {
   576  						mem.Base = EIP
   577  					} else {
   578  						mem.Base = RIP
   579  					}
   580  				}
   581  			}
   582  
   583  			if segIndex >= 0 {
   584  				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
   585  			}
   586  		}
   587  
   588  		// Execute single opcode.
   589  		switch decodeOp(x) {
   590  		default:
   591  			println("bad op", x, "at", pc-1, "from", oldPC)
   592  			return Inst{Len: pos}, errInternal
   593  
   594  		case xFail:
   595  			inst.Op = 0
   596  			break Decode
   597  
   598  		case xMatch:
   599  			break Decode
   600  
   601  		case xJump:
   602  			pc = int(decoder[pc])
   603  
   604  		// Conditional branches.
   605  
   606  		case xCondByte:
   607  			if pos >= len(src) {
   608  				return truncated(src, mode)
   609  			}
   610  			b := src[pos]
   611  			n := int(decoder[pc])
   612  			pc++
   613  			for i := 0; i < n; i++ {
   614  				xb, xpc := decoder[pc], int(decoder[pc+1])
   615  				pc += 2
   616  				if b == byte(xb) {
   617  					pc = xpc
   618  					pos++
   619  					if opshift >= 0 {
   620  						inst.Opcode |= uint32(b) << uint(opshift)
   621  						opshift -= 8
   622  					}
   623  					continue Decode
   624  				}
   625  			}
   626  			// xCondByte is the only conditional with a fall through,
   627  			// so that it can be used to pick off special cases before
   628  			// an xCondSlash. If the fallthrough instruction is xFail,
   629  			// advance the position so that the decoded instruction
   630  			// size includes the byte we just compared against.
   631  			if decodeOp(decoder[pc]) == xJump {
   632  				pc = int(decoder[pc+1])
   633  			}
   634  			if decodeOp(decoder[pc]) == xFail {
   635  				pos++
   636  			}
   637  
   638  		case xCondIs64:
   639  			if mode == 64 {
   640  				pc = int(decoder[pc+1])
   641  			} else {
   642  				pc = int(decoder[pc])
   643  			}
   644  
   645  		case xCondIsMem:
   646  			mem := haveMem
   647  			if !haveModrm {
   648  				if pos >= len(src) {
   649  					return instPrefix(src[0], mode) // too long
   650  				}
   651  				mem = src[pos]>>6 != 3
   652  			}
   653  			if mem {
   654  				pc = int(decoder[pc+1])
   655  			} else {
   656  				pc = int(decoder[pc])
   657  			}
   658  
   659  		case xCondDataSize:
   660  			switch dataMode {
   661  			case 16:
   662  				if dataSizeIndex >= 0 {
   663  					inst.Prefix[dataSizeIndex] |= PrefixImplicit
   664  				}
   665  				pc = int(decoder[pc])
   666  			case 32:
   667  				if dataSizeIndex >= 0 {
   668  					inst.Prefix[dataSizeIndex] |= PrefixImplicit
   669  				}
   670  				pc = int(decoder[pc+1])
   671  			case 64:
   672  				rexUsed |= PrefixREXW
   673  				pc = int(decoder[pc+2])
   674  			}
   675  
   676  		case xCondAddrSize:
   677  			switch addrMode {
   678  			case 16:
   679  				if addrSizeIndex >= 0 {
   680  					inst.Prefix[addrSizeIndex] |= PrefixImplicit
   681  				}
   682  				pc = int(decoder[pc])
   683  			case 32:
   684  				if addrSizeIndex >= 0 {
   685  					inst.Prefix[addrSizeIndex] |= PrefixImplicit
   686  				}
   687  				pc = int(decoder[pc+1])
   688  			case 64:
   689  				pc = int(decoder[pc+2])
   690  			}
   691  
   692  		case xCondPrefix:
   693  			// Conditional branch based on presence or absence of prefixes.
   694  			// The conflict cases here are completely undocumented and
   695  			// differ significantly between GNU libopcodes and Intel xed.
   696  			// I have not written assembly code to divine what various CPUs
   697  			// do, but it wouldn't surprise me if they are not consistent either.
   698  			//
   699  			// The basic idea is to switch on the presence of a prefix, so that
   700  			// for example:
   701  			//
   702  			//	xCondPrefix, 4
   703  			//	0xF3, 123,
   704  			//	0xF2, 234,
   705  			//	0x66, 345,
   706  			//	0, 456
   707  			//
			// branch to 123 if the F3 prefix is present, 234 if the F2 prefix
			// is present, 345 if the 66 prefix is present, and 456 otherwise.
   710  			// The prefixes are given in descending order so that the 0 will be last.
   711  			//
   712  			// It is unclear what should happen if multiple conditions are
   713  			// satisfied: what if F2 and F3 are both present, or if 66 and F2
   714  			// are present, or if all three are present? The one chosen becomes
   715  			// part of the opcode and the others do not. Perhaps the answer
   716  			// depends on the specific opcodes in question.
   717  			//
   718  			// The only clear example is that CRC32 is F2 0F 38 F1 /r, and
   719  			// it comes in 16-bit and 32-bit forms based on the 66 prefix,
   720  			// so 66 F2 0F 38 F1 /r should be treated as F2 taking priority,
   721  			// with the 66 being only an operand size override, and probably
   722  			// F2 66 0F 38 F1 /r should be treated the same.
   723  			// Perhaps that rule is specific to the case of CRC32, since no
   724  			// 66 0F 38 F1 instruction is defined (today) (that we know of).
   725  			// However, both libopcodes and xed seem to generalize this
   726  			// example and choose F2/F3 in preference to 66, and we
   727  			// do the same.
   728  			//
   729  			// Next, what if both F2 and F3 are present? Which wins?
   730  			// The Intel xed rule, and ours, is that the one that occurs last wins.
   731  			// The GNU libopcodes rule, which we implement only in gnuCompat mode,
   732  			// is that F3 beats F2 unless F3 has no special meaning, in which
   733  			// case F3 can be a modified on an F2 special meaning.
   734  			//
   735  			// Concretely,
   736  			//	66 0F D6 /r is MOVQ
   737  			//	F2 0F D6 /r is MOVDQ2Q
   738  			//	F3 0F D6 /r is MOVQ2DQ.
   739  			//
   740  			//	F2 66 0F D6 /r is 66 + MOVDQ2Q always.
   741  			//	66 F2 0F D6 /r is 66 + MOVDQ2Q always.
   742  			//	F3 66 0F D6 /r is 66 + MOVQ2DQ always.
   743  			//	66 F3 0F D6 /r is 66 + MOVQ2DQ always.
   744  			//	F2 F3 0F D6 /r is F2 + MOVQ2DQ always.
   745  			//	F3 F2 0F D6 /r is F3 + MOVQ2DQ in Intel xed, but F2 + MOVQ2DQ in GNU libopcodes.
   746  			//	Adding 66 anywhere in the prefix section of the
   747  			//	last two cases does not change the outcome.
   748  			//
   749  			// Finally, what if there is a variant in which 66 is a mandatory
   750  			// prefix rather than an operand size override, but we know of
   751  			// no corresponding F2/F3 form, and we see both F2/F3 and 66.
   752  			// Does F2/F3 still take priority, so that the result is an unknown
   753  			// instruction, or does the 66 take priority, so that the extended
   754  			// 66 instruction should be interpreted as having a REP/REPN prefix?
   755  			// Intel xed does the former and GNU libopcodes does the latter.
   756  			// We side with Intel xed, unless we are trying to match libopcodes
   757  			// more closely during the comparison-based test suite.
   758  			//
   759  			// In 64-bit mode REX.W is another valid prefix to test for, but
   760  			// there is less ambiguity about that. When present, REX.W is
   761  			// always the first entry in the table.
   762  			n := int(decoder[pc])
   763  			pc++
   764  			sawF3 := false
   765  			for j := 0; j < n; j++ {
   766  				prefix := Prefix(decoder[pc+2*j])
   767  				if prefix.IsREX() {
   768  					rexUsed |= prefix
   769  					if rex&prefix == prefix {
   770  						pc = int(decoder[pc+2*j+1])
   771  						continue Decode
   772  					}
   773  					continue
   774  				}
   775  				ok := false
   776  				if prefix == 0 {
   777  					ok = true
   778  				} else if prefix.IsREX() {
   779  					rexUsed |= prefix
   780  					if rex&prefix == prefix {
   781  						ok = true
   782  					}
   783  				} else if prefix == 0xC5 || prefix == 0xC4 {
   784  					if vex == prefix {
   785  						ok = true
   786  					}
   787  				} else if vex != 0 && (prefix == 0x0F || prefix == 0x0F38 || prefix == 0x0F3A ||
   788  					prefix == 0x66 || prefix == 0xF2 || prefix == 0xF3) {
   789  					var vexM, vexP Prefix
   790  					if vex == 0xC5 {
   791  						vexM = 1 // 2 byte vex always implies 0F
   792  						vexP = inst.Prefix[vexIndex+1]
   793  					} else {
   794  						vexM = inst.Prefix[vexIndex+1]
   795  						vexP = inst.Prefix[vexIndex+2]
   796  					}
   797  					switch prefix {
   798  					case 0x66:
   799  						ok = vexP&3 == 1
   800  					case 0xF3:
   801  						ok = vexP&3 == 2
   802  					case 0xF2:
   803  						ok = vexP&3 == 3
   804  					case 0x0F:
   805  						ok = vexM&3 == 1
   806  					case 0x0F38:
   807  						ok = vexM&3 == 2
   808  					case 0x0F3A:
   809  						ok = vexM&3 == 3
   810  					}
   811  				} else {
   812  					if prefix == 0xF3 {
   813  						sawF3 = true
   814  					}
   815  					switch prefix {
   816  					case PrefixLOCK:
   817  						if lockIndex >= 0 {
   818  							inst.Prefix[lockIndex] |= PrefixImplicit
   819  							ok = true
   820  						}
   821  					case PrefixREP, PrefixREPN:
   822  						if repIndex >= 0 && inst.Prefix[repIndex]&0xFF == prefix {
   823  							inst.Prefix[repIndex] |= PrefixImplicit
   824  							ok = true
   825  						}
   826  						if gnuCompat && !ok && prefix == 0xF3 && repIndex >= 0 && (j+1 >= n || decoder[pc+2*(j+1)] != 0xF2) {
   827  							// Check to see if earlier prefix F3 is present.
   828  							for i := repIndex - 1; i >= 0; i-- {
   829  								if inst.Prefix[i]&0xFF == prefix {
   830  									inst.Prefix[i] |= PrefixImplicit
   831  									ok = true
   832  								}
   833  							}
   834  						}
   835  						if gnuCompat && !ok && prefix == 0xF2 && repIndex >= 0 && !sawF3 && inst.Prefix[repIndex]&0xFF == 0xF3 {
   836  							// Check to see if earlier prefix F2 is present.
   837  							for i := repIndex - 1; i >= 0; i-- {
   838  								if inst.Prefix[i]&0xFF == prefix {
   839  									inst.Prefix[i] |= PrefixImplicit
   840  									ok = true
   841  								}
   842  							}
   843  						}
   844  					case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
   845  						if segIndex >= 0 && inst.Prefix[segIndex]&0xFF == prefix {
   846  							inst.Prefix[segIndex] |= PrefixImplicit
   847  							ok = true
   848  						}
   849  					case PrefixDataSize:
   850  						// Looking for 66 mandatory prefix.
   851  						// The F2/F3 mandatory prefixes take priority when both are present.
   852  						// If we got this far in the xCondPrefix table and an F2/F3 is present,
   853  						// it means the table didn't have any entry for that prefix. But if 66 has
   854  						// special meaning, perhaps F2/F3 have special meaning that we don't know.
   855  						// Intel xed works this way, treating the F2/F3 as inhibiting the 66.
   856  						// GNU libopcodes allows the 66 to match. We do what Intel xed does
   857  						// except in gnuCompat mode.
   858  						if repIndex >= 0 && !gnuCompat {
   859  							inst.Op = 0
   860  							break Decode
   861  						}
   862  						if dataSizeIndex >= 0 {
   863  							inst.Prefix[dataSizeIndex] |= PrefixImplicit
   864  							ok = true
   865  						}
   866  					case PrefixAddrSize:
   867  						if addrSizeIndex >= 0 {
   868  							inst.Prefix[addrSizeIndex] |= PrefixImplicit
   869  							ok = true
   870  						}
   871  					}
   872  				}
   873  				if ok {
   874  					pc = int(decoder[pc+2*j+1])
   875  					continue Decode
   876  				}
   877  			}
   878  			inst.Op = 0
   879  			break Decode
   880  
   881  		case xCondSlashR:
   882  			pc = int(decoder[pc+regop&7])
   883  
   884  		// Input.
   885  
   886  		case xReadSlashR:
   887  			// done above
   888  
   889  		case xReadIb:
   890  			if pos >= len(src) {
   891  				return truncated(src, mode)
   892  			}
   893  			imm8 = int8(src[pos])
   894  			pos++
   895  
   896  		case xReadIw:
   897  			if pos+2 > len(src) {
   898  				return truncated(src, mode)
   899  			}
   900  			imm = int64(binary.LittleEndian.Uint16(src[pos:]))
   901  			pos += 2
   902  
   903  		case xReadID:
   904  			if pos+4 > len(src) {
   905  				return truncated(src, mode)
   906  			}
   907  			imm = int64(binary.LittleEndian.Uint32(src[pos:]))
   908  			pos += 4
   909  
   910  		case xReadIo:
   911  			if pos+8 > len(src) {
   912  				return truncated(src, mode)
   913  			}
   914  			imm = int64(binary.LittleEndian.Uint64(src[pos:]))
   915  			pos += 8
   916  
   917  		case xReadCb:
   918  			if pos >= len(src) {
   919  				return truncated(src, mode)
   920  			}
   921  			immcpos = pos
   922  			immc = int64(src[pos])
   923  			pos++
   924  
   925  		case xReadCw:
   926  			if pos+2 > len(src) {
   927  				return truncated(src, mode)
   928  			}
   929  			immcpos = pos
   930  			immc = int64(binary.LittleEndian.Uint16(src[pos:]))
   931  			pos += 2
   932  
   933  		case xReadCm:
   934  			immcpos = pos
   935  			if addrMode == 16 {
   936  				if pos+2 > len(src) {
   937  					return truncated(src, mode)
   938  				}
   939  				immc = int64(binary.LittleEndian.Uint16(src[pos:]))
   940  				pos += 2
   941  			} else if addrMode == 32 {
   942  				if pos+4 > len(src) {
   943  					return truncated(src, mode)
   944  				}
   945  				immc = int64(binary.LittleEndian.Uint32(src[pos:]))
   946  				pos += 4
   947  			} else {
   948  				if pos+8 > len(src) {
   949  					return truncated(src, mode)
   950  				}
   951  				immc = int64(binary.LittleEndian.Uint64(src[pos:]))
   952  				pos += 8
   953  			}
   954  		case xReadCd:
   955  			immcpos = pos
   956  			if pos+4 > len(src) {
   957  				return truncated(src, mode)
   958  			}
   959  			immc = int64(binary.LittleEndian.Uint32(src[pos:]))
   960  			pos += 4
   961  
   962  		case xReadCp:
   963  			immcpos = pos
   964  			if pos+6 > len(src) {
   965  				return truncated(src, mode)
   966  			}
   967  			w := binary.LittleEndian.Uint32(src[pos:])
   968  			w2 := binary.LittleEndian.Uint16(src[pos+4:])
   969  			immc = int64(w2)<<32 | int64(w)
   970  			pos += 6
   971  
   972  		// Output.
   973  
   974  		case xSetOp:
   975  			inst.Op = Op(decoder[pc])
   976  			pc++
   977  
   978  		case xArg1,
   979  			xArg3,
   980  			xArgAL,
   981  			xArgAX,
   982  			xArgCL,
   983  			xArgCS,
   984  			xArgDS,
   985  			xArgDX,
   986  			xArgEAX,
   987  			xArgEDX,
   988  			xArgES,
   989  			xArgFS,
   990  			xArgGS,
   991  			xArgRAX,
   992  			xArgRDX,
   993  			xArgSS,
   994  			xArgST,
   995  			xArgXMM0:
   996  			inst.Args[narg] = fixedArg[x]
   997  			narg++
   998  
   999  		case xArgImm8:
  1000  			inst.Args[narg] = Imm(imm8)
  1001  			narg++
  1002  
  1003  		case xArgImm8u:
  1004  			inst.Args[narg] = Imm(uint8(imm8))
  1005  			narg++
  1006  
  1007  		case xArgImm16:
  1008  			inst.Args[narg] = Imm(int16(imm))
  1009  			narg++
  1010  
  1011  		case xArgImm16u:
  1012  			inst.Args[narg] = Imm(uint16(imm))
  1013  			narg++
  1014  
  1015  		case xArgImm32:
  1016  			inst.Args[narg] = Imm(int32(imm))
  1017  			narg++
  1018  
  1019  		case xArgImm64:
  1020  			inst.Args[narg] = Imm(imm)
  1021  			narg++
  1022  
  1023  		case xArgM,
  1024  			xArgM128,
  1025  			xArgM256,
  1026  			xArgM1428byte,
  1027  			xArgM16,
  1028  			xArgM16and16,
  1029  			xArgM16and32,
  1030  			xArgM16and64,
  1031  			xArgM16colon16,
  1032  			xArgM16colon32,
  1033  			xArgM16colon64,
  1034  			xArgM16int,
  1035  			xArgM2byte,
  1036  			xArgM32,
  1037  			xArgM32and32,
  1038  			xArgM32fp,
  1039  			xArgM32int,
  1040  			xArgM512byte,
  1041  			xArgM64,
  1042  			xArgM64fp,
  1043  			xArgM64int,
  1044  			xArgM8,
  1045  			xArgM80bcd,
  1046  			xArgM80dec,
  1047  			xArgM80fp,
  1048  			xArgM94108byte,
  1049  			xArgMem:
  1050  			if !haveMem {
  1051  				inst.Op = 0
  1052  				break Decode
  1053  			}
  1054  			inst.Args[narg] = mem
  1055  			inst.MemBytes = int(memBytes[decodeOp(x)])
  1056  			if mem.Base == RIP {
  1057  				inst.PCRel = displen
  1058  				inst.PCRelOff = dispoff
  1059  			}
  1060  			narg++
  1061  
  1062  		case xArgPtr16colon16:
  1063  			inst.Args[narg] = Imm(immc >> 16)
  1064  			inst.Args[narg+1] = Imm(immc & (1<<16 - 1))
  1065  			narg += 2
  1066  
  1067  		case xArgPtr16colon32:
  1068  			inst.Args[narg] = Imm(immc >> 32)
  1069  			inst.Args[narg+1] = Imm(immc & (1<<32 - 1))
  1070  			narg += 2
  1071  
  1072  		case xArgMoffs8, xArgMoffs16, xArgMoffs32, xArgMoffs64:
  1073  			// TODO(rsc): Can address be 64 bits?
  1074  			mem = Mem{Disp: int64(immc)}
  1075  			if segIndex >= 0 {
  1076  				mem.Segment = prefixToSegment(inst.Prefix[segIndex])
  1077  				inst.Prefix[segIndex] |= PrefixImplicit
  1078  			}
  1079  			inst.Args[narg] = mem
  1080  			inst.MemBytes = int(memBytes[decodeOp(x)])
  1081  			if mem.Base == RIP {
  1082  				inst.PCRel = displen
  1083  				inst.PCRelOff = dispoff
  1084  			}
  1085  			narg++
  1086  
  1087  		case xArgYmm1:
  1088  			base := baseReg[x]
  1089  			index := Reg(regop)
  1090  			if inst.Prefix[vexIndex+1]&0x80 == 0 {
  1091  				index += 8
  1092  			}
  1093  			inst.Args[narg] = base + index
  1094  			narg++
  1095  
  1096  		case xArgR8, xArgR16, xArgR32, xArgR64, xArgXmm, xArgXmm1, xArgDR0dashDR7:
  1097  			base := baseReg[x]
  1098  			index := Reg(regop)
  1099  			if rex != 0 && base == AL && index >= 4 {
  1100  				rexUsed |= PrefixREX
  1101  				index -= 4
  1102  				base = SPB
  1103  			}
  1104  			inst.Args[narg] = base + index
  1105  			narg++
  1106  
  1107  		case xArgMm, xArgMm1, xArgTR0dashTR7:
  1108  			inst.Args[narg] = baseReg[x] + Reg(regop&7)
  1109  			narg++
  1110  
  1111  		case xArgCR0dashCR7:
  1112  			// AMD documents an extension that the LOCK prefix
  1113  			// can be used in place of a REX prefix in order to access
  1114  			// CR8 from 32-bit mode. The LOCK prefix is allowed in
  1115  			// all modes, provided the corresponding CPUID bit is set.
  1116  			if lockIndex >= 0 {
  1117  				inst.Prefix[lockIndex] |= PrefixImplicit
  1118  				regop += 8
  1119  			}
  1120  			inst.Args[narg] = CR0 + Reg(regop)
  1121  			narg++
  1122  
  1123  		case xArgSreg:
  1124  			regop &= 7
  1125  			if regop >= 6 {
  1126  				inst.Op = 0
  1127  				break Decode
  1128  			}
  1129  			inst.Args[narg] = ES + Reg(regop)
  1130  			narg++
  1131  
  1132  		case xArgRmf16, xArgRmf32, xArgRmf64:
  1133  			base := baseReg[x]
  1134  			index := Reg(modrm & 07)
  1135  			if rex&PrefixREXB != 0 {
  1136  				rexUsed |= PrefixREXB
  1137  				index += 8
  1138  			}
  1139  			inst.Args[narg] = base + index
  1140  			narg++
  1141  
  1142  		case xArgR8op, xArgR16op, xArgR32op, xArgR64op, xArgSTi:
  1143  			n := inst.Opcode >> uint(opshift+8) & 07
  1144  			base := baseReg[x]
  1145  			index := Reg(n)
  1146  			if rex&PrefixREXB != 0 && decodeOp(x) != xArgSTi {
  1147  				rexUsed |= PrefixREXB
  1148  				index += 8
  1149  			}
  1150  			if rex != 0 && base == AL && index >= 4 {
  1151  				rexUsed |= PrefixREX
  1152  				index -= 4
  1153  				base = SPB
  1154  			}
  1155  			inst.Args[narg] = base + index
  1156  			narg++
  1157  		case xArgRM8, xArgRM16, xArgRM32, xArgRM64, xArgR32M16, xArgR32M8, xArgR64M16,
  1158  			xArgMmM32, xArgMmM64, xArgMm2M64,
  1159  			xArgXmm2M16, xArgXmm2M32, xArgXmm2M64, xArgXmmM64, xArgXmmM128, xArgXmmM32, xArgXmm2M128,
  1160  			xArgYmm2M256:
  1161  			if haveMem {
  1162  				inst.Args[narg] = mem
  1163  				inst.MemBytes = int(memBytes[decodeOp(x)])
  1164  				if mem.Base == RIP {
  1165  					inst.PCRel = displen
  1166  					inst.PCRelOff = dispoff
  1167  				}
  1168  			} else {
  1169  				base := baseReg[x]
  1170  				index := Reg(rm)
  1171  				switch decodeOp(x) {
  1172  				case xArgMmM32, xArgMmM64, xArgMm2M64:
  1173  					// There are only 8 MMX registers, so these ignore the REX.X bit.
  1174  					index &= 7
  1175  				case xArgRM8:
  1176  					if rex != 0 && index >= 4 {
  1177  						rexUsed |= PrefixREX
  1178  						index -= 4
  1179  						base = SPB
  1180  					}
  1181  				case xArgYmm2M256:
  1182  					if vex == 0xC4 && inst.Prefix[vexIndex+1]&0x40 == 0x40 {
  1183  						index += 8
  1184  					}
  1185  				}
  1186  				inst.Args[narg] = base + index
  1187  			}
  1188  			narg++
  1189  
  1190  		case xArgMm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
  1191  			if haveMem {
  1192  				inst.Op = 0
  1193  				break Decode
  1194  			}
  1195  			inst.Args[narg] = baseReg[x] + Reg(rm&7)
  1196  			narg++
  1197  
  1198  		case xArgXmm2: // register only; TODO(rsc): Handle with tag modrm_regonly tag
  1199  			if haveMem {
  1200  				inst.Op = 0
  1201  				break Decode
  1202  			}
  1203  			inst.Args[narg] = baseReg[x] + Reg(rm)
  1204  			narg++
  1205  
  1206  		case xArgRel8:
  1207  			inst.PCRelOff = immcpos
  1208  			inst.PCRel = 1
  1209  			inst.Args[narg] = Rel(int8(immc))
  1210  			narg++
  1211  
  1212  		case xArgRel16:
  1213  			inst.PCRelOff = immcpos
  1214  			inst.PCRel = 2
  1215  			inst.Args[narg] = Rel(int16(immc))
  1216  			narg++
  1217  
  1218  		case xArgRel32:
  1219  			inst.PCRelOff = immcpos
  1220  			inst.PCRel = 4
  1221  			inst.Args[narg] = Rel(int32(immc))
  1222  			narg++
  1223  		}
  1224  	}
  1225  
  1226  	if inst.Op == 0 {
  1227  		// Invalid instruction.
  1228  		if nprefix > 0 {
  1229  			return instPrefix(src[0], mode) // invalid instruction
  1230  		}
  1231  
  1232  		return Inst{Len: pos}, ErrUnrecognized
  1233  	}
  1234  
  1235  	// Matched! Hooray!
  1236  
  1237  	// 90 decodes as XCHG EAX, EAX but is NOP.
  1238  	// 66 90 decodes as XCHG AX, AX and is NOP too.
  1239  	// 48 90 decodes as XCHG RAX, RAX and is NOP too.
  1240  	// 43 90 decodes as XCHG R8D, EAX and is *not* NOP.
  1241  	// F3 90 decodes as REP XCHG EAX, EAX but is PAUSE.
  1242  	// It's all too special to handle in the decoding tables, at least for now.
  1243  	if inst.Op == XCHG && inst.Opcode>>24 == 0x90 {
  1244  		if inst.Args[0] == RAX || inst.Args[0] == EAX || inst.Args[0] == AX {
  1245  			inst.Op = NOP
  1246  			if dataSizeIndex >= 0 {
  1247  				inst.Prefix[dataSizeIndex] &^= PrefixImplicit
  1248  			}
  1249  
  1250  			inst.Args[0] = nil
  1251  			inst.Args[1] = nil
  1252  		}
  1253  
  1254  		if repIndex >= 0 && inst.Prefix[repIndex] == 0xF3 {
  1255  			inst.Prefix[repIndex] |= PrefixImplicit
  1256  			inst.Op = PAUSE
  1257  			inst.Args[0] = nil
  1258  			inst.Args[1] = nil
  1259  		} else if gnuCompat {
  1260  			for i := nprefix - 1; i >= 0; i-- {
  1261  				if inst.Prefix[i]&0xFF == 0xF3 {
  1262  					inst.Prefix[i] |= PrefixImplicit
  1263  					inst.Op = PAUSE
  1264  					inst.Args[0] = nil
  1265  					inst.Args[1] = nil
  1266  					break
  1267  				}
  1268  			}
  1269  		}
  1270  	}
  1271  
  1272  	// defaultSeg returns the default segment for an implicit
  1273  	// memory reference: the final override if present, or else DS.
  1274  	defaultSeg := func() Reg {
  1275  		if segIndex >= 0 {
  1276  			inst.Prefix[segIndex] |= PrefixImplicit
  1277  			return prefixToSegment(inst.Prefix[segIndex])
  1278  		}
  1279  
  1280  		return DS
  1281  	}
  1282  
  1283  	// Add implicit arguments not present in the tables.
  1284  	// Normally we shy away from making implicit arguments explicit,
  1285  	// following the Intel manuals, but adding the arguments seems
  1286  	// the best way to express the effect of the segment override prefixes.
  1287  	// TODO(rsc): Perhaps add these to the tables and
  1288  	// create bytecode instructions for them.
  1289  	usedAddrSize := false
  1290  
  1291  	switch inst.Op {
  1292  	case INSB, INSW, INSD:
  1293  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1294  		inst.Args[1] = DX
  1295  		usedAddrSize = true
  1296  
  1297  	case OUTSB, OUTSW, OUTSD:
  1298  		inst.Args[0] = DX
  1299  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1300  		usedAddrSize = true
  1301  
  1302  	case MOVSB, MOVSW, MOVSD, MOVSQ:
  1303  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1304  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1305  		usedAddrSize = true
  1306  
  1307  	case CMPSB, CMPSW, CMPSD, CMPSQ:
  1308  		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1309  		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1310  		usedAddrSize = true
  1311  
  1312  	case LODSB, LODSW, LODSD, LODSQ:
  1313  		switch inst.Op {
  1314  		case LODSB:
  1315  			inst.Args[0] = AL
  1316  		case LODSW:
  1317  			inst.Args[0] = AX
  1318  		case LODSD:
  1319  			inst.Args[0] = EAX
  1320  		case LODSQ:
  1321  			inst.Args[0] = RAX
  1322  		}
  1323  
  1324  		inst.Args[1] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + SI - AX}
  1325  		usedAddrSize = true
  1326  
  1327  	case STOSB, STOSW, STOSD, STOSQ:
  1328  		inst.Args[0] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1329  		switch inst.Op {
  1330  		case STOSB:
  1331  			inst.Args[1] = AL
  1332  		case STOSW:
  1333  			inst.Args[1] = AX
  1334  		case STOSD:
  1335  			inst.Args[1] = EAX
  1336  		case STOSQ:
  1337  			inst.Args[1] = RAX
  1338  		}
  1339  
  1340  		usedAddrSize = true
  1341  
  1342  	case SCASB, SCASW, SCASD, SCASQ:
  1343  		inst.Args[1] = Mem{Segment: ES, Base: baseRegForBits(addrMode) + DI - AX}
  1344  		switch inst.Op {
  1345  		case SCASB:
  1346  			inst.Args[0] = AL
  1347  		case SCASW:
  1348  			inst.Args[0] = AX
  1349  		case SCASD:
  1350  			inst.Args[0] = EAX
  1351  		case SCASQ:
  1352  			inst.Args[0] = RAX
  1353  		}
  1354  
  1355  		usedAddrSize = true
  1356  
  1357  	case XLATB:
  1358  		inst.Args[0] = Mem{Segment: defaultSeg(), Base: baseRegForBits(addrMode) + BX - AX}
  1359  		usedAddrSize = true
  1360  	}
  1361  
  1362  	// If we used the address size annotation to construct the
  1363  	// argument list, mark that prefix as implicit: it doesn't need
  1364  	// to be shown when printing the instruction.
  1365  	if haveMem || usedAddrSize {
  1366  		if addrSizeIndex >= 0 {
  1367  			inst.Prefix[addrSizeIndex] |= PrefixImplicit
  1368  		}
  1369  	}
  1370  
  1371  	// Similarly, if there's some memory operand, the segment
  1372  	// will be shown there and doesn't need to be shown as an
  1373  	// explicit prefix.
  1374  	if haveMem {
  1375  		if segIndex >= 0 {
  1376  			inst.Prefix[segIndex] |= PrefixImplicit
  1377  		}
  1378  	}
  1379  
  1380  	// Branch predict prefixes are overloaded segment prefixes,
  1381  	// since segment prefixes don't make sense on conditional jumps.
  1382  	// Rewrite final instance to prediction prefix.
  1383  	// The set of instructions to which the prefixes apply (other than the
  1384  	// Jcc conditional jumps) is not 100% clear from the manuals, but
  1385  	// the disassemblers seem to agree about the LOOP and JCXZ instructions,
  1386  	// so we'll follow along.
  1387  	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1388  	if isCondJmp[inst.Op] || isLoop[inst.Op] || inst.Op == JCXZ || inst.Op == JECXZ || inst.Op == JRCXZ {
  1389  	PredictLoop:
  1390  		for i := nprefix - 1; i >= 0; i-- {
  1391  			p := inst.Prefix[i]
  1392  			switch p & 0xFF {
  1393  			case PrefixCS:
  1394  				inst.Prefix[i] = PrefixPN
  1395  				break PredictLoop
  1396  			case PrefixDS:
  1397  				inst.Prefix[i] = PrefixPT
  1398  				break PredictLoop
  1399  			}
  1400  		}
  1401  	}
  1402  
  1403  	// The BND prefix is part of the Intel Memory Protection Extensions (MPX).
  1404  	// A REPN applied to certain control transfers is a BND prefix to bound
  1405  	// the range of possible destinations. There's surprisingly little documentation
  1406  	// about this, so we just do what libopcodes and xed agree on.
  1407  	// In particular, it's unclear why a REPN applied to LOOP or JCXZ instructions
  1408  	// does not turn into a BND.
  1409  	// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1410  	if isCondJmp[inst.Op] || inst.Op == JMP || inst.Op == CALL || inst.Op == RET {
  1411  		for i := nprefix - 1; i >= 0; i-- {
  1412  			p := inst.Prefix[i]
  1413  			if p&^PrefixIgnored == PrefixREPN {
  1414  				inst.Prefix[i] = PrefixBND
  1415  				break
  1416  			}
  1417  		}
  1418  	}
  1419  
  1420  	// The LOCK prefix only applies to certain instructions, and then only
  1421  	// to instances of the instruction with a memory destination.
  1422  	// Other uses of LOCK are invalid and cause a processor exception,
  1423  	// in contrast to the "just ignore it" spirit applied to all other prefixes.
  1424  	// Mark invalid lock prefixes.
  1425  	hasLock := false
  1426  
  1427  	if lockIndex >= 0 && inst.Prefix[lockIndex]&PrefixImplicit == 0 {
  1428  		switch inst.Op {
  1429  		// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1430  		case ADD, ADC, AND, BTC, BTR, BTS, CMPXCHG, CMPXCHG8B, CMPXCHG16B, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, XCHG:
  1431  			if isMem(inst.Args[0]) {
  1432  				hasLock = true
  1433  				break
  1434  			}
  1435  
  1436  			fallthrough
  1437  		default:
  1438  			inst.Prefix[lockIndex] |= PrefixInvalid
  1439  		}
  1440  	}
  1441  
  1442  	// In certain cases, all of which require a memory destination,
  1443  	// the REPN and REP prefixes are interpreted as XACQUIRE and XRELEASE
  1444  	// from the Intel Transactional Synchronization Extensions (TSX).
  1445  	//
  1446  	// The specific rules are:
  1447  	// (1) Any instruction with a valid LOCK prefix can have XACQUIRE or XRELEASE.
  1448  	// (2) Any XCHG, which always has an implicit LOCK, can have XACQUIRE or XRELEASE.
  1449  	// (3) Any 0x88-, 0x89-, 0xC6-, or 0xC7-opcode MOV can have XRELEASE.
  1450  	if isMem(inst.Args[0]) {
  1451  		if inst.Op == XCHG {
  1452  			hasLock = true
  1453  		}
  1454  
  1455  		for i := len(inst.Prefix) - 1; i >= 0; i-- {
  1456  			p := inst.Prefix[i] &^ PrefixIgnored
  1457  			switch p {
  1458  			case PrefixREPN:
  1459  				if hasLock {
  1460  					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXACQUIRE
  1461  				}
  1462  
  1463  			case PrefixREP:
  1464  				if hasLock {
  1465  					inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
  1466  				}
  1467  
  1468  				if inst.Op == MOV {
  1469  					op := (inst.Opcode >> 24) &^ 1
  1470  					if op == 0x88 || op == 0xC6 {
  1471  						inst.Prefix[i] = inst.Prefix[i]&PrefixIgnored | PrefixXRELEASE
  1472  					}
  1473  				}
  1474  			}
  1475  		}
  1476  	}
  1477  
  1478  	// If REP is used on a non-REP-able instruction, mark the prefix as ignored.
  1479  	if repIndex >= 0 {
  1480  		switch inst.Prefix[repIndex] {
  1481  		case PrefixREP, PrefixREPN:
  1482  			switch inst.Op {
  1483  			// According to the manuals, the REP/REPE prefix applies to all of these,
  1484  			// while the REPN applies only to some of them. However, both libopcodes
  1485  			// and xed show both prefixes explicitly for all instructions, so we do the same.
  1486  			// TODO(rsc): Perhaps this instruction class should be derived from the CSV.
  1487  			case INSB, INSW, INSD,
  1488  				MOVSB, MOVSW, MOVSD, MOVSQ,
  1489  				OUTSB, OUTSW, OUTSD,
  1490  				LODSB, LODSW, LODSD, LODSQ,
  1491  				CMPSB, CMPSW, CMPSD, CMPSQ,
  1492  				SCASB, SCASW, SCASD, SCASQ,
  1493  				STOSB, STOSW, STOSD, STOSQ:
  1494  				// ok
  1495  			default:
  1496  				inst.Prefix[repIndex] |= PrefixIgnored
  1497  			}
  1498  		}
  1499  	}
  1500  
  1501  	// If REX was present, mark implicit if all the 1 bits were consumed.
  1502  	if rexIndex >= 0 {
  1503  		if rexUsed != 0 {
  1504  			rexUsed |= PrefixREX
  1505  		}
  1506  
  1507  		if rex&^rexUsed == 0 {
  1508  			inst.Prefix[rexIndex] |= PrefixImplicit
  1509  		}
  1510  	}
  1511  
  1512  	inst.DataSize = dataMode
  1513  	inst.AddrSize = addrMode
  1514  	inst.Mode = mode
  1515  	inst.Len = pos
  1516  
  1517  	return inst, nil
  1518  }
  1519  
// errInternal is a package-level sentinel error whose message is
// "internal error".
// NOTE(review): presumably returned when the decoder reaches an inconsistent
// state rather than when the input is malformed; confirm against its uses
// earlier in the file.
var errInternal = errors.New("internal error")
  1521  
  1522  // addr16 records the eight 16-bit addressing modes.
  1523  var addr16 = [8]Mem{
  1524  	{Base: BX, Scale: 1, Index: SI},
  1525  	{Base: BX, Scale: 1, Index: DI},
  1526  	{Base: BP, Scale: 1, Index: SI},
  1527  	{Base: BP, Scale: 1, Index: DI},
  1528  	{Base: SI},
  1529  	{Base: DI},
  1530  	{Base: BP},
  1531  	{Base: BX},
  1532  }
  1533  
  1534  // baseReg returns the base register for a given register size in bits.
  1535  func baseRegForBits(bits int) Reg {
  1536  	switch bits {
  1537  	case 8:
  1538  		return AL
  1539  	case 16:
  1540  		return AX
  1541  	case 32:
  1542  		return EAX
  1543  	case 64:
  1544  		return RAX
  1545  	}
  1546  
  1547  	return 0
  1548  }
  1549  
  1550  // baseReg records the base register for argument types that specify
  1551  // a range of registers indexed by op, regop, or rm.
  1552  var baseReg = [...]Reg{
  1553  	xArgDR0dashDR7: DR0,
  1554  	xArgMm1:        M0,
  1555  	xArgMm2:        M0,
  1556  	xArgMm2M64:     M0,
  1557  	xArgMm:         M0,
  1558  	xArgMmM32:      M0,
  1559  	xArgMmM64:      M0,
  1560  	xArgR16:        AX,
  1561  	xArgR16op:      AX,
  1562  	xArgR32:        EAX,
  1563  	xArgR32M16:     EAX,
  1564  	xArgR32M8:      EAX,
  1565  	xArgR32op:      EAX,
  1566  	xArgR64:        RAX,
  1567  	xArgR64M16:     RAX,
  1568  	xArgR64op:      RAX,
  1569  	xArgR8:         AL,
  1570  	xArgR8op:       AL,
  1571  	xArgRM16:       AX,
  1572  	xArgRM32:       EAX,
  1573  	xArgRM64:       RAX,
  1574  	xArgRM8:        AL,
  1575  	xArgRmf16:      AX,
  1576  	xArgRmf32:      EAX,
  1577  	xArgRmf64:      RAX,
  1578  	xArgSTi:        F0,
  1579  	xArgTR0dashTR7: TR0,
  1580  	xArgXmm1:       X0,
  1581  	xArgYmm1:       X0,
  1582  	xArgXmm2:       X0,
  1583  	xArgXmm2M128:   X0,
  1584  	xArgYmm2M256:   X0,
  1585  	xArgXmm2M16:    X0,
  1586  	xArgXmm2M32:    X0,
  1587  	xArgXmm2M64:    X0,
  1588  	xArgXmm:        X0,
  1589  	xArgXmmM128:    X0,
  1590  	xArgXmmM32:     X0,
  1591  	xArgXmmM64:     X0,
  1592  }
  1593  
  1594  // prefixToSegment returns the segment register
  1595  // corresponding to a particular segment prefix.
  1596  func prefixToSegment(p Prefix) Reg {
  1597  	switch p &^ PrefixImplicit {
  1598  	case PrefixCS:
  1599  		return CS
  1600  	case PrefixDS:
  1601  		return DS
  1602  	case PrefixES:
  1603  		return ES
  1604  	case PrefixFS:
  1605  		return FS
  1606  	case PrefixGS:
  1607  		return GS
  1608  	case PrefixSS:
  1609  		return SS
  1610  	}
  1611  
  1612  	return 0
  1613  }
  1614  
  1615  // fixedArg records the fixed arguments corresponding to the given bytecodes.
  1616  var fixedArg = [...]Arg{
  1617  	xArg1:    Imm(1),
  1618  	xArg3:    Imm(3),
  1619  	xArgAL:   AL,
  1620  	xArgAX:   AX,
  1621  	xArgDX:   DX,
  1622  	xArgEAX:  EAX,
  1623  	xArgEDX:  EDX,
  1624  	xArgRAX:  RAX,
  1625  	xArgRDX:  RDX,
  1626  	xArgCL:   CL,
  1627  	xArgCS:   CS,
  1628  	xArgDS:   DS,
  1629  	xArgES:   ES,
  1630  	xArgFS:   FS,
  1631  	xArgGS:   GS,
  1632  	xArgSS:   SS,
  1633  	xArgST:   F0,
  1634  	xArgXMM0: X0,
  1635  }
  1636  
  1637  // memBytes records the size of the memory pointed at
  1638  // by a memory argument of the given form.
  1639  var memBytes = [...]int8{
  1640  	xArgM128:       128 / 8,
  1641  	xArgM256:       256 / 8,
  1642  	xArgM16:        16 / 8,
  1643  	xArgM16and16:   (16 + 16) / 8,
  1644  	xArgM16colon16: (16 + 16) / 8,
  1645  	xArgM16colon32: (16 + 32) / 8,
  1646  	xArgM16int:     16 / 8,
  1647  	xArgM2byte:     2,
  1648  	xArgM32:        32 / 8,
  1649  	xArgM32and32:   (32 + 32) / 8,
  1650  	xArgM32fp:      32 / 8,
  1651  	xArgM32int:     32 / 8,
  1652  	xArgM64:        64 / 8,
  1653  	xArgM64fp:      64 / 8,
  1654  	xArgM64int:     64 / 8,
  1655  	xArgMm2M64:     64 / 8,
  1656  	xArgMmM32:      32 / 8,
  1657  	xArgMmM64:      64 / 8,
  1658  	xArgMoffs16:    16 / 8,
  1659  	xArgMoffs32:    32 / 8,
  1660  	xArgMoffs64:    64 / 8,
  1661  	xArgMoffs8:     1, // 8 / 8,
  1662  	xArgR32M16:     16 / 8,
  1663  	xArgR32M8:      1, //8 / 8,
  1664  	xArgR64M16:     16 / 8,
  1665  	xArgRM16:       16 / 8,
  1666  	xArgRM32:       32 / 8,
  1667  	xArgRM64:       64 / 8,
  1668  	xArgRM8:        1, //8 / 8,
  1669  	xArgXmm2M128:   128 / 8,
  1670  	xArgYmm2M256:   256 / 8,
  1671  	xArgXmm2M16:    16 / 8,
  1672  	xArgXmm2M32:    32 / 8,
  1673  	xArgXmm2M64:    64 / 8,
  1674  	xArgXmm:        128 / 8,
  1675  	xArgXmmM128:    128 / 8,
  1676  	xArgXmmM32:     32 / 8,
  1677  	xArgXmmM64:     64 / 8,
  1678  }
  1679  
  1680  // isCondJmp records the conditional jumps.
  1681  var isCondJmp = [maxOp + 1]bool{
  1682  	JA:  true,
  1683  	JAE: true,
  1684  	JB:  true,
  1685  	JBE: true,
  1686  	JE:  true,
  1687  	JG:  true,
  1688  	JGE: true,
  1689  	JL:  true,
  1690  	JLE: true,
  1691  	JNE: true,
  1692  	JNO: true,
  1693  	JNP: true,
  1694  	JNS: true,
  1695  	JO:  true,
  1696  	JP:  true,
  1697  	JS:  true,
  1698  }
  1699  
  1700  // isLoop records the loop operators.
  1701  var isLoop = [maxOp + 1]bool{
  1702  	LOOP:   true,
  1703  	LOOPE:  true,
  1704  	LOOPNE: true,
  1705  	JECXZ:  true,
  1706  	JRCXZ:  true,
  1707  }