github.com/tidwall/go@v0.0.0-20170415222209-6694a6888b7d/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/sys"
    36  	"encoding/binary"
    37  	"fmt"
    38  	"log"
    39  	"strings"
    40  )
    41  
    42  var (
    43  	plan9privates *obj.LSym
    44  	deferreturn   *obj.LSym
    45  )
    46  
    47  // Instruction layout.
    48  
    49  const (
    50  	// Loop alignment constants:
    51  	// want to align loop entry to LoopAlign-byte boundary,
    52  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    53  	// We define a loop entry as the target of a backward jump.
    54  	//
    55  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    56  	// and it aligns all jump targets, not just backward jump targets.
    57  	//
    58  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    59  	// is very slight but negative, so the alignment is disabled by
    60  	// setting MaxLoopPad = 0. The code is here for reference and
    61  	// for future experiments.
    62  	//
    63  	LoopAlign  = 16
    64  	MaxLoopPad = 0
    65  )
    66  
// Optab is one row of the instruction table (optab). It ties an
// instruction to the operand combinations it accepts and to the bytes
// used to encode each of those combinations.
type Optab struct {
	// as is the instruction this entry describes.
	as     obj.As
	// ytab lists the accepted operand combinations; it is scanned in order
	// and each line says how far to advance through op when it does not match.
	ytab   []ytab
	// prefix is one of the P* constants and controls the prefix bytes to emit.
	prefix uint8
	// op holds the opcode ("z") bytes for all ytab lines, laid out back to
	// back in ytab order.
	op     [23]uint8
}
    73  
    74  type ytab struct {
    75  	from    uint8
    76  	from3   uint8
    77  	to      uint8
    78  	zcase   uint8
    79  	zoffset uint8
    80  }
    81  
    82  type Movtab struct {
    83  	as   obj.As
    84  	ft   uint8
    85  	f3t  uint8
    86  	tt   uint8
    87  	code uint8
    88  	op   [4]uint8
    89  }
    90  
// Operand classes ("Ytypes"). Each ytab line names the class required of
// each operand, and ycover records which specific classes also satisfy a
// more general one (for example Yi0, Yi1 and Yi8 all count as Ys32).
//
// The values are assigned by iota, so the order of this list is significant.
// Ymax must remain the last entry: it is used as the dimension of ycover.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr
	Yxm
	Yyr
	Yym
	Ytls
	Ytextsize
	Yindir
	Ymax
)
   164  
// Encoding cases ("Ztypes"), stored in the zcase field of a ytab line.
// Once a ytab line matches, its Ztype together with the opcode bytes
// selected from Optab.op determines the bytes laid down for the instruction.
//
// The values are assigned by iota, so the order of this list is significant.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	// Zibo_m: an opcode byte, then the encoded addressing mode of the
	// operand, then a single immediate byte.
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	// Zilo_m: like Zibo_m but with a long (32-bit) immediate.
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	Zvex_rm_v_r
	Zvex_r_v_rm
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zmax
)
   221  
   222  const (
   223  	Px   = 0
   224  	Px1  = 1    // symbolic; exact value doesn't matter
   225  	P32  = 0x32 /* 32-bit only */
   226  	Pe   = 0x66 /* operand escape */
   227  	Pm   = 0x0f /* 2byte opcode escape */
   228  	Pq   = 0xff /* both escapes: 66 0f */
   229  	Pb   = 0xfe /* byte operands */
   230  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   231  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   232  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   233  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   234  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   235  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   236  	Pw   = 0x48 /* Rex.w */
   237  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   238  	Py   = 0x80 /* defaults to 64-bit mode */
   239  	Py1  = 0x81 // symbolic; exact value doesn't matter
   240  	Py3  = 0x83 // symbolic; exact value doesn't matter
   241  	Pvex = 0x84 // symbolic: exact value doesn't matter
   242  
   243  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   244  	Rxr = 1 << 2 /* extend modrm reg */
   245  	Rxx = 1 << 1 /* extend sib index */
   246  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   247  )
   248  
   249  const (
   250  	// Encoding for VEX prefix in tables.
   251  	// The P, L, and W fields are chosen to match
   252  	// their eventual locations in the VEX prefix bytes.
   253  
   254  	// P field - 2 bits
   255  	vex66 = 1 << 0
   256  	vexF3 = 2 << 0
   257  	vexF2 = 3 << 0
   258  	// L field - 1 bit
   259  	vexLZ  = 0 << 2
   260  	vexLIG = 0 << 2
   261  	vex128 = 0 << 2
   262  	vex256 = 1 << 2
   263  	// W field - 1 bit
   264  	vexWIG = 0 << 7
   265  	vexW0  = 0 << 7
   266  	vexW1  = 1 << 7
   267  	// M field - 5 bits, but mostly reserved; we can store up to 4
   268  	vex0F   = 1 << 3
   269  	vex0F38 = 2 << 3
   270  	vex0F3A = 3 << 3
   271  
   272  	// Combinations used in the manual.
   273  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   274  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   275  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   276  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   277  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   278  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   279  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   280  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   281  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   282  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   283  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   284  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   285  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   286  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   287  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   288  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   289  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   290  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   291  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   292  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   293  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   294  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   295  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   296  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   297  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   298  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   299  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   300  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   301  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   302  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   303  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   304  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   305  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   306  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   307  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   308  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   309  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   310  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   311  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   312  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   313  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   314  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   315  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   316  )
   317  
   318  var ycover [Ymax * Ymax]uint8
   319  
   320  var reg [MAXREG]int
   321  
   322  var regrex [MAXREG + 1]int
   323  
   324  var ynone = []ytab{
   325  	{Ynone, Ynone, Ynone, Zlit, 1},
   326  }
   327  
   328  var ytext = []ytab{
   329  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   330  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   331  }
   332  
   333  var ynop = []ytab{
   334  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   335  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   336  	{Ynone, Ynone, Yml, Zpseudo, 0},
   337  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   338  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   339  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   340  	{Yml, Ynone, Ynone, Zpseudo, 0},
   341  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   342  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   343  }
   344  
   345  var yfuncdata = []ytab{
   346  	{Yi32, Ynone, Ym, Zpseudo, 0},
   347  }
   348  
   349  var ypcdata = []ytab{
   350  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   351  }
   352  
   353  var yxorb = []ytab{
   354  	{Yi32, Ynone, Yal, Zib_, 1},
   355  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   356  	{Yrb, Ynone, Ymb, Zr_m, 1},
   357  	{Ymb, Ynone, Yrb, Zm_r, 1},
   358  }
   359  
   360  var yaddl = []ytab{
   361  	{Yi8, Ynone, Yml, Zibo_m, 2},
   362  	{Yi32, Ynone, Yax, Zil_, 1},
   363  	{Yi32, Ynone, Yml, Zilo_m, 2},
   364  	{Yrl, Ynone, Yml, Zr_m, 1},
   365  	{Yml, Ynone, Yrl, Zm_r, 1},
   366  }
   367  
   368  var yincl = []ytab{
   369  	{Ynone, Ynone, Yrl, Z_rp, 1},
   370  	{Ynone, Ynone, Yml, Zo_m, 2},
   371  }
   372  
   373  var yincq = []ytab{
   374  	{Ynone, Ynone, Yml, Zo_m, 2},
   375  }
   376  
   377  var ycmpb = []ytab{
   378  	{Yal, Ynone, Yi32, Z_ib, 1},
   379  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   380  	{Ymb, Ynone, Yrb, Zm_r, 1},
   381  	{Yrb, Ynone, Ymb, Zr_m, 1},
   382  }
   383  
   384  var ycmpl = []ytab{
   385  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   386  	{Yax, Ynone, Yi32, Z_il, 1},
   387  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   388  	{Yml, Ynone, Yrl, Zm_r, 1},
   389  	{Yrl, Ynone, Yml, Zr_m, 1},
   390  }
   391  
   392  var yshb = []ytab{
   393  	{Yi1, Ynone, Ymb, Zo_m, 2},
   394  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   395  	{Ycx, Ynone, Ymb, Zo_m, 2},
   396  }
   397  
   398  var yshl = []ytab{
   399  	{Yi1, Ynone, Yml, Zo_m, 2},
   400  	{Yi32, Ynone, Yml, Zibo_m, 2},
   401  	{Ycl, Ynone, Yml, Zo_m, 2},
   402  	{Ycx, Ynone, Yml, Zo_m, 2},
   403  }
   404  
   405  var ytestl = []ytab{
   406  	{Yi32, Ynone, Yax, Zil_, 1},
   407  	{Yi32, Ynone, Yml, Zilo_m, 2},
   408  	{Yrl, Ynone, Yml, Zr_m, 1},
   409  	{Yml, Ynone, Yrl, Zm_r, 1},
   410  }
   411  
   412  var ymovb = []ytab{
   413  	{Yrb, Ynone, Ymb, Zr_m, 1},
   414  	{Ymb, Ynone, Yrb, Zm_r, 1},
   415  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   416  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   417  }
   418  
   419  var ybtl = []ytab{
   420  	{Yi8, Ynone, Yml, Zibo_m, 2},
   421  	{Yrl, Ynone, Yml, Zr_m, 1},
   422  }
   423  
   424  var ymovw = []ytab{
   425  	{Yrl, Ynone, Yml, Zr_m, 1},
   426  	{Yml, Ynone, Yrl, Zm_r, 1},
   427  	{Yi0, Ynone, Yrl, Zclr, 1},
   428  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   429  	{Yi32, Ynone, Yml, Zilo_m, 2},
   430  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   431  }
   432  
   433  var ymovl = []ytab{
   434  	{Yrl, Ynone, Yml, Zr_m, 1},
   435  	{Yml, Ynone, Yrl, Zm_r, 1},
   436  	{Yi0, Ynone, Yrl, Zclr, 1},
   437  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   438  	{Yi32, Ynone, Yml, Zilo_m, 2},
   439  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   440  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   441  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   442  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   443  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   444  }
   445  
   446  var yret = []ytab{
   447  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   448  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   449  }
   450  
   451  var ymovq = []ytab{
   452  	// valid in 32-bit mode
   453  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   454  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   455  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   456  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   457  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   458  
   459  	// valid only in 64-bit mode, usually with 64-bit prefix
   460  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   461  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   462  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   463  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   464  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   465  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   466  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   467  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   468  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   469  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   470  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   471  }
   472  
   473  var ym_rl = []ytab{
   474  	{Ym, Ynone, Yrl, Zm_r, 1},
   475  }
   476  
   477  var yrl_m = []ytab{
   478  	{Yrl, Ynone, Ym, Zr_m, 1},
   479  }
   480  
   481  var ymb_rl = []ytab{
   482  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   483  }
   484  
   485  var yml_rl = []ytab{
   486  	{Yml, Ynone, Yrl, Zm_r, 1},
   487  }
   488  
   489  var yrl_ml = []ytab{
   490  	{Yrl, Ynone, Yml, Zr_m, 1},
   491  }
   492  
   493  var yml_mb = []ytab{
   494  	{Yrb, Ynone, Ymb, Zr_m, 1},
   495  	{Ymb, Ynone, Yrb, Zm_r, 1},
   496  }
   497  
   498  var yrb_mb = []ytab{
   499  	{Yrb, Ynone, Ymb, Zr_m, 1},
   500  }
   501  
   502  var yxchg = []ytab{
   503  	{Yax, Ynone, Yrl, Z_rp, 1},
   504  	{Yrl, Ynone, Yax, Zrp_, 1},
   505  	{Yrl, Ynone, Yml, Zr_m, 1},
   506  	{Yml, Ynone, Yrl, Zm_r, 1},
   507  }
   508  
   509  var ydivl = []ytab{
   510  	{Yml, Ynone, Ynone, Zm_o, 2},
   511  }
   512  
   513  var ydivb = []ytab{
   514  	{Ymb, Ynone, Ynone, Zm_o, 2},
   515  }
   516  
   517  var yimul = []ytab{
   518  	{Yml, Ynone, Ynone, Zm_o, 2},
   519  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   520  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   521  	{Yml, Ynone, Yrl, Zm_r, 2},
   522  }
   523  
   524  var yimul3 = []ytab{
   525  	{Yi8, Yml, Yrl, Zibm_r, 2},
   526  }
   527  
   528  var ybyte = []ytab{
   529  	{Yi64, Ynone, Ynone, Zbyte, 1},
   530  }
   531  
   532  var yin = []ytab{
   533  	{Yi32, Ynone, Ynone, Zib_, 1},
   534  	{Ynone, Ynone, Ynone, Zlit, 1},
   535  }
   536  
   537  var yint = []ytab{
   538  	{Yi32, Ynone, Ynone, Zib_, 1},
   539  }
   540  
   541  var ypushl = []ytab{
   542  	{Yrl, Ynone, Ynone, Zrp_, 1},
   543  	{Ym, Ynone, Ynone, Zm_o, 2},
   544  	{Yi8, Ynone, Ynone, Zib_, 1},
   545  	{Yi32, Ynone, Ynone, Zil_, 1},
   546  }
   547  
   548  var ypopl = []ytab{
   549  	{Ynone, Ynone, Yrl, Z_rp, 1},
   550  	{Ynone, Ynone, Ym, Zo_m, 2},
   551  }
   552  
   553  var ybswap = []ytab{
   554  	{Ynone, Ynone, Yrl, Z_rp, 2},
   555  }
   556  
   557  var yscond = []ytab{
   558  	{Ynone, Ynone, Ymb, Zo_m, 2},
   559  }
   560  
   561  var yjcond = []ytab{
   562  	{Ynone, Ynone, Ybr, Zbr, 0},
   563  	{Yi0, Ynone, Ybr, Zbr, 0},
   564  	{Yi1, Ynone, Ybr, Zbr, 1},
   565  }
   566  
   567  var yloop = []ytab{
   568  	{Ynone, Ynone, Ybr, Zloop, 1},
   569  }
   570  
   571  var ycall = []ytab{
   572  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   573  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   574  	{Ynone, Ynone, Yindir, Zcallind, 2},
   575  	{Ynone, Ynone, Ybr, Zcall, 0},
   576  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   577  }
   578  
   579  var yduff = []ytab{
   580  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   581  }
   582  
   583  var yjmp = []ytab{
   584  	{Ynone, Ynone, Yml, Zo_m64, 2},
   585  	{Ynone, Ynone, Ybr, Zjmp, 0},
   586  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   587  }
   588  
   589  var yfmvd = []ytab{
   590  	{Ym, Ynone, Yf0, Zm_o, 2},
   591  	{Yf0, Ynone, Ym, Zo_m, 2},
   592  	{Yrf, Ynone, Yf0, Zm_o, 2},
   593  	{Yf0, Ynone, Yrf, Zo_m, 2},
   594  }
   595  
   596  var yfmvdp = []ytab{
   597  	{Yf0, Ynone, Ym, Zo_m, 2},
   598  	{Yf0, Ynone, Yrf, Zo_m, 2},
   599  }
   600  
   601  var yfmvf = []ytab{
   602  	{Ym, Ynone, Yf0, Zm_o, 2},
   603  	{Yf0, Ynone, Ym, Zo_m, 2},
   604  }
   605  
   606  var yfmvx = []ytab{
   607  	{Ym, Ynone, Yf0, Zm_o, 2},
   608  }
   609  
   610  var yfmvp = []ytab{
   611  	{Yf0, Ynone, Ym, Zo_m, 2},
   612  }
   613  
   614  var yfcmv = []ytab{
   615  	{Yrf, Ynone, Yf0, Zm_o, 2},
   616  }
   617  
   618  var yfadd = []ytab{
   619  	{Ym, Ynone, Yf0, Zm_o, 2},
   620  	{Yrf, Ynone, Yf0, Zm_o, 2},
   621  	{Yf0, Ynone, Yrf, Zo_m, 2},
   622  }
   623  
   624  var yfxch = []ytab{
   625  	{Yf0, Ynone, Yrf, Zo_m, 2},
   626  	{Yrf, Ynone, Yf0, Zm_o, 2},
   627  }
   628  
   629  var ycompp = []ytab{
   630  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   631  }
   632  
   633  var ystsw = []ytab{
   634  	{Ynone, Ynone, Ym, Zo_m, 2},
   635  	{Ynone, Ynone, Yax, Zlit, 1},
   636  }
   637  
   638  var ysvrs = []ytab{
   639  	{Ynone, Ynone, Ym, Zo_m, 2},
   640  	{Ym, Ynone, Ynone, Zm_o, 2},
   641  }
   642  
   643  var ymm = []ytab{
   644  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   645  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   646  }
   647  
   648  var yxm = []ytab{
   649  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   650  }
   651  
   652  var yxm_q4 = []ytab{
   653  	{Yxm, Ynone, Yxr, Zm_r, 1},
   654  }
   655  
   656  var yxcvm1 = []ytab{
   657  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   658  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   659  }
   660  
   661  var yxcvm2 = []ytab{
   662  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   663  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   664  }
   665  
   666  var yxr = []ytab{
   667  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   668  }
   669  
   670  var yxr_ml = []ytab{
   671  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   672  }
   673  
   674  var ymr = []ytab{
   675  	{Ymr, Ynone, Ymr, Zm_r, 1},
   676  }
   677  
   678  var ymr_ml = []ytab{
   679  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   680  }
   681  
   682  var yxcmpi = []ytab{
   683  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   684  }
   685  
   686  var yxmov = []ytab{
   687  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   688  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   689  }
   690  
   691  var yxcvfl = []ytab{
   692  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   693  }
   694  
   695  var yxcvlf = []ytab{
   696  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   697  }
   698  
   699  var yxcvfq = []ytab{
   700  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   701  }
   702  
   703  var yxcvqf = []ytab{
   704  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   705  }
   706  
   707  var yps = []ytab{
   708  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   709  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   710  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   711  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   712  }
   713  
   714  var yxrrl = []ytab{
   715  	{Yxr, Ynone, Yrl, Zm_r, 1},
   716  }
   717  
   718  var ymrxr = []ytab{
   719  	{Ymr, Ynone, Yxr, Zm_r, 1},
   720  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   721  }
   722  
   723  var ymshuf = []ytab{
   724  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   725  }
   726  
   727  var ymshufb = []ytab{
   728  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   729  }
   730  
   731  var yxshuf = []ytab{
   732  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   733  }
   734  
   735  var yextrw = []ytab{
   736  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   737  }
   738  
   739  var yextr = []ytab{
   740  	{Yu8, Yxr, Ymm, Zibr_m, 3},
   741  }
   742  
   743  var yinsrw = []ytab{
   744  	{Yu8, Yml, Yxr, Zibm_r, 2},
   745  }
   746  
   747  var yinsr = []ytab{
   748  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   749  }
   750  
   751  var ypsdq = []ytab{
   752  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   753  }
   754  
   755  var ymskb = []ytab{
   756  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   757  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   758  }
   759  
   760  var ycrc32l = []ytab{
   761  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   762  }
   763  
   764  var yprefetch = []ytab{
   765  	{Ym, Ynone, Ynone, Zm_o, 2},
   766  }
   767  
   768  var yaes = []ytab{
   769  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   770  }
   771  
   772  var yxbegin = []ytab{
   773  	{Ynone, Ynone, Ybr, Zjmp, 1},
   774  }
   775  
   776  var yxabort = []ytab{
   777  	{Yu8, Ynone, Ynone, Zib_, 1},
   778  }
   779  
   780  var ylddqu = []ytab{
   781  	{Ym, Ynone, Yxr, Zm_r, 1},
   782  }
   783  
   784  // VEX instructions that come in two forms:
   785  //	VTHING xmm2/m128, xmmV, xmm1
   786  //	VTHING ymm2/m256, ymmV, ymm1
   787  // The opcode array in the corresponding Optab entry
   788  // should contain the (VEX prefixes, opcode byte) pair
   789  // for each of the two forms.
   790  // For example, the entries for VPXOR are:
   791  //
   792  //	VPXOR xmm2/m128, xmmV, xmm1
   793  //	VEX.NDS.128.66.0F.WIG EF /r
   794  //
   795  //	VPXOR ymm2/m256, ymmV, ymm1
   796  //	VEX.NDS.256.66.0F.WIG EF /r
   797  //
   798  // The NDS/NDD/DDS part can be dropped, producing this
   799  // Optab entry:
   800  //
   801  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
   802  //
   803  var yvex_xy3 = []ytab{
   804  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   805  	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
   806  }
   807  
   808  var yvex_ri3 = []ytab{
   809  	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
   810  }
   811  
   812  var yvex_xyi3 = []ytab{
   813  	{Yu8, Yxm, Yxr, Zvex_i_rm_r, 2},
   814  	{Yu8, Yym, Yyr, Zvex_i_rm_r, 2},
   815  	{Yi8, Yxm, Yxr, Zvex_i_rm_r, 2},
   816  	{Yi8, Yym, Yyr, Zvex_i_rm_r, 2},
   817  }
   818  
   819  var yvex_yyi4 = []ytab{ //TODO don't hide 4 op, some version have xmm version
   820  	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   821  }
   822  
   823  var yvex_xyi4 = []ytab{
   824  	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   825  }
   826  
   827  var yvex_shift = []ytab{
   828  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   829  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   830  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   831  	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2},
   832  }
   833  
   834  var yvex_shift_dq = []ytab{
   835  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   836  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   837  }
   838  
   839  var yvex_r3 = []ytab{
   840  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   841  }
   842  
   843  var yvex_vmr3 = []ytab{
   844  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   845  }
   846  
   847  var yvex_xy2 = []ytab{
   848  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   849  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   850  }
   851  
   852  var yvex_xyr2 = []ytab{
   853  	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
   854  	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
   855  }
   856  
   857  var yvex_vmovdqa = []ytab{
   858  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   859  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   860  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   861  	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
   862  }
   863  
   864  var yvex_vmovntdq = []ytab{
   865  	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
   866  	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
   867  }
   868  
   869  var yvex_vpbroadcast = []ytab{
   870  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   871  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   872  }
   873  
   874  var yvex_vpbroadcast_sd = []ytab{
   875  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   876  }
   877  
   878  var ymmxmm0f38 = []ytab{
   879  	{Ymm, Ynone, Ymr, Zlitm_r, 3},
   880  	{Yxm, Ynone, Yxr, Zlitm_r, 5},
   881  }
   882  
   883  /*
   884   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
 * and p->from and p->to as operands (Addr*).  The linker scans optab to find
 * the entry with the given p->as and then looks through the ytable for that
 * instruction (the second field in the optab struct) for a line whose from
 * and to entries match the Ytypes of the p->from and p->to operands.  The function
   889   * oclass in span.c computes the specific Ytype of an operand and then the set
   890   * of more general Ytypes that it satisfies is implied by the ycover table, set
   891   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   892   * from the more general 8-bit constants, but instinit says
   893   *
   894   *        ycover[Yi0*Ymax + Ys32] = 1;
   895   *        ycover[Yi1*Ymax + Ys32] = 1;
   896   *        ycover[Yi8*Ymax + Ys32] = 1;
   897   *
   898   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   899   * if that's what an instruction can handle.
   900   *
 * In parallel with the scan through the ytable for the appropriate line, there
 * is a z pointer that starts out pointing at the strange magic byte list in
 * the Optab struct.  With each step past a non-matching ytable line, z
 * advances by the last entry in the line (zoffset).  When a matching line is
 * found, that z pointer has the extra data to use in laying down the
 * instruction bytes.  The actual bytes laid down are a function of the
 * next-to-last entry in the line (zcase, that is, the Ztype) and the z bytes.
   908   *
 * For example, let's look at AADDL.  The optab line says:
 *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
 *
 * and yaddl says
 *        var yaddl = []ytab{
 *                {Yi8, Ynone, Yml, Zibo_m, 2},
 *                {Yi32, Ynone, Yax, Zil_, 1},
 *                {Yi32, Ynone, Yml, Zilo_m, 2},
 *                {Yrl, Ynone, Yml, Zr_m, 1},
 *                {Yml, Ynone, Yrl, Zm_r, 1},
 *        }
   922   *
   923   * so there are 5 possible types of ADDL instruction that can be laid down, and
   924   * possible states used to lay them down (Ztype and z pointer, assuming z
   925   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   926   *
   927   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   928   *        Yi32, Yax -> Zil_, z+2 (0x05)
   929   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   930   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   931   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   932   *
   933   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   934   * relatively straightforward as this program goes.
   935   *
   936   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   937   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   938   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   939   * Zilo_m is the same but a long (32-bit) immediate.
   940   */
   941  var optab =
   942  /*	as, ytab, andproto, opcode */
   943  []Optab{
   944  	{obj.AXXX, nil, 0, [23]uint8{}},
   945  	{AAAA, ynone, P32, [23]uint8{0x37}},
   946  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   947  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   948  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   949  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   950  	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   951  	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   952  	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   953  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   954  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   955  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   956  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   957  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   958  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   959  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   960  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   961  	{AADJSP, nil, 0, [23]uint8{}},
   962  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   963  	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   964  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   965  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   966  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   967  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
   968  	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   969  	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   970  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   971  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   972  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   973  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   974  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   975  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   976  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   977  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   978  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   979  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   980  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   981  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   982  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   983  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   984  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   985  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   986  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   987  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   988  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   989  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   990  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   991  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   992  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   993  	{ABYTE, ybyte, Px, [23]uint8{1}},
   994  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   995  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   996  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   997  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   998  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   999  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
  1000  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1001  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1002  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1003  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1004  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1005  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1006  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1007  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1008  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1009  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1010  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1011  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1012  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1013  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1014  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1015  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1016  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1017  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1018  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1019  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1020  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1021  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1022  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1023  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1024  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1025  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1026  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1027  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1028  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1029  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1030  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1031  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1032  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1033  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1034  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1035  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1036  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1037  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1038  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1039  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1040  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1041  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1042  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1043  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1044  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1045  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1046  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1047  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1048  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1049  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1050  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1051  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1052  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1053  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1054  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1055  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1056  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1057  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1058  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1059  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1060  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1061  	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
  1062  	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
  1063  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1064  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1065  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1066  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1067  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1068  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1069  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1070  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1071  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1072  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1073  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1074  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1075  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1076  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1077  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1078  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1079  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1080  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1081  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1082  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1083  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1084  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1085  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1086  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1087  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1088  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1089  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1090  	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
  1091  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1092  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1093  	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
  1094  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1095  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1096  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1097  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1098  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1099  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1100  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1101  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1102  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1103  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1104  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1105  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1106  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1107  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1108  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1109  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1110  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1111  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1112  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1113  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1114  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1115  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1116  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1117  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1118  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1119  	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
  1120  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1121  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1122  	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
  1123  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1124  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1125  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1126  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1127  	{AINT, yint, Px, [23]uint8{0xcd}},
  1128  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1129  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1130  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1131  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1132  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1133  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1134  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1135  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1136  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1137  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1138  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1139  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1140  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1141  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1142  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1143  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1144  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1145  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1146  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1147  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1148  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1149  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1150  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1151  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1152  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1153  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1154  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1155  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1156  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1157  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1158  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1159  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1160  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1161  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1162  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1163  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1164  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1165  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1166  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1167  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1168  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1169  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1170  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1171  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1172  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1173  	{ALONG, ybyte, Px, [23]uint8{4}},
  1174  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1175  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1176  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1177  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1178  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1179  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1180  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1181  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1182  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1183  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1184  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1185  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1186  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1187  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1188  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1189  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1190  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1191  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1192  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1193  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1194  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1195  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1196  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1197  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1198  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1199  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1200  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1201  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1202  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1203  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1204  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1205  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1206  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1207  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1208  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1209  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1210  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1211  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1212  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1213  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1214  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1215  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1216  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1217  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1218  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1219  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1220  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1221  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1222  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1223  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1224  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1225  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1226  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1227  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1228  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1229  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1230  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1231  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1232  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1233  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1234  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1235  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1236  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1237  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1238  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1239  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1240  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1241  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1242  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1243  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1244  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1245  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1246  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1247  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1248  	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1249  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1250  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1251  	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1252  	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1253  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1254  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1255  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1256  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1257  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1258  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1259  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1260  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1261  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1262  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1263  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1264  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1265  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1266  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1267  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1268  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1269  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1270  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1271  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1272  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1273  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1274  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1275  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1276  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1277  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1278  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1279  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1280  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1281  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1282  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1283  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1284  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1285  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1286  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1287  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1288  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1289  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1290  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1291  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1292  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1293  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1294  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1295  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1296  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1297  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1298  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1299  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1300  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1301  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1302  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1303  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1304  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1305  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1306  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1307  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1308  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1309  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1310  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1311  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1312  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1313  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1314  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1315  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1316  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1317  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1318  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1319  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1320  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1321  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1322  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1323  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1324  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1325  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1326  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1327  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1328  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1329  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1330  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1331  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1332  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1333  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1334  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1335  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1336  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1337  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1338  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1339  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1340  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1341  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1342  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1343  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1344  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1345  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1346  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1347  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1348  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1349  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1350  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1351  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1352  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1353  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1354  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1355  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1356  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1357  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1358  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1359  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1360  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1361  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1362  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1363  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1364  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1365  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1366  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1367  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1368  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1369  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1370  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1371  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1372  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1373  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1374  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1375  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1376  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1377  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1378  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1379  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1380  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1381  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1382  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1383  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1384  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1385  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1386  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1387  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1388  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1389  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1390  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1391  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1392  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1393  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1394  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1395  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1396  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1397  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1398  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1399  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1400  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1401  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1402  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1403  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1404  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1405  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1406  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1407  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1408  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1409  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1410  	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1411  	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1412  	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1413  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1414  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1415  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1416  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1417  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1418  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1419  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1420  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1421  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1422  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1423  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1424  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1425  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1426  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1427  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1428  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1429  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1430  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1431  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1432  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1433  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1434  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1435  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1436  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1437  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1438  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1439  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1440  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1441  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1442  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1443  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1444  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1445  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1446  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1447  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1448  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1449  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1450  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1451  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1452  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1453  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1454  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1455  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1456  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1457  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1458  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1459  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1460  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1461  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1462  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1463  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1464  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1465  	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1466  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1467  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1468  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1469  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1470  	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
  1471  	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
  1472  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1473  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1474  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1475  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1476  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1477  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1478  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1479  	{AWORD, ybyte, Px, [23]uint8{2}},
  1480  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1481  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1482  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1483  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1484  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1485  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1486  	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1487  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1488  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1489  	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1490  	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1491  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1492  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1493  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1494  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1495  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1496  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1497  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1498  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1499  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1500  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1501  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1502  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1503  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1504  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1505  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1506  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1507  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1508  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1509  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1510  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1511  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1512  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1513  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1514  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1515  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1516  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1517  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1518  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1519  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1520  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1521  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1522  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1523  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1524  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1525  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1526  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1527  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1528  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1529  	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
  1530  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1531  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1532  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1533  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1534  	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
  1535  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1536  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1537  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1538  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1539  	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
  1540  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1541  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1542  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1543  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1544  	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
  1545  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1546  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1547  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1548  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1549  	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
  1550  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1551  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1552  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1553  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1554  	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
  1555  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1556  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1557  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1558  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1559  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1560  	{AFFREE, nil, 0, [23]uint8{}},
  1561  	{AFLDCW, ysvrs, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1562  	{AFLDENV, ysvrs, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1563  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1564  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1565  	{AFSTCW, ysvrs, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1566  	{AFSTENV, ysvrs, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1567  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1568  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1569  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1570  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1571  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1572  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1573  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1574  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1575  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1576  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1577  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1578  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1579  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1580  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1581  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1582  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1583  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1584  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1585  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1586  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1587  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1588  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1589  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1590  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1591  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1592  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1593  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1594  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1595  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1596  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1597  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1598  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1599  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1600  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1601  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1602  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1603  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1604  	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
  1605  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1606  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1607  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1608  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1609  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1610  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1611  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1612  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1613  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1614  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1615  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1616  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1617  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1618  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1619  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1620  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1621  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1622  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1623  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1624  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1625  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1626  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1627  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1628  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1629  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1630  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1631  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1632  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1633  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1634  	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1635  	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
  1636  	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
  1637  	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1638  	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1639  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1640  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1641  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1642  	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
  1643  	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
  1644  	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
  1645  
  1646  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1647  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1648  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1649  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1650  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1651  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1652  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1653  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1654  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1655  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1656  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1657  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1658  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1659  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1660  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1661  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1662  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1663  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1664  
  1665  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1666  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1667  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1668  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1669  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1670  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1671  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1672  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1673  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1674  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1675  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
  1676  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70, VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
  1677  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
  1678  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
  1679  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
  1680  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
  1681  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
  1682  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
  1683  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
  1684  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
  1685  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
  1686  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
  1687  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
  1688  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
  1689  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
  1690  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
  1691  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
  1692  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
  1693  	{AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_256_66_0F38_W0, 0x19}},
  1694  	{AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x18, VEX_256_66_0F38_W0, 0x18}},
  1695  	{AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x12, VEX_256_F2_0F_WIG, 0x12}},
  1696  	{AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x16, VEX_256_F3_0F_WIG, 0x16}},
  1697  	{AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x12, VEX_256_F3_0F_WIG, 0x12}},
  1698  
  1699  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1700  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1701  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1702  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1703  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1704  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1705  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1706  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1707  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1708  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1709  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1710  	{obj.AEND, nil, 0, [23]uint8{}},
  1711  	{0, nil, 0, [23]uint8{}},
  1712  }
  1713  
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// It is filled in by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1715  
  1716  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1717  // This happens on systems like Solaris that call .so functions instead of system calls.
  1718  // It does not seem to be necessary for any other systems. This is probably working
  1719  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1720  // what that bug is. And this does fix it.
  1721  func isextern(s *obj.LSym) bool {
  1722  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1723  	return strings.HasPrefix(s.Name, "libc_")
  1724  }
  1725  
  1726  // single-instruction no-ops of various lengths.
  1727  // constructed by hand and disassembled with gdb to verify.
  1728  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1729  var nop = [][16]uint8{
  1730  	{0x90},
  1731  	{0x66, 0x90},
  1732  	{0x0F, 0x1F, 0x00},
  1733  	{0x0F, 0x1F, 0x40, 0x00},
  1734  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1735  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1736  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1737  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1738  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1739  }
  1740  
  1741  // Native Client rejects the repeated 0x66 prefix.
  1742  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1743  func fillnop(p []byte, n int) {
  1744  	var m int
  1745  
  1746  	for n > 0 {
  1747  		m = n
  1748  		if m > len(nop) {
  1749  			m = len(nop)
  1750  		}
  1751  		copy(p[:m], nop[m-1][:m])
  1752  		p = p[m:]
  1753  		n -= m
  1754  	}
  1755  }
  1756  
  1757  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1758  	s.Grow(int64(c) + int64(pad))
  1759  	fillnop(s.P[c:], int(pad))
  1760  	return c + pad
  1761  }
  1762  
  1763  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1764  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1765  		return l
  1766  	}
  1767  	return q
  1768  }
  1769  
  1770  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  1771  	if s.P != nil {
  1772  		return
  1773  	}
  1774  
  1775  	if ycover[0] == 0 {
  1776  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  1777  	}
  1778  
  1779  	var asmbuf AsmBuf
  1780  
  1781  	for p := s.Text; p != nil; p = p.Link {
  1782  		if p.To.Type == obj.TYPE_BRANCH {
  1783  			if p.Pcond == nil {
  1784  				p.Pcond = p
  1785  			}
  1786  		}
  1787  		if p.As == AADJSP {
  1788  			p.To.Type = obj.TYPE_REG
  1789  			p.To.Reg = REG_SP
  1790  			v := int32(-p.From.Offset)
  1791  			p.From.Offset = int64(v)
  1792  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1793  			if v < 0 {
  1794  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1795  				v = -v
  1796  				p.From.Offset = int64(v)
  1797  			}
  1798  
  1799  			if v == 0 {
  1800  				p.As = obj.ANOP
  1801  			}
  1802  		}
  1803  	}
  1804  
  1805  	var q *obj.Prog
  1806  	var count int64 // rough count of number of instructions
  1807  	for p := s.Text; p != nil; p = p.Link {
  1808  		count++
  1809  		p.Back = 2 // use short branches first time through
  1810  		q = p.Pcond
  1811  		if q != nil && (q.Back&2 != 0) {
  1812  			p.Back |= 1 // backward jump
  1813  			q.Back |= 4 // loop head
  1814  		}
  1815  
  1816  		if p.As == AADJSP {
  1817  			p.To.Type = obj.TYPE_REG
  1818  			p.To.Reg = REG_SP
  1819  			v := int32(-p.From.Offset)
  1820  			p.From.Offset = int64(v)
  1821  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1822  			if v < 0 {
  1823  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1824  				v = -v
  1825  				p.From.Offset = int64(v)
  1826  			}
  1827  
  1828  			if v == 0 {
  1829  				p.As = obj.ANOP
  1830  			}
  1831  		}
  1832  	}
  1833  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1834  
  1835  	n := 0
  1836  	var c int32
  1837  	errors := ctxt.Errors
  1838  	for {
  1839  		loop := int32(0)
  1840  		for i := range s.R {
  1841  			s.R[i] = obj.Reloc{}
  1842  		}
  1843  		s.R = s.R[:0]
  1844  		s.P = s.P[:0]
  1845  		c = 0
  1846  		for p := s.Text; p != nil; p = p.Link {
  1847  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1848  
  1849  				// pad everything to avoid crossing 32-byte boundary
  1850  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1851  					c = naclpad(ctxt, s, c, -c&31)
  1852  				}
  1853  
  1854  				// pad call deferreturn to start at 32-byte boundary
  1855  				// so that subtracting 5 in jmpdefer will jump back
  1856  				// to that boundary and rerun the call.
  1857  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1858  					c = naclpad(ctxt, s, c, -c&31)
  1859  				}
  1860  
  1861  				// pad call to end at 32-byte boundary
  1862  				if p.As == obj.ACALL {
  1863  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1864  				}
  1865  
  1866  				// the linker treats REP and STOSQ as different instructions
  1867  				// but in fact the REP is a prefix on the STOSQ.
  1868  				// make sure REP has room for 2 more bytes, so that
  1869  				// padding will not be inserted before the next instruction.
  1870  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1871  					c = naclpad(ctxt, s, c, -c&31)
  1872  				}
  1873  
  1874  				// same for LOCK.
  1875  				// various instructions follow; the longest is 4 bytes.
  1876  				// give ourselves 8 bytes so as to avoid surprises.
  1877  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1878  					c = naclpad(ctxt, s, c, -c&31)
  1879  				}
  1880  			}
  1881  
  1882  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1883  				// pad with NOPs
  1884  				v := -c & (LoopAlign - 1)
  1885  
  1886  				if v <= MaxLoopPad {
  1887  					s.Grow(int64(c) + int64(v))
  1888  					fillnop(s.P[c:], int(v))
  1889  					c += v
  1890  				}
  1891  			}
  1892  
  1893  			p.Pc = int64(c)
  1894  
  1895  			// process forward jumps to p
  1896  			for q = p.Rel; q != nil; q = q.Forwd {
  1897  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1898  				if q.Back&2 != 0 { // short
  1899  					if v > 127 {
  1900  						loop++
  1901  						q.Back ^= 2
  1902  					}
  1903  
  1904  					if q.As == AJCXZL || q.As == AXBEGIN {
  1905  						s.P[q.Pc+2] = byte(v)
  1906  					} else {
  1907  						s.P[q.Pc+1] = byte(v)
  1908  					}
  1909  				} else {
  1910  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1911  				}
  1912  			}
  1913  
  1914  			p.Rel = nil
  1915  
  1916  			p.Pc = int64(c)
  1917  			asmbuf.asmins(ctxt, s, p)
  1918  			m := asmbuf.Len()
  1919  			if int(p.Isize) != m {
  1920  				p.Isize = uint8(m)
  1921  				loop++
  1922  			}
  1923  
  1924  			s.Grow(p.Pc + int64(m))
  1925  			copy(s.P[p.Pc:], asmbuf.Bytes())
  1926  			c += int32(m)
  1927  		}
  1928  
  1929  		n++
  1930  		if n > 20 {
  1931  			ctxt.Diag("span must be looping")
  1932  			log.Fatalf("loop")
  1933  		}
  1934  		if loop == 0 {
  1935  			break
  1936  		}
  1937  		if ctxt.Errors > errors {
  1938  			return
  1939  		}
  1940  	}
  1941  
  1942  	if ctxt.Headtype == obj.Hnacl {
  1943  		c = naclpad(ctxt, s, c, -c&31)
  1944  	}
  1945  
  1946  	s.Size = int64(c)
  1947  
  1948  	if false { /* debug['a'] > 1 */
  1949  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1950  		var i int
  1951  		for i = 0; i < len(s.P); i++ {
  1952  			fmt.Printf(" %.2x", s.P[i])
  1953  			if i%16 == 15 {
  1954  				fmt.Printf("\n  %.6x", uint(i+1))
  1955  			}
  1956  		}
  1957  
  1958  		if i%16 != 0 {
  1959  			fmt.Printf("\n")
  1960  		}
  1961  
  1962  		for i := 0; i < len(s.R); i++ {
  1963  			r := &s.R[i]
  1964  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1965  		}
  1966  	}
  1967  }
  1968  
  1969  func instinit(ctxt *obj.Link) {
  1970  	if ycover[0] != 0 {
  1971  		// Already initialized; stop now.
  1972  		// This happens in the cmd/asm tests,
  1973  		// each of which re-initializes the arch.
  1974  		return
  1975  	}
  1976  
  1977  	switch ctxt.Headtype {
  1978  	case obj.Hplan9:
  1979  		plan9privates = ctxt.Lookup("_privates", 0)
  1980  	case obj.Hnacl:
  1981  		deferreturn = ctxt.Lookup("runtime.deferreturn", 0)
  1982  	}
  1983  
  1984  	for i := 1; optab[i].as != 0; i++ {
  1985  		c := optab[i].as
  1986  		if opindex[c&obj.AMask] != nil {
  1987  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  1988  		}
  1989  		opindex[c&obj.AMask] = &optab[i]
  1990  	}
  1991  
  1992  	for i := 0; i < Ymax; i++ {
  1993  		ycover[i*Ymax+i] = 1
  1994  	}
  1995  
  1996  	ycover[Yi0*Ymax+Yi8] = 1
  1997  	ycover[Yi1*Ymax+Yi8] = 1
  1998  	ycover[Yu7*Ymax+Yi8] = 1
  1999  
  2000  	ycover[Yi0*Ymax+Yu7] = 1
  2001  	ycover[Yi1*Ymax+Yu7] = 1
  2002  
  2003  	ycover[Yi0*Ymax+Yu8] = 1
  2004  	ycover[Yi1*Ymax+Yu8] = 1
  2005  	ycover[Yu7*Ymax+Yu8] = 1
  2006  
  2007  	ycover[Yi0*Ymax+Ys32] = 1
  2008  	ycover[Yi1*Ymax+Ys32] = 1
  2009  	ycover[Yu7*Ymax+Ys32] = 1
  2010  	ycover[Yu8*Ymax+Ys32] = 1
  2011  	ycover[Yi8*Ymax+Ys32] = 1
  2012  
  2013  	ycover[Yi0*Ymax+Yi32] = 1
  2014  	ycover[Yi1*Ymax+Yi32] = 1
  2015  	ycover[Yu7*Ymax+Yi32] = 1
  2016  	ycover[Yu8*Ymax+Yi32] = 1
  2017  	ycover[Yi8*Ymax+Yi32] = 1
  2018  	ycover[Ys32*Ymax+Yi32] = 1
  2019  
  2020  	ycover[Yi0*Ymax+Yi64] = 1
  2021  	ycover[Yi1*Ymax+Yi64] = 1
  2022  	ycover[Yu7*Ymax+Yi64] = 1
  2023  	ycover[Yu8*Ymax+Yi64] = 1
  2024  	ycover[Yi8*Ymax+Yi64] = 1
  2025  	ycover[Ys32*Ymax+Yi64] = 1
  2026  	ycover[Yi32*Ymax+Yi64] = 1
  2027  
  2028  	ycover[Yal*Ymax+Yrb] = 1
  2029  	ycover[Ycl*Ymax+Yrb] = 1
  2030  	ycover[Yax*Ymax+Yrb] = 1
  2031  	ycover[Ycx*Ymax+Yrb] = 1
  2032  	ycover[Yrx*Ymax+Yrb] = 1
  2033  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2034  
  2035  	ycover[Ycl*Ymax+Ycx] = 1
  2036  
  2037  	ycover[Yax*Ymax+Yrx] = 1
  2038  	ycover[Ycx*Ymax+Yrx] = 1
  2039  
  2040  	ycover[Yax*Ymax+Yrl] = 1
  2041  	ycover[Ycx*Ymax+Yrl] = 1
  2042  	ycover[Yrx*Ymax+Yrl] = 1
  2043  	ycover[Yrl32*Ymax+Yrl] = 1
  2044  
  2045  	ycover[Yf0*Ymax+Yrf] = 1
  2046  
  2047  	ycover[Yal*Ymax+Ymb] = 1
  2048  	ycover[Ycl*Ymax+Ymb] = 1
  2049  	ycover[Yax*Ymax+Ymb] = 1
  2050  	ycover[Ycx*Ymax+Ymb] = 1
  2051  	ycover[Yrx*Ymax+Ymb] = 1
  2052  	ycover[Yrb*Ymax+Ymb] = 1
  2053  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2054  	ycover[Ym*Ymax+Ymb] = 1
  2055  
  2056  	ycover[Yax*Ymax+Yml] = 1
  2057  	ycover[Ycx*Ymax+Yml] = 1
  2058  	ycover[Yrx*Ymax+Yml] = 1
  2059  	ycover[Yrl*Ymax+Yml] = 1
  2060  	ycover[Yrl32*Ymax+Yml] = 1
  2061  	ycover[Ym*Ymax+Yml] = 1
  2062  
  2063  	ycover[Yax*Ymax+Ymm] = 1
  2064  	ycover[Ycx*Ymax+Ymm] = 1
  2065  	ycover[Yrx*Ymax+Ymm] = 1
  2066  	ycover[Yrl*Ymax+Ymm] = 1
  2067  	ycover[Yrl32*Ymax+Ymm] = 1
  2068  	ycover[Ym*Ymax+Ymm] = 1
  2069  	ycover[Ymr*Ymax+Ymm] = 1
  2070  
  2071  	ycover[Ym*Ymax+Yxm] = 1
  2072  	ycover[Yxr*Ymax+Yxm] = 1
  2073  
  2074  	ycover[Ym*Ymax+Yym] = 1
  2075  	ycover[Yyr*Ymax+Yym] = 1
  2076  
  2077  	for i := 0; i < MAXREG; i++ {
  2078  		reg[i] = -1
  2079  		if i >= REG_AL && i <= REG_R15B {
  2080  			reg[i] = (i - REG_AL) & 7
  2081  			if i >= REG_SPB && i <= REG_DIB {
  2082  				regrex[i] = 0x40
  2083  			}
  2084  			if i >= REG_R8B && i <= REG_R15B {
  2085  				regrex[i] = Rxr | Rxx | Rxb
  2086  			}
  2087  		}
  2088  
  2089  		if i >= REG_AH && i <= REG_BH {
  2090  			reg[i] = 4 + ((i - REG_AH) & 7)
  2091  		}
  2092  		if i >= REG_AX && i <= REG_R15 {
  2093  			reg[i] = (i - REG_AX) & 7
  2094  			if i >= REG_R8 {
  2095  				regrex[i] = Rxr | Rxx | Rxb
  2096  			}
  2097  		}
  2098  
  2099  		if i >= REG_F0 && i <= REG_F0+7 {
  2100  			reg[i] = (i - REG_F0) & 7
  2101  		}
  2102  		if i >= REG_M0 && i <= REG_M0+7 {
  2103  			reg[i] = (i - REG_M0) & 7
  2104  		}
  2105  		if i >= REG_X0 && i <= REG_X0+15 {
  2106  			reg[i] = (i - REG_X0) & 7
  2107  			if i >= REG_X0+8 {
  2108  				regrex[i] = Rxr | Rxx | Rxb
  2109  			}
  2110  		}
  2111  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2112  			reg[i] = (i - REG_Y0) & 7
  2113  			if i >= REG_Y0+8 {
  2114  				regrex[i] = Rxr | Rxx | Rxb
  2115  			}
  2116  		}
  2117  
  2118  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2119  			regrex[i] = Rxr
  2120  		}
  2121  	}
  2122  }
  2123  
// isAndroid reports whether obj.GOOS is "android". prefixof consults it when
// choosing the segment prefix for TLS references.
var isAndroid = (obj.GOOS == "android")
  2125  
  2126  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2127  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2128  		return 0
  2129  	}
  2130  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2131  		switch a.Reg {
  2132  		case REG_CS:
  2133  			return 0x2e
  2134  
  2135  		case REG_DS:
  2136  			return 0x3e
  2137  
  2138  		case REG_ES:
  2139  			return 0x26
  2140  
  2141  		case REG_FS:
  2142  			return 0x64
  2143  
  2144  		case REG_GS:
  2145  			return 0x65
  2146  
  2147  		case REG_TLS:
  2148  			// NOTE: Systems listed here should be only systems that
  2149  			// support direct TLS references like 8(TLS) implemented as
  2150  			// direct references from FS or GS. Systems that require
  2151  			// the initial-exec model, where you load the TLS base into
  2152  			// a register and then index from that register, do not reach
  2153  			// this code and should not be listed.
  2154  			if ctxt.Arch.Family == sys.I386 {
  2155  				switch ctxt.Headtype {
  2156  				default:
  2157  					if isAndroid {
  2158  						return 0x65 // GS
  2159  					}
  2160  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2161  
  2162  				case obj.Hdarwin,
  2163  					obj.Hdragonfly,
  2164  					obj.Hfreebsd,
  2165  					obj.Hnetbsd,
  2166  					obj.Hopenbsd:
  2167  					return 0x65 // GS
  2168  				}
  2169  			}
  2170  
  2171  			switch ctxt.Headtype {
  2172  			default:
  2173  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2174  
  2175  			case obj.Hlinux:
  2176  				if isAndroid {
  2177  					return 0x64 // FS
  2178  				}
  2179  
  2180  				if ctxt.Flag_shared {
  2181  					log.Fatalf("unknown TLS base register for linux with -shared")
  2182  				} else {
  2183  					return 0x64 // FS
  2184  				}
  2185  
  2186  			case obj.Hdragonfly,
  2187  				obj.Hfreebsd,
  2188  				obj.Hnetbsd,
  2189  				obj.Hopenbsd,
  2190  				obj.Hsolaris:
  2191  				return 0x64 // FS
  2192  
  2193  			case obj.Hdarwin:
  2194  				return 0x65 // GS
  2195  			}
  2196  		}
  2197  	}
  2198  
  2199  	if ctxt.Arch.Family == sys.I386 {
  2200  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2201  			// When building for inclusion into a shared library, an instruction of the form
  2202  			//     MOVL 0(CX)(TLS*1), AX
  2203  			// becomes
  2204  			//     mov %gs:(%ecx), %eax
  2205  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2206  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2207  			// a shared library the instruction it becomes
  2208  			//     mov 0x0(%ecx), $eax
  2209  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2210  			if a.Offset != 0 {
  2211  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2212  			}
  2213  			return 0x65 // GS
  2214  		}
  2215  		return 0
  2216  	}
  2217  
  2218  	switch a.Index {
  2219  	case REG_CS:
  2220  		return 0x2e
  2221  
  2222  	case REG_DS:
  2223  		return 0x3e
  2224  
  2225  	case REG_ES:
  2226  		return 0x26
  2227  
  2228  	case REG_TLS:
  2229  		if ctxt.Flag_shared {
  2230  			// When building for inclusion into a shared library, an instruction of the form
  2231  			//     MOV 0(CX)(TLS*1), AX
  2232  			// becomes
  2233  			//     mov %fs:(%rcx), %rax
  2234  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2235  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2236  			// a shared library the instruction does not require a prefix.
  2237  			if a.Offset != 0 {
  2238  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2239  			}
  2240  			return 0x64
  2241  		}
  2242  
  2243  	case REG_FS:
  2244  		return 0x64
  2245  
  2246  	case REG_GS:
  2247  		return 0x65
  2248  	}
  2249  
  2250  	return 0
  2251  }
  2252  
  2253  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2254  	switch a.Type {
  2255  	case obj.TYPE_NONE:
  2256  		return Ynone
  2257  
  2258  	case obj.TYPE_BRANCH:
  2259  		return Ybr
  2260  
  2261  	case obj.TYPE_INDIR:
  2262  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2263  			return Yindir
  2264  		}
  2265  		return Yxxx
  2266  
  2267  	case obj.TYPE_MEM:
  2268  		if a.Index == REG_SP {
  2269  			// Can't use SP as the index register
  2270  			return Yxxx
  2271  		}
  2272  		if ctxt.Arch.Family == sys.AMD64 {
  2273  			switch a.Name {
  2274  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2275  				// Global variables can't use index registers and their
  2276  				// base register is %rip (%rip is encoded as REG_NONE).
  2277  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2278  					return Yxxx
  2279  				}
  2280  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2281  				// These names must have a base of SP.  The old compiler
  2282  				// uses 0 for the base register. SSA uses REG_SP.
  2283  				if a.Reg != REG_SP && a.Reg != 0 {
  2284  					return Yxxx
  2285  				}
  2286  			case obj.NAME_NONE:
  2287  				// everything is ok
  2288  			default:
  2289  				// unknown name
  2290  				return Yxxx
  2291  			}
  2292  		}
  2293  		return Ym
  2294  
  2295  	case obj.TYPE_ADDR:
  2296  		switch a.Name {
  2297  		case obj.NAME_GOTREF:
  2298  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2299  			return Yxxx
  2300  
  2301  		case obj.NAME_EXTERN,
  2302  			obj.NAME_STATIC:
  2303  			if a.Sym != nil && isextern(a.Sym) || (ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared) {
  2304  				return Yi32
  2305  			}
  2306  			return Yiauto // use pc-relative addressing
  2307  
  2308  		case obj.NAME_AUTO,
  2309  			obj.NAME_PARAM:
  2310  			return Yiauto
  2311  		}
  2312  
  2313  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2314  		// and got Yi32 in an earlier version of this code.
  2315  		// Keep doing that until we fix yduff etc.
  2316  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2317  			return Yi32
  2318  		}
  2319  
  2320  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2321  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2322  		}
  2323  		fallthrough
  2324  
  2325  		// fall through
  2326  
  2327  	case obj.TYPE_CONST:
  2328  		if a.Sym != nil {
  2329  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2330  		}
  2331  
  2332  		v := a.Offset
  2333  		if ctxt.Arch.Family == sys.I386 {
  2334  			v = int64(int32(v))
  2335  		}
  2336  		if v == 0 {
  2337  			if p.Mark&PRESERVEFLAGS != 0 {
  2338  				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
  2339  				return Yu7
  2340  			}
  2341  			return Yi0
  2342  		}
  2343  		if v == 1 {
  2344  			return Yi1
  2345  		}
  2346  		if v >= 0 && v <= 127 {
  2347  			return Yu7
  2348  		}
  2349  		if v >= 0 && v <= 255 {
  2350  			return Yu8
  2351  		}
  2352  		if v >= -128 && v <= 127 {
  2353  			return Yi8
  2354  		}
  2355  		if ctxt.Arch.Family == sys.I386 {
  2356  			return Yi32
  2357  		}
  2358  		l := int32(v)
  2359  		if int64(l) == v {
  2360  			return Ys32 /* can sign extend */
  2361  		}
  2362  		if v>>32 == 0 {
  2363  			return Yi32 /* unsigned */
  2364  		}
  2365  		return Yi64
  2366  
  2367  	case obj.TYPE_TEXTSIZE:
  2368  		return Ytextsize
  2369  	}
  2370  
  2371  	if a.Type != obj.TYPE_REG {
  2372  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2373  		return Yxxx
  2374  	}
  2375  
  2376  	switch a.Reg {
  2377  	case REG_AL:
  2378  		return Yal
  2379  
  2380  	case REG_AX:
  2381  		return Yax
  2382  
  2383  		/*
  2384  			case REG_SPB:
  2385  		*/
  2386  	case REG_BPB,
  2387  		REG_SIB,
  2388  		REG_DIB,
  2389  		REG_R8B,
  2390  		REG_R9B,
  2391  		REG_R10B,
  2392  		REG_R11B,
  2393  		REG_R12B,
  2394  		REG_R13B,
  2395  		REG_R14B,
  2396  		REG_R15B:
  2397  		if ctxt.Arch.Family == sys.I386 {
  2398  			return Yxxx
  2399  		}
  2400  		fallthrough
  2401  
  2402  	case REG_DL,
  2403  		REG_BL,
  2404  		REG_AH,
  2405  		REG_CH,
  2406  		REG_DH,
  2407  		REG_BH:
  2408  		return Yrb
  2409  
  2410  	case REG_CL:
  2411  		return Ycl
  2412  
  2413  	case REG_CX:
  2414  		return Ycx
  2415  
  2416  	case REG_DX, REG_BX:
  2417  		return Yrx
  2418  
  2419  	case REG_R8, /* not really Yrl */
  2420  		REG_R9,
  2421  		REG_R10,
  2422  		REG_R11,
  2423  		REG_R12,
  2424  		REG_R13,
  2425  		REG_R14,
  2426  		REG_R15:
  2427  		if ctxt.Arch.Family == sys.I386 {
  2428  			return Yxxx
  2429  		}
  2430  		fallthrough
  2431  
  2432  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2433  		if ctxt.Arch.Family == sys.I386 {
  2434  			return Yrl32
  2435  		}
  2436  		return Yrl
  2437  
  2438  	case REG_F0 + 0:
  2439  		return Yf0
  2440  
  2441  	case REG_F0 + 1,
  2442  		REG_F0 + 2,
  2443  		REG_F0 + 3,
  2444  		REG_F0 + 4,
  2445  		REG_F0 + 5,
  2446  		REG_F0 + 6,
  2447  		REG_F0 + 7:
  2448  		return Yrf
  2449  
  2450  	case REG_M0 + 0,
  2451  		REG_M0 + 1,
  2452  		REG_M0 + 2,
  2453  		REG_M0 + 3,
  2454  		REG_M0 + 4,
  2455  		REG_M0 + 5,
  2456  		REG_M0 + 6,
  2457  		REG_M0 + 7:
  2458  		return Ymr
  2459  
  2460  	case REG_X0 + 0,
  2461  		REG_X0 + 1,
  2462  		REG_X0 + 2,
  2463  		REG_X0 + 3,
  2464  		REG_X0 + 4,
  2465  		REG_X0 + 5,
  2466  		REG_X0 + 6,
  2467  		REG_X0 + 7,
  2468  		REG_X0 + 8,
  2469  		REG_X0 + 9,
  2470  		REG_X0 + 10,
  2471  		REG_X0 + 11,
  2472  		REG_X0 + 12,
  2473  		REG_X0 + 13,
  2474  		REG_X0 + 14,
  2475  		REG_X0 + 15:
  2476  		return Yxr
  2477  
  2478  	case REG_Y0 + 0,
  2479  		REG_Y0 + 1,
  2480  		REG_Y0 + 2,
  2481  		REG_Y0 + 3,
  2482  		REG_Y0 + 4,
  2483  		REG_Y0 + 5,
  2484  		REG_Y0 + 6,
  2485  		REG_Y0 + 7,
  2486  		REG_Y0 + 8,
  2487  		REG_Y0 + 9,
  2488  		REG_Y0 + 10,
  2489  		REG_Y0 + 11,
  2490  		REG_Y0 + 12,
  2491  		REG_Y0 + 13,
  2492  		REG_Y0 + 14,
  2493  		REG_Y0 + 15:
  2494  		return Yyr
  2495  
  2496  	case REG_CS:
  2497  		return Ycs
  2498  	case REG_SS:
  2499  		return Yss
  2500  	case REG_DS:
  2501  		return Yds
  2502  	case REG_ES:
  2503  		return Yes
  2504  	case REG_FS:
  2505  		return Yfs
  2506  	case REG_GS:
  2507  		return Ygs
  2508  	case REG_TLS:
  2509  		return Ytls
  2510  
  2511  	case REG_GDTR:
  2512  		return Ygdtr
  2513  	case REG_IDTR:
  2514  		return Yidtr
  2515  	case REG_LDTR:
  2516  		return Yldtr
  2517  	case REG_MSW:
  2518  		return Ymsw
  2519  	case REG_TASK:
  2520  		return Ytask
  2521  
  2522  	case REG_CR + 0:
  2523  		return Ycr0
  2524  	case REG_CR + 1:
  2525  		return Ycr1
  2526  	case REG_CR + 2:
  2527  		return Ycr2
  2528  	case REG_CR + 3:
  2529  		return Ycr3
  2530  	case REG_CR + 4:
  2531  		return Ycr4
  2532  	case REG_CR + 5:
  2533  		return Ycr5
  2534  	case REG_CR + 6:
  2535  		return Ycr6
  2536  	case REG_CR + 7:
  2537  		return Ycr7
  2538  	case REG_CR + 8:
  2539  		return Ycr8
  2540  
  2541  	case REG_DR + 0:
  2542  		return Ydr0
  2543  	case REG_DR + 1:
  2544  		return Ydr1
  2545  	case REG_DR + 2:
  2546  		return Ydr2
  2547  	case REG_DR + 3:
  2548  		return Ydr3
  2549  	case REG_DR + 4:
  2550  		return Ydr4
  2551  	case REG_DR + 5:
  2552  		return Ydr5
  2553  	case REG_DR + 6:
  2554  		return Ydr6
  2555  	case REG_DR + 7:
  2556  		return Ydr7
  2557  
  2558  	case REG_TR + 0:
  2559  		return Ytr0
  2560  	case REG_TR + 1:
  2561  		return Ytr1
  2562  	case REG_TR + 2:
  2563  		return Ytr2
  2564  	case REG_TR + 3:
  2565  		return Ytr3
  2566  	case REG_TR + 4:
  2567  		return Ytr4
  2568  	case REG_TR + 5:
  2569  		return Ytr5
  2570  	case REG_TR + 6:
  2571  		return Ytr6
  2572  	case REG_TR + 7:
  2573  		return Ytr7
  2574  	}
  2575  
  2576  	return Yxxx
  2577  }
  2578  
  2579  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  2580  // and hold assembly state.
  2581  type AsmBuf struct {
  2582  	buf     [100]byte
  2583  	off     int
  2584  	rexflag int
  2585  	vexflag int
  2586  	rep     int
  2587  	repn    int
  2588  	lock    bool
  2589  }
  2590  
  2591  // Put1 appends one byte to the end of the buffer.
  2592  func (a *AsmBuf) Put1(x byte) {
  2593  	a.buf[a.off] = x
  2594  	a.off++
  2595  }
  2596  
  2597  // Put2 appends two bytes to the end of the buffer.
  2598  func (a *AsmBuf) Put2(x, y byte) {
  2599  	a.buf[a.off+0] = x
  2600  	a.buf[a.off+1] = y
  2601  	a.off += 2
  2602  }
  2603  
  2604  // Put3 appends three bytes to the end of the buffer.
  2605  func (a *AsmBuf) Put3(x, y, z byte) {
  2606  	a.buf[a.off+0] = x
  2607  	a.buf[a.off+1] = y
  2608  	a.buf[a.off+2] = z
  2609  	a.off += 3
  2610  }
  2611  
  2612  // Put4 appends four bytes to the end of the buffer.
  2613  func (a *AsmBuf) Put4(x, y, z, w byte) {
  2614  	a.buf[a.off+0] = x
  2615  	a.buf[a.off+1] = y
  2616  	a.buf[a.off+2] = z
  2617  	a.buf[a.off+3] = w
  2618  	a.off += 4
  2619  }
  2620  
  2621  // PutInt16 writes v into the buffer using little-endian encoding.
  2622  func (a *AsmBuf) PutInt16(v int16) {
  2623  	a.buf[a.off+0] = byte(v)
  2624  	a.buf[a.off+1] = byte(v >> 8)
  2625  	a.off += 2
  2626  }
  2627  
  2628  // PutInt32 writes v into the buffer using little-endian encoding.
  2629  func (a *AsmBuf) PutInt32(v int32) {
  2630  	a.buf[a.off+0] = byte(v)
  2631  	a.buf[a.off+1] = byte(v >> 8)
  2632  	a.buf[a.off+2] = byte(v >> 16)
  2633  	a.buf[a.off+3] = byte(v >> 24)
  2634  	a.off += 4
  2635  }
  2636  
  2637  // PutInt64 writes v into the buffer using little-endian encoding.
  2638  func (a *AsmBuf) PutInt64(v int64) {
  2639  	a.buf[a.off+0] = byte(v)
  2640  	a.buf[a.off+1] = byte(v >> 8)
  2641  	a.buf[a.off+2] = byte(v >> 16)
  2642  	a.buf[a.off+3] = byte(v >> 24)
  2643  	a.buf[a.off+4] = byte(v >> 32)
  2644  	a.buf[a.off+5] = byte(v >> 40)
  2645  	a.buf[a.off+6] = byte(v >> 48)
  2646  	a.buf[a.off+7] = byte(v >> 56)
  2647  	a.off += 8
  2648  }
  2649  
  2650  // Put copies b into the buffer.
  2651  func (a *AsmBuf) Put(b []byte) {
  2652  	copy(a.buf[a.off:], b)
  2653  	a.off += len(b)
  2654  }
  2655  
  2656  // Insert inserts b at offset i.
  2657  func (a *AsmBuf) Insert(i int, b byte) {
  2658  	a.off++
  2659  	copy(a.buf[i+1:a.off], a.buf[i:a.off-1])
  2660  	a.buf[i] = b
  2661  }
  2662  
  2663  // Last returns the byte at the end of the buffer.
  2664  func (a *AsmBuf) Last() byte { return a.buf[a.off-1] }
  2665  
  2666  // Len returns the length of the buffer.
  2667  func (a *AsmBuf) Len() int { return a.off }
  2668  
  2669  // Bytes returns the contents of the buffer.
  2670  func (a *AsmBuf) Bytes() []byte { return a.buf[:a.off] }
  2671  
  2672  // Reset empties the buffer.
  2673  func (a *AsmBuf) Reset() { a.off = 0 }
  2674  
  2675  // At returns the byte at offset i.
  2676  func (a *AsmBuf) At(i int) byte { return a.buf[i] }
  2677  
  2678  func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2679  	var i int
  2680  
  2681  	switch index {
  2682  	default:
  2683  		goto bad
  2684  
  2685  	case REG_NONE:
  2686  		i = 4 << 3
  2687  		goto bas
  2688  
  2689  	case REG_R8,
  2690  		REG_R9,
  2691  		REG_R10,
  2692  		REG_R11,
  2693  		REG_R12,
  2694  		REG_R13,
  2695  		REG_R14,
  2696  		REG_R15:
  2697  		if ctxt.Arch.Family == sys.I386 {
  2698  			goto bad
  2699  		}
  2700  		fallthrough
  2701  
  2702  	case REG_AX,
  2703  		REG_CX,
  2704  		REG_DX,
  2705  		REG_BX,
  2706  		REG_BP,
  2707  		REG_SI,
  2708  		REG_DI:
  2709  		i = reg[index] << 3
  2710  	}
  2711  
  2712  	switch scale {
  2713  	default:
  2714  		goto bad
  2715  
  2716  	case 1:
  2717  		break
  2718  
  2719  	case 2:
  2720  		i |= 1 << 6
  2721  
  2722  	case 4:
  2723  		i |= 2 << 6
  2724  
  2725  	case 8:
  2726  		i |= 3 << 6
  2727  	}
  2728  
  2729  bas:
  2730  	switch base {
  2731  	default:
  2732  		goto bad
  2733  
  2734  	case REG_NONE: /* must be mod=00 */
  2735  		i |= 5
  2736  
  2737  	case REG_R8,
  2738  		REG_R9,
  2739  		REG_R10,
  2740  		REG_R11,
  2741  		REG_R12,
  2742  		REG_R13,
  2743  		REG_R14,
  2744  		REG_R15:
  2745  		if ctxt.Arch.Family == sys.I386 {
  2746  			goto bad
  2747  		}
  2748  		fallthrough
  2749  
  2750  	case REG_AX,
  2751  		REG_CX,
  2752  		REG_DX,
  2753  		REG_BX,
  2754  		REG_SP,
  2755  		REG_BP,
  2756  		REG_SI,
  2757  		REG_DI:
  2758  		i |= reg[base]
  2759  	}
  2760  
  2761  	asmbuf.Put1(byte(i))
  2762  	return
  2763  
  2764  bad:
  2765  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2766  	asmbuf.Put1(0)
  2767  	return
  2768  }
  2769  
  2770  func (asmbuf *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  2771  	var rel obj.Reloc
  2772  
  2773  	v := vaddr(ctxt, p, a, &rel)
  2774  	if rel.Siz != 0 {
  2775  		if rel.Siz != 4 {
  2776  			ctxt.Diag("bad reloc")
  2777  		}
  2778  		r := obj.Addrel(cursym)
  2779  		*r = rel
  2780  		r.Off = int32(p.Pc + int64(asmbuf.Len()))
  2781  	}
  2782  
  2783  	asmbuf.PutInt32(int32(v))
  2784  }
  2785  
  2786  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2787  	if r != nil {
  2788  		*r = obj.Reloc{}
  2789  	}
  2790  
  2791  	switch a.Name {
  2792  	case obj.NAME_STATIC,
  2793  		obj.NAME_GOTREF,
  2794  		obj.NAME_EXTERN:
  2795  		s := a.Sym
  2796  		if r == nil {
  2797  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2798  			log.Fatalf("reloc")
  2799  		}
  2800  
  2801  		if a.Name == obj.NAME_GOTREF {
  2802  			r.Siz = 4
  2803  			r.Type = obj.R_GOTPCREL
  2804  		} else if isextern(s) || (ctxt.Arch.Family != sys.AMD64 && !ctxt.Flag_shared) {
  2805  			r.Siz = 4
  2806  			r.Type = obj.R_ADDR
  2807  		} else {
  2808  			r.Siz = 4
  2809  			r.Type = obj.R_PCREL
  2810  		}
  2811  
  2812  		r.Off = -1 // caller must fill in
  2813  		r.Sym = s
  2814  		r.Add = a.Offset
  2815  
  2816  		return 0
  2817  	}
  2818  
  2819  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2820  		if r == nil {
  2821  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2822  			log.Fatalf("reloc")
  2823  		}
  2824  
  2825  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == obj.Hdarwin {
  2826  			r.Type = obj.R_TLS_LE
  2827  			r.Siz = 4
  2828  			r.Off = -1 // caller must fill in
  2829  			r.Add = a.Offset
  2830  		}
  2831  		return 0
  2832  	}
  2833  
  2834  	return a.Offset
  2835  }
  2836  
// asmandsz encodes operand a as a ModRM byte followed, when required, by a
// SIB byte (via asmidx), a displacement, and a relocation added to cursym.
//
// r is the value placed in bits 3-5 of the ModRM byte. rex carries REX
// prefix bits for r; only its 0x40 and Rxr bits are kept. REX bits needed
// by the base and index registers are ORed into asmbuf.rexflag. m64 is not
// used by this function.
//
// Operands that cannot be encoded are reported through ctxt.Diag.
func (asmbuf *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Discard every rex bit except 0x40 and Rxr.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && asmbuf.rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement, truncated to 32 bits.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here; the two checks only add a
		// more specific diagnostic ahead of the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3 with the register in the rm
		// field. A non-zero offset is an error.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		asmbuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		asmbuf.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: every form below uses
	// rm=4 and is followed by a SIB byte from asmidx.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !isextern(a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		asmbuf.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=00 followed by a 32-bit displacement.
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement and no relocation: mod=00. BP and R13 are
		// excluded and use one of the displacement forms below.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in a signed byte: mod=01 with an 8-bit
		// displacement.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			asmbuf.Put1(byte(v))
			return
		}

		// Otherwise mod=10 with a 32-bit displacement.
		asmbuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with the REG_TLS
	// pseudo-index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	// A REG_TLS base gets its value and relocation from vaddr.
	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	asmbuf.rexflag |= regrex[base]&Rxb | rex
	// No general-purpose base register: the address is a bare 32-bit
	// displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Use mod=00 rm=5 for non-extern symbol references with no base,
		// and always when the target is not AMD64.
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			asmbuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		asmbuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base are always followed by a SIB byte with no index.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			asmbuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			asmbuf.Put1(byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: ModRM alone, with no, an
	// 8-bit, or a 32-bit displacement.
	if REG_AX <= base && base <= REG_R15 {
		// With a REG_TLS index outside shared mode, the offset moves into
		// the addend of an R_TLS_LE relocation and the encoded
		// displacement becomes 0.
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

	// putrelv appends the 32-bit displacement v and, if a relocation is
	// pending, records it at the displacement's position.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// This r shadows the parameter r within the if block.
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(asmbuf.Len()))
	}

	asmbuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  3037  
  3038  func (asmbuf *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3039  	asmbuf.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3040  }
  3041  
  3042  func (asmbuf *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3043  	asmbuf.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3044  }
  3045  
  3046  func bytereg(a *obj.Addr, t *uint8) {
  3047  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3048  		a.Reg += REG_AL - REG_AX
  3049  		*t = 0
  3050  	}
  3051  }
  3052  
  3053  func unbytereg(a *obj.Addr, t *uint8) {
  3054  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3055  		a.Reg += REG_AX - REG_AL
  3056  		*t = 0
  3057  	}
  3058  }
  3059  
// E is the byte value 0xff. It appears inside the opcode arrays of ymovtab.
// NOTE(review): it looks like an end-of-opcode-bytes marker; confirm
// against the code that consumes ymovtab.
const (
	E = 0xff
)
  3063  
// ymovtab lists special-case encodings: push/pop of segment registers,
// moves to and from segment, control, debug, test and descriptor-table
// registers, double shifts, and loading the TLS base. The table ends with
// an all-zero entry.
//
// NOTE(review): each entry appears to be {instruction, from class, from3
// class, to class, encoding case, opcode bytes}; confirm against the Movtab
// declaration, which is outside this part of the file.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3203  
  3204  func isax(a *obj.Addr) bool {
  3205  	switch a.Reg {
  3206  	case REG_AX, REG_AL, REG_AH:
  3207  		return true
  3208  	}
  3209  
  3210  	if a.Index == REG_AX {
  3211  		return true
  3212  	}
  3213  	return false
  3214  }
  3215  
  3216  func subreg(p *obj.Prog, from int, to int) {
  3217  	if false { /* debug['Q'] */
  3218  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3219  	}
  3220  
  3221  	if int(p.From.Reg) == from {
  3222  		p.From.Reg = int16(to)
  3223  		p.Ft = 0
  3224  	}
  3225  
  3226  	if int(p.To.Reg) == from {
  3227  		p.To.Reg = int16(to)
  3228  		p.Tt = 0
  3229  	}
  3230  
  3231  	if int(p.From.Index) == from {
  3232  		p.From.Index = int16(to)
  3233  		p.Ft = 0
  3234  	}
  3235  
  3236  	if int(p.To.Index) == from {
  3237  		p.To.Index = int16(to)
  3238  		p.Tt = 0
  3239  	}
  3240  
  3241  	if false { /* debug['Q'] */
  3242  		fmt.Printf("%v\n", p)
  3243  	}
  3244  }
  3245  
  3246  func (asmbuf *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3247  	switch op {
  3248  	case Pm, Pe, Pf2, Pf3:
  3249  		if osize != 1 {
  3250  			if op != Pm {
  3251  				asmbuf.Put1(byte(op))
  3252  			}
  3253  			asmbuf.Put1(Pm)
  3254  			z++
  3255  			op = int(o.op[z])
  3256  			break
  3257  		}
  3258  		fallthrough
  3259  
  3260  	default:
  3261  		if asmbuf.Len() == 0 || asmbuf.Last() != Pm {
  3262  			asmbuf.Put1(Pm)
  3263  		}
  3264  	}
  3265  
  3266  	asmbuf.Put1(byte(op))
  3267  	return z
  3268  }
  3269  
// bpduff1 holds the machine code for the two instructions noted on its
// lines: save BP at -16(SP), then point BP at that slot.
// NOTE(review): presumably emitted before a Duff's-device call to set up
// the frame pointer; confirm at the use site.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
  3274  
// bpduff2 holds the machine code for the instruction noted on its line:
// reload BP from 0(BP).
// NOTE(review): presumably the counterpart of bpduff1, emitted after the
// call to restore the saved BP; confirm at the use site.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
  3278  
  3279  // Emit VEX prefix and opcode byte.
  3280  // The three addresses are the r/m, vvvv, and reg fields.
  3281  // The reg and rm arguments appear in the same order as the
  3282  // arguments to asmand, which typically follows the call to asmvex.
  3283  // The final two arguments are the VEX prefix (see encoding above)
  3284  // and the opcode byte.
  3285  // For details about vex prefix see:
  3286  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3287  func (asmbuf *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3288  	asmbuf.vexflag = 1
  3289  	rexR := 0
  3290  	if r != nil {
  3291  		rexR = regrex[r.Reg] & Rxr
  3292  	}
  3293  	rexB := 0
  3294  	rexX := 0
  3295  	if rm != nil {
  3296  		rexB = regrex[rm.Reg] & Rxb
  3297  		rexX = regrex[rm.Index] & Rxx
  3298  	}
  3299  	vexM := (vex >> 3) & 0xF
  3300  	vexWLP := vex & 0x87
  3301  	vexV := byte(0)
  3302  	if v != nil {
  3303  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3304  	}
  3305  	vexV ^= 0xF
  3306  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3307  		// Can use 2-byte encoding.
  3308  		asmbuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3309  	} else {
  3310  		// Must use 3-byte encoding.
  3311  		asmbuf.Put3(0xc4,
  3312  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3313  			vexV<<3|vexWLP,
  3314  		)
  3315  	}
  3316  	asmbuf.Put1(opcode)
  3317  }
  3318  
  3319  func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  3320  	o := opindex[p.As&obj.AMask]
  3321  
  3322  	if o == nil {
  3323  		ctxt.Diag("asmins: missing op %v", p)
  3324  		return
  3325  	}
  3326  
  3327  	pre := prefixof(ctxt, p, &p.From)
  3328  	if pre != 0 {
  3329  		asmbuf.Put1(byte(pre))
  3330  	}
  3331  	pre = prefixof(ctxt, p, &p.To)
  3332  	if pre != 0 {
  3333  		asmbuf.Put1(byte(pre))
  3334  	}
  3335  
  3336  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3337  	// which encodes as SHRQ $32(DX*0), AX.
  3338  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3339  	// Change encoding generated by assemblers and compilers and remove.
  3340  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3341  		p.From3 = new(obj.Addr)
  3342  		p.From3.Type = obj.TYPE_REG
  3343  		p.From3.Reg = p.From.Index
  3344  		p.From.Index = 0
  3345  	}
  3346  
  3347  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3348  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3349  	switch p.As {
  3350  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3351  		if p.From3Type() == obj.TYPE_NONE {
  3352  			p.From3 = new(obj.Addr)
  3353  			*p.From3 = p.From
  3354  			p.From = obj.Addr{}
  3355  			p.From.Type = obj.TYPE_CONST
  3356  			p.From.Offset = p.To.Offset
  3357  			p.To.Offset = 0
  3358  		}
  3359  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3360  		if p.From3Type() == obj.TYPE_NONE {
  3361  			p.From3 = new(obj.Addr)
  3362  			*p.From3 = p.To
  3363  			p.To = obj.Addr{}
  3364  			p.To.Type = obj.TYPE_CONST
  3365  			p.To.Offset = p.From3.Offset
  3366  			p.From3.Offset = 0
  3367  		}
  3368  	}
  3369  
  3370  	if p.Ft == 0 {
  3371  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3372  	}
  3373  	if p.Tt == 0 {
  3374  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3375  	}
  3376  
  3377  	ft := int(p.Ft) * Ymax
  3378  	f3t := Ynone * Ymax
  3379  	if p.From3 != nil {
  3380  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3381  	}
  3382  	tt := int(p.Tt) * Ymax
  3383  
  3384  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3385  	z := 0
  3386  	var a *obj.Addr
  3387  	var l int
  3388  	var op int
  3389  	var q *obj.Prog
  3390  	var r *obj.Reloc
  3391  	var rel obj.Reloc
  3392  	var v int64
  3393  	for i := range o.ytab {
  3394  		yt := &o.ytab[i]
  3395  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3396  			switch o.prefix {
  3397  			case Px1: /* first option valid only in 32-bit mode */
  3398  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  3399  					z += int(yt.zoffset) + xo
  3400  					continue
  3401  				}
  3402  			case Pq: /* 16 bit escape and opcode escape */
  3403  				asmbuf.Put2(Pe, Pm)
  3404  
  3405  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3406  				asmbuf.rexflag |= Pw
  3407  				asmbuf.Put2(Pe, Pm)
  3408  
  3409  			case Pq4: /*  66 0F 38 */
  3410  				asmbuf.Put3(0x66, 0x0F, 0x38)
  3411  
  3412  			case Pf2, /* xmm opcode escape */
  3413  				Pf3:
  3414  				asmbuf.Put2(o.prefix, Pm)
  3415  
  3416  			case Pef3:
  3417  				asmbuf.Put3(Pe, Pf3, Pm)
  3418  
  3419  			case Pfw: /* xmm opcode escape + REX.W */
  3420  				asmbuf.rexflag |= Pw
  3421  				asmbuf.Put2(Pf3, Pm)
  3422  
  3423  			case Pm: /* opcode escape */
  3424  				asmbuf.Put1(Pm)
  3425  
  3426  			case Pe: /* 16 bit escape */
  3427  				asmbuf.Put1(Pe)
  3428  
  3429  			case Pw: /* 64-bit escape */
  3430  				if ctxt.Arch.Family != sys.AMD64 {
  3431  					ctxt.Diag("asmins: illegal 64: %v", p)
  3432  				}
  3433  				asmbuf.rexflag |= Pw
  3434  
  3435  			case Pw8: /* 64-bit escape if z >= 8 */
  3436  				if z >= 8 {
  3437  					if ctxt.Arch.Family != sys.AMD64 {
  3438  						ctxt.Diag("asmins: illegal 64: %v", p)
  3439  					}
  3440  					asmbuf.rexflag |= Pw
  3441  				}
  3442  
  3443  			case Pb: /* botch */
  3444  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3445  					goto bad
  3446  				}
  3447  				// NOTE(rsc): This is probably safe to do always,
  3448  				// but when enabled it chooses different encodings
  3449  				// than the old cmd/internal/obj/i386 code did,
  3450  				// which breaks our "same bits out" checks.
  3451  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3452  				// in the original obj/i386, and it would encode
  3453  				// (using a valid, shorter form) as 3c 00 if we enabled
  3454  				// the call to bytereg here.
  3455  				if ctxt.Arch.Family == sys.AMD64 {
  3456  					bytereg(&p.From, &p.Ft)
  3457  					bytereg(&p.To, &p.Tt)
  3458  				}
  3459  
  3460  			case P32: /* 32 bit but illegal if 64-bit mode */
  3461  				if ctxt.Arch.Family == sys.AMD64 {
  3462  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3463  				}
  3464  
  3465  			case Py: /* 64-bit only, no prefix */
  3466  				if ctxt.Arch.Family != sys.AMD64 {
  3467  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3468  				}
  3469  
  3470  			case Py1: /* 64-bit only if z < 1, no prefix */
  3471  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  3472  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3473  				}
  3474  
  3475  			case Py3: /* 64-bit only if z < 3, no prefix */
  3476  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  3477  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3478  				}
  3479  			}
  3480  
  3481  			if z >= len(o.op) {
  3482  				log.Fatalf("asmins bad table %v", p)
  3483  			}
  3484  			op = int(o.op[z])
  3485  			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
  3486  			if op == 0x0f && o.prefix != Pvex {
  3487  				asmbuf.Put1(byte(op))
  3488  				z++
  3489  				op = int(o.op[z])
  3490  			}
  3491  
  3492  			switch yt.zcase {
  3493  			default:
  3494  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3495  				return
  3496  
  3497  			case Zpseudo:
  3498  				break
  3499  
  3500  			case Zlit:
  3501  				for ; ; z++ {
  3502  					op = int(o.op[z])
  3503  					if op == 0 {
  3504  						break
  3505  					}
  3506  					asmbuf.Put1(byte(op))
  3507  				}
  3508  
  3509  			case Zlitm_r:
  3510  				for ; ; z++ {
  3511  					op = int(o.op[z])
  3512  					if op == 0 {
  3513  						break
  3514  					}
  3515  					asmbuf.Put1(byte(op))
  3516  				}
  3517  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3518  
  3519  			case Zmb_r:
  3520  				bytereg(&p.From, &p.Ft)
  3521  				fallthrough
  3522  
  3523  			case Zm_r:
  3524  				asmbuf.Put1(byte(op))
  3525  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3526  
  3527  			case Zm2_r:
  3528  				asmbuf.Put2(byte(op), o.op[z+1])
  3529  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3530  
  3531  			case Zm_r_xm:
  3532  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3533  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3534  
  3535  			case Zm_r_xm_nr:
  3536  				asmbuf.rexflag = 0
  3537  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3538  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3539  
  3540  			case Zm_r_i_xm:
  3541  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3542  				asmbuf.asmand(ctxt, cursym, p, &p.From, p.From3)
  3543  				asmbuf.Put1(byte(p.To.Offset))
  3544  
  3545  			case Zibm_r, Zibr_m:
  3546  				for {
  3547  					tmp1 := z
  3548  					z++
  3549  					op = int(o.op[tmp1])
  3550  					if op == 0 {
  3551  						break
  3552  					}
  3553  					asmbuf.Put1(byte(op))
  3554  				}
  3555  				if yt.zcase == Zibr_m {
  3556  					asmbuf.asmand(ctxt, cursym, p, &p.To, p.From3)
  3557  				} else {
  3558  					asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3559  				}
  3560  				asmbuf.Put1(byte(p.From.Offset))
  3561  
  3562  			case Zaut_r:
  3563  				asmbuf.Put1(0x8d) // leal
  3564  				if p.From.Type != obj.TYPE_ADDR {
  3565  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3566  				}
  3567  				p.From.Type = obj.TYPE_MEM
  3568  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3569  				p.From.Type = obj.TYPE_ADDR
  3570  
  3571  			case Zm_o:
  3572  				asmbuf.Put1(byte(op))
  3573  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3574  
  3575  			case Zr_m:
  3576  				asmbuf.Put1(byte(op))
  3577  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3578  
  3579  			case Zvex_rm_v_r:
  3580  				asmbuf.asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3581  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3582  
  3583  			case Zvex_i_r_v:
  3584  				asmbuf.asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
  3585  				regnum := byte(0x7)
  3586  				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
  3587  					regnum &= byte(p.From3.Reg - REG_X0)
  3588  				} else {
  3589  					regnum &= byte(p.From3.Reg - REG_Y0)
  3590  				}
  3591  				asmbuf.Put1(byte(o.op[z+2]) | regnum)
  3592  				asmbuf.Put1(byte(p.From.Offset))
  3593  
  3594  			case Zvex_i_rm_v_r:
  3595  				asmbuf.asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3596  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3597  				asmbuf.Put1(byte(p.From3.Offset))
  3598  
  3599  			case Zvex_i_rm_r:
  3600  				asmbuf.asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
  3601  				asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3602  				asmbuf.Put1(byte(p.From.Offset))
  3603  
  3604  			case Zvex_v_rm_r:
  3605  				asmbuf.asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3606  				asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3607  
  3608  			case Zvex_r_v_rm:
  3609  				asmbuf.asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3610  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3611  
  3612  			case Zr_m_xm:
  3613  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3614  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3615  
  3616  			case Zr_m_xm_nr:
  3617  				asmbuf.rexflag = 0
  3618  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3619  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3620  
  3621  			case Zo_m:
  3622  				asmbuf.Put1(byte(op))
  3623  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3624  
  3625  			case Zcallindreg:
  3626  				r = obj.Addrel(cursym)
  3627  				r.Off = int32(p.Pc)
  3628  				r.Type = obj.R_CALLIND
  3629  				r.Siz = 0
  3630  				fallthrough
  3631  
  3632  			case Zo_m64:
  3633  				asmbuf.Put1(byte(op))
  3634  				asmbuf.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  3635  
  3636  			case Zm_ibo:
  3637  				asmbuf.Put1(byte(op))
  3638  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3639  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3640  
  3641  			case Zibo_m:
  3642  				asmbuf.Put1(byte(op))
  3643  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3644  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3645  
  3646  			case Zibo_m_xm:
  3647  				z = asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3648  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3649  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3650  
  3651  			case Z_ib, Zib_:
  3652  				if yt.zcase == Zib_ {
  3653  					a = &p.From
  3654  				} else {
  3655  					a = &p.To
  3656  				}
  3657  				asmbuf.Put1(byte(op))
  3658  				if p.As == AXABORT {
  3659  					asmbuf.Put1(o.op[z+1])
  3660  				}
  3661  				asmbuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3662  
  3663  			case Zib_rp:
  3664  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3665  				asmbuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3666  
  3667  			case Zil_rp:
  3668  				asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3669  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3670  				if o.prefix == Pe {
  3671  					v = vaddr(ctxt, p, &p.From, nil)
  3672  					asmbuf.PutInt16(int16(v))
  3673  				} else {
  3674  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3675  				}
  3676  
  3677  			case Zo_iw:
  3678  				asmbuf.Put1(byte(op))
  3679  				if p.From.Type != obj.TYPE_NONE {
  3680  					v = vaddr(ctxt, p, &p.From, nil)
  3681  					asmbuf.PutInt16(int16(v))
  3682  				}
  3683  
  3684  			case Ziq_rp:
  3685  				v = vaddr(ctxt, p, &p.From, &rel)
  3686  				l = int(v >> 32)
  3687  				if l == 0 && rel.Siz != 8 {
  3688  					//p->mark |= 0100;
  3689  					//print("zero: %llux %v\n", v, p);
  3690  					asmbuf.rexflag &^= (0x40 | Rxw)
  3691  
  3692  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3693  					asmbuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3694  					if rel.Type != 0 {
  3695  						r = obj.Addrel(cursym)
  3696  						*r = rel
  3697  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3698  					}
  3699  
  3700  					asmbuf.PutInt32(int32(v))
  3701  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3702  
  3703  					//p->mark |= 0100;
  3704  					//print("sign: %llux %v\n", v, p);
  3705  					asmbuf.Put1(0xc7)
  3706  					asmbuf.asmando(ctxt, cursym, p, &p.To, 0)
  3707  
  3708  					asmbuf.PutInt32(int32(v)) // need all 8
  3709  				} else {
  3710  					//print("all: %llux %v\n", v, p);
  3711  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3712  					asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3713  					if rel.Type != 0 {
  3714  						r = obj.Addrel(cursym)
  3715  						*r = rel
  3716  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3717  					}
  3718  
  3719  					asmbuf.PutInt64(v)
  3720  				}
  3721  
  3722  			case Zib_rr:
  3723  				asmbuf.Put1(byte(op))
  3724  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3725  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3726  
  3727  			case Z_il, Zil_:
  3728  				if yt.zcase == Zil_ {
  3729  					a = &p.From
  3730  				} else {
  3731  					a = &p.To
  3732  				}
  3733  				asmbuf.Put1(byte(op))
  3734  				if o.prefix == Pe {
  3735  					v = vaddr(ctxt, p, a, nil)
  3736  					asmbuf.PutInt16(int16(v))
  3737  				} else {
  3738  					asmbuf.relput4(ctxt, cursym, p, a)
  3739  				}
  3740  
  3741  			case Zm_ilo, Zilo_m:
  3742  				asmbuf.Put1(byte(op))
  3743  				if yt.zcase == Zilo_m {
  3744  					a = &p.From
  3745  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3746  				} else {
  3747  					a = &p.To
  3748  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3749  				}
  3750  
  3751  				if o.prefix == Pe {
  3752  					v = vaddr(ctxt, p, a, nil)
  3753  					asmbuf.PutInt16(int16(v))
  3754  				} else {
  3755  					asmbuf.relput4(ctxt, cursym, p, a)
  3756  				}
  3757  
  3758  			case Zil_rr:
  3759  				asmbuf.Put1(byte(op))
  3760  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3761  				if o.prefix == Pe {
  3762  					v = vaddr(ctxt, p, &p.From, nil)
  3763  					asmbuf.PutInt16(int16(v))
  3764  				} else {
  3765  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3766  				}
  3767  
  3768  			case Z_rp:
  3769  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3770  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3771  
  3772  			case Zrp_:
  3773  				asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3774  				asmbuf.Put1(byte(op + reg[p.From.Reg]))
  3775  
  3776  			case Zclr:
  3777  				asmbuf.rexflag &^= Pw
  3778  				asmbuf.Put1(byte(op))
  3779  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3780  
  3781  			case Zcallcon, Zjmpcon:
  3782  				if yt.zcase == Zcallcon {
  3783  					asmbuf.Put1(byte(op))
  3784  				} else {
  3785  					asmbuf.Put1(o.op[z+1])
  3786  				}
  3787  				r = obj.Addrel(cursym)
  3788  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3789  				r.Type = obj.R_PCREL
  3790  				r.Siz = 4
  3791  				r.Add = p.To.Offset
  3792  				asmbuf.PutInt32(0)
  3793  
  3794  			case Zcallind:
  3795  				asmbuf.Put2(byte(op), o.op[z+1])
  3796  				r = obj.Addrel(cursym)
  3797  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3798  				if ctxt.Arch.Family == sys.AMD64 {
  3799  					r.Type = obj.R_PCREL
  3800  				} else {
  3801  					r.Type = obj.R_ADDR
  3802  				}
  3803  				r.Siz = 4
  3804  				r.Add = p.To.Offset
  3805  				r.Sym = p.To.Sym
  3806  				asmbuf.PutInt32(0)
  3807  
  3808  			case Zcall, Zcallduff:
  3809  				if p.To.Sym == nil {
  3810  					ctxt.Diag("call without target")
  3811  					log.Fatalf("bad code")
  3812  				}
  3813  
  3814  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3815  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3816  				}
  3817  
  3818  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3819  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3820  					// (the call jumps into the middle of the function).
  3821  					// This makes it possible to see call sites for duffcopy/duffzero in
  3822  					// BP-based profiling tools like Linux perf (which is the
  3823  					// whole point of obj.Framepointer_enabled).
  3824  					// MOVQ BP, -16(SP)
  3825  					// LEAQ -16(SP), BP
  3826  					asmbuf.Put(bpduff1)
  3827  				}
  3828  				asmbuf.Put1(byte(op))
  3829  				r = obj.Addrel(cursym)
  3830  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3831  				r.Sym = p.To.Sym
  3832  				r.Add = p.To.Offset
  3833  				r.Type = obj.R_CALL
  3834  				r.Siz = 4
  3835  				asmbuf.PutInt32(0)
  3836  
  3837  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3838  					// Pop BP pushed above.
  3839  					// MOVQ 0(BP), BP
  3840  					asmbuf.Put(bpduff2)
  3841  				}
  3842  
  3843  			// TODO: jump across functions needs reloc
  3844  			case Zbr, Zjmp, Zloop:
  3845  				if p.As == AXBEGIN {
  3846  					asmbuf.Put1(byte(op))
  3847  				}
  3848  				if p.To.Sym != nil {
  3849  					if yt.zcase != Zjmp {
  3850  						ctxt.Diag("branch to ATEXT")
  3851  						log.Fatalf("bad code")
  3852  					}
  3853  
  3854  					asmbuf.Put1(o.op[z+1])
  3855  					r = obj.Addrel(cursym)
  3856  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3857  					r.Sym = p.To.Sym
  3858  					r.Type = obj.R_PCREL
  3859  					r.Siz = 4
  3860  					asmbuf.PutInt32(0)
  3861  					break
  3862  				}
  3863  
  3864  				// Assumes q is in this function.
  3865  				// TODO: Check in input, preserve in brchain.
  3866  
  3867  				// Fill in backward jump now.
  3868  				q = p.Pcond
  3869  
  3870  				if q == nil {
  3871  					ctxt.Diag("jmp/branch/loop without target")
  3872  					log.Fatalf("bad code")
  3873  				}
  3874  
  3875  				if p.Back&1 != 0 {
  3876  					v = q.Pc - (p.Pc + 2)
  3877  					if v >= -128 && p.As != AXBEGIN {
  3878  						if p.As == AJCXZL {
  3879  							asmbuf.Put1(0x67)
  3880  						}
  3881  						asmbuf.Put2(byte(op), byte(v))
  3882  					} else if yt.zcase == Zloop {
  3883  						ctxt.Diag("loop too far: %v", p)
  3884  					} else {
  3885  						v -= 5 - 2
  3886  						if p.As == AXBEGIN {
  3887  							v--
  3888  						}
  3889  						if yt.zcase == Zbr {
  3890  							asmbuf.Put1(0x0f)
  3891  							v--
  3892  						}
  3893  
  3894  						asmbuf.Put1(o.op[z+1])
  3895  						asmbuf.PutInt32(int32(v))
  3896  					}
  3897  
  3898  					break
  3899  				}
  3900  
  3901  				// Annotate target; will fill in later.
  3902  				p.Forwd = q.Rel
  3903  
  3904  				q.Rel = p
  3905  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3906  					if p.As == AJCXZL {
  3907  						asmbuf.Put1(0x67)
  3908  					}
  3909  					asmbuf.Put2(byte(op), 0)
  3910  				} else if yt.zcase == Zloop {
  3911  					ctxt.Diag("loop too far: %v", p)
  3912  				} else {
  3913  					if yt.zcase == Zbr {
  3914  						asmbuf.Put1(0x0f)
  3915  					}
  3916  					asmbuf.Put1(o.op[z+1])
  3917  					asmbuf.PutInt32(0)
  3918  				}
  3919  
  3920  				break
  3921  
  3922  			/*
  3923  				v = q->pc - p->pc - 2;
  3924  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3925  					*ctxt->andptr++ = op;
  3926  					*ctxt->andptr++ = v;
  3927  				} else {
  3928  					v -= 5-2;
  3929  					if(yt.zcase == Zbr) {
  3930  						*ctxt->andptr++ = 0x0f;
  3931  						v--;
  3932  					}
  3933  					*ctxt->andptr++ = o->op[z+1];
  3934  					*ctxt->andptr++ = v;
  3935  					*ctxt->andptr++ = v>>8;
  3936  					*ctxt->andptr++ = v>>16;
  3937  					*ctxt->andptr++ = v>>24;
  3938  				}
  3939  			*/
  3940  
  3941  			case Zbyte:
  3942  				v = vaddr(ctxt, p, &p.From, &rel)
  3943  				if rel.Siz != 0 {
  3944  					rel.Siz = uint8(op)
  3945  					r = obj.Addrel(cursym)
  3946  					*r = rel
  3947  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3948  				}
  3949  
  3950  				asmbuf.Put1(byte(v))
  3951  				if op > 1 {
  3952  					asmbuf.Put1(byte(v >> 8))
  3953  					if op > 2 {
  3954  						asmbuf.PutInt16(int16(v >> 16))
  3955  						if op > 4 {
  3956  							asmbuf.PutInt32(int32(v >> 32))
  3957  						}
  3958  					}
  3959  				}
  3960  			}
  3961  
  3962  			return
  3963  		}
  3964  		z += int(yt.zoffset) + xo
  3965  	}
  3966  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3967  		var pp obj.Prog
  3968  		var t []byte
  3969  		if p.As == mo[0].as {
  3970  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3971  				t = mo[0].op[:]
  3972  				switch mo[0].code {
  3973  				default:
  3974  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3975  
  3976  				case 0: /* lit */
  3977  					for z = 0; t[z] != E; z++ {
  3978  						asmbuf.Put1(t[z])
  3979  					}
  3980  
  3981  				case 1: /* r,m */
  3982  					asmbuf.Put1(t[0])
  3983  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  3984  
  3985  				case 2: /* m,r */
  3986  					asmbuf.Put1(t[0])
  3987  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  3988  
  3989  				case 3: /* r,m - 2op */
  3990  					asmbuf.Put2(t[0], t[1])
  3991  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  3992  					asmbuf.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3993  
  3994  				case 4: /* m,r - 2op */
  3995  					asmbuf.Put2(t[0], t[1])
  3996  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  3997  					asmbuf.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3998  
  3999  				case 5: /* load full pointer, trash heap */
  4000  					if t[0] != 0 {
  4001  						asmbuf.Put1(t[0])
  4002  					}
  4003  					switch p.To.Index {
  4004  					default:
  4005  						goto bad
  4006  
  4007  					case REG_DS:
  4008  						asmbuf.Put1(0xc5)
  4009  
  4010  					case REG_SS:
  4011  						asmbuf.Put2(0x0f, 0xb2)
  4012  
  4013  					case REG_ES:
  4014  						asmbuf.Put1(0xc4)
  4015  
  4016  					case REG_FS:
  4017  						asmbuf.Put2(0x0f, 0xb4)
  4018  
  4019  					case REG_GS:
  4020  						asmbuf.Put2(0x0f, 0xb5)
  4021  					}
  4022  
  4023  					asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  4024  
  4025  				case 6: /* double shift */
  4026  					if t[0] == Pw {
  4027  						if ctxt.Arch.Family != sys.AMD64 {
  4028  							ctxt.Diag("asmins: illegal 64: %v", p)
  4029  						}
  4030  						asmbuf.rexflag |= Pw
  4031  						t = t[1:]
  4032  					} else if t[0] == Pe {
  4033  						asmbuf.Put1(Pe)
  4034  						t = t[1:]
  4035  					}
  4036  
  4037  					switch p.From.Type {
  4038  					default:
  4039  						goto bad
  4040  
  4041  					case obj.TYPE_CONST:
  4042  						asmbuf.Put2(0x0f, t[0])
  4043  						asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4044  						asmbuf.Put1(byte(p.From.Offset))
  4045  
  4046  					case obj.TYPE_REG:
  4047  						switch p.From.Reg {
  4048  						default:
  4049  							goto bad
  4050  
  4051  						case REG_CL, REG_CX:
  4052  							asmbuf.Put2(0x0f, t[1])
  4053  							asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4054  						}
  4055  					}
  4056  
  4057  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4058  				// where you load the TLS base register into a register and then index off that
  4059  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4060  				// are handled in prefixof above and should not be listed here.
  4061  				case 7: /* mov tls, r */
  4062  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4063  						ctxt.Diag("invalid load of TLS: %v", p)
  4064  					}
  4065  
  4066  					if ctxt.Arch.Family == sys.I386 {
  4067  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4068  						// where you load the TLS base register into a register and then index off that
  4069  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4070  						// are handled in prefixof above and should not be listed here.
  4071  						switch ctxt.Headtype {
  4072  						default:
  4073  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4074  
  4075  						case obj.Hlinux,
  4076  							obj.Hnacl:
  4077  							if ctxt.Flag_shared {
  4078  								// Note that this is not generating the same insns as the other cases.
  4079  								//     MOV TLS, dst
  4080  								// becomes
  4081  								//     call __x86.get_pc_thunk.dst
  4082  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4083  								// which is encoded as
  4084  								//     call __x86.get_pc_thunk.dst
  4085  								//     movq 0(dst), dst
  4086  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4087  								// is g, which we can't check here, but will when we assemble the second
  4088  								// instruction.
  4089  								dst := p.To.Reg
  4090  								asmbuf.Put1(0xe8)
  4091  								r = obj.Addrel(cursym)
  4092  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4093  								r.Type = obj.R_CALL
  4094  								r.Siz = 4
  4095  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk."+strings.ToLower(rconv(int(dst))), 0)
  4096  								asmbuf.PutInt32(0)
  4097  
  4098  								asmbuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4099  								r = obj.Addrel(cursym)
  4100  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4101  								r.Type = obj.R_TLS_IE
  4102  								r.Siz = 4
  4103  								r.Add = 2
  4104  								asmbuf.PutInt32(0)
  4105  							} else {
  4106  								// ELF TLS base is 0(GS).
  4107  								pp.From = p.From
  4108  
  4109  								pp.From.Type = obj.TYPE_MEM
  4110  								pp.From.Reg = REG_GS
  4111  								pp.From.Offset = 0
  4112  								pp.From.Index = REG_NONE
  4113  								pp.From.Scale = 0
  4114  								asmbuf.Put2(0x65, // GS
  4115  									0x8B)
  4116  								asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4117  							}
  4118  						case obj.Hplan9:
  4119  							pp.From = obj.Addr{}
  4120  							pp.From.Type = obj.TYPE_MEM
  4121  							pp.From.Name = obj.NAME_EXTERN
  4122  							pp.From.Sym = plan9privates
  4123  							pp.From.Offset = 0
  4124  							pp.From.Index = REG_NONE
  4125  							asmbuf.Put1(0x8B)
  4126  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4127  
  4128  						case obj.Hwindows:
  4129  							// Windows TLS base is always 0x14(FS).
  4130  							pp.From = p.From
  4131  
  4132  							pp.From.Type = obj.TYPE_MEM
  4133  							pp.From.Reg = REG_FS
  4134  							pp.From.Offset = 0x14
  4135  							pp.From.Index = REG_NONE
  4136  							pp.From.Scale = 0
  4137  							asmbuf.Put2(0x64, // FS
  4138  								0x8B)
  4139  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4140  						}
  4141  						break
  4142  					}
  4143  
  4144  					switch ctxt.Headtype {
  4145  					default:
  4146  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4147  
  4148  					case obj.Hlinux:
  4149  						if !ctxt.Flag_shared {
  4150  							log.Fatalf("unknown TLS base location for linux without -shared")
  4151  						}
  4152  						// Note that this is not generating the same insn as the other cases.
  4153  						//     MOV TLS, R_to
  4154  						// becomes
  4155  						//     movq g@gottpoff(%rip), R_to
  4156  						// which is encoded as
  4157  						//     movq 0(%rip), R_to
  4158  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4159  						// is g, which we can't check here, but will when we assemble the second
  4160  						// instruction.
  4161  						asmbuf.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4162  
  4163  						asmbuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4164  						r = obj.Addrel(cursym)
  4165  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4166  						r.Type = obj.R_TLS_IE
  4167  						r.Siz = 4
  4168  						r.Add = -4
  4169  						asmbuf.PutInt32(0)
  4170  
  4171  					case obj.Hplan9:
  4172  						pp.From = obj.Addr{}
  4173  						pp.From.Type = obj.TYPE_MEM
  4174  						pp.From.Name = obj.NAME_EXTERN
  4175  						pp.From.Sym = plan9privates
  4176  						pp.From.Offset = 0
  4177  						pp.From.Index = REG_NONE
  4178  						asmbuf.rexflag |= Pw
  4179  						asmbuf.Put1(0x8B)
  4180  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4181  
  4182  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4183  						// TLS base is 0(FS).
  4184  						pp.From = p.From
  4185  
  4186  						pp.From.Type = obj.TYPE_MEM
  4187  						pp.From.Name = obj.NAME_NONE
  4188  						pp.From.Reg = REG_NONE
  4189  						pp.From.Offset = 0
  4190  						pp.From.Index = REG_NONE
  4191  						pp.From.Scale = 0
  4192  						asmbuf.rexflag |= Pw
  4193  						asmbuf.Put2(0x64, // FS
  4194  							0x8B)
  4195  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4196  
  4197  					case obj.Hwindows:
  4198  						// Windows TLS base is always 0x28(GS).
  4199  						pp.From = p.From
  4200  
  4201  						pp.From.Type = obj.TYPE_MEM
  4202  						pp.From.Name = obj.NAME_NONE
  4203  						pp.From.Reg = REG_GS
  4204  						pp.From.Offset = 0x28
  4205  						pp.From.Index = REG_NONE
  4206  						pp.From.Scale = 0
  4207  						asmbuf.rexflag |= Pw
  4208  						asmbuf.Put2(0x65, // GS
  4209  							0x8B)
  4210  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4211  					}
  4212  				}
  4213  				return
  4214  			}
  4215  		}
  4216  	}
  4217  	goto bad
  4218  
  4219  bad:
  4220  	if ctxt.Arch.Family != sys.AMD64 {
  4221  		/*
  4222  		 * here, the assembly has failed.
  4223  		 * if its a byte instruction that has
  4224  		 * unaddressable registers, try to
  4225  		 * exchange registers and reissue the
  4226  		 * instruction with the operands renamed.
  4227  		 */
  4228  		pp := *p
  4229  
  4230  		unbytereg(&pp.From, &pp.Ft)
  4231  		unbytereg(&pp.To, &pp.Tt)
  4232  
  4233  		z := int(p.From.Reg)
  4234  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4235  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4236  			// For now, different to keep bit-for-bit compatibility.
  4237  			if ctxt.Arch.Family == sys.I386 {
  4238  				breg := byteswapreg(ctxt, &p.To)
  4239  				if breg != REG_AX {
  4240  					asmbuf.Put1(0x87) // xchg lhs,bx
  4241  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4242  					subreg(&pp, z, breg)
  4243  					asmbuf.doasm(ctxt, cursym, &pp)
  4244  					asmbuf.Put1(0x87) // xchg lhs,bx
  4245  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4246  				} else {
  4247  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4248  					subreg(&pp, z, REG_AX)
  4249  					asmbuf.doasm(ctxt, cursym, &pp)
  4250  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4251  				}
  4252  				return
  4253  			}
  4254  
  4255  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4256  				// We certainly don't want to exchange
  4257  				// with AX if the op is MUL or DIV.
  4258  				asmbuf.Put1(0x87) // xchg lhs,bx
  4259  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4260  				subreg(&pp, z, REG_BX)
  4261  				asmbuf.doasm(ctxt, cursym, &pp)
  4262  				asmbuf.Put1(0x87) // xchg lhs,bx
  4263  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4264  			} else {
  4265  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4266  				subreg(&pp, z, REG_AX)
  4267  				asmbuf.doasm(ctxt, cursym, &pp)
  4268  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4269  			}
  4270  			return
  4271  		}
  4272  
  4273  		z = int(p.To.Reg)
  4274  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4275  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4276  			// For now, different to keep bit-for-bit compatibility.
  4277  			if ctxt.Arch.Family == sys.I386 {
  4278  				breg := byteswapreg(ctxt, &p.From)
  4279  				if breg != REG_AX {
  4280  					asmbuf.Put1(0x87) //xchg rhs,bx
  4281  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4282  					subreg(&pp, z, breg)
  4283  					asmbuf.doasm(ctxt, cursym, &pp)
  4284  					asmbuf.Put1(0x87) // xchg rhs,bx
  4285  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4286  				} else {
  4287  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4288  					subreg(&pp, z, REG_AX)
  4289  					asmbuf.doasm(ctxt, cursym, &pp)
  4290  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4291  				}
  4292  				return
  4293  			}
  4294  
  4295  			if isax(&p.From) {
  4296  				asmbuf.Put1(0x87) // xchg rhs,bx
  4297  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4298  				subreg(&pp, z, REG_BX)
  4299  				asmbuf.doasm(ctxt, cursym, &pp)
  4300  				asmbuf.Put1(0x87) // xchg rhs,bx
  4301  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4302  			} else {
  4303  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4304  				subreg(&pp, z, REG_AX)
  4305  				asmbuf.doasm(ctxt, cursym, &pp)
  4306  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4307  			}
  4308  			return
  4309  		}
  4310  	}
  4311  
  4312  	ctxt.Diag("invalid instruction: %v", p)
  4313  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4314  	return
  4315  }
  4316  
  4317  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4318  // which is not referenced in a.
  4319  // If a is empty, it returns BX to account for MULB-like instructions
  4320  // that might use DX and AX.
  4321  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4322  	cana, canb, canc, cand := true, true, true, true
  4323  	if a.Type == obj.TYPE_NONE {
  4324  		cana, cand = false, false
  4325  	}
  4326  
  4327  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4328  		switch a.Reg {
  4329  		case REG_NONE:
  4330  			cana, cand = false, false
  4331  		case REG_AX, REG_AL, REG_AH:
  4332  			cana = false
  4333  		case REG_BX, REG_BL, REG_BH:
  4334  			canb = false
  4335  		case REG_CX, REG_CL, REG_CH:
  4336  			canc = false
  4337  		case REG_DX, REG_DL, REG_DH:
  4338  			cand = false
  4339  		}
  4340  	}
  4341  
  4342  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4343  		switch a.Index {
  4344  		case REG_AX:
  4345  			cana = false
  4346  		case REG_BX:
  4347  			canb = false
  4348  		case REG_CX:
  4349  			canc = false
  4350  		case REG_DX:
  4351  			cand = false
  4352  		}
  4353  	}
  4354  
  4355  	switch {
  4356  	case cana:
  4357  		return REG_AX
  4358  	case canb:
  4359  		return REG_BX
  4360  	case canc:
  4361  		return REG_CX
  4362  	case cand:
  4363  		return REG_DX
  4364  	default:
  4365  		ctxt.Diag("impossible byte register")
  4366  		log.Fatalf("bad code")
  4367  		return 0
  4368  	}
  4369  }
  4370  
  4371  func isbadbyte(a *obj.Addr) bool {
  4372  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4373  }
  4374  
  4375  var naclret = []uint8{
  4376  	0x5e, // POPL SI
  4377  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4378  	0x83,
  4379  	0xe6,
  4380  	0xe0, // ANDL $~31, SI
  4381  	0x4c,
  4382  	0x01,
  4383  	0xfe, // ADDQ R15, SI
  4384  	0xff,
  4385  	0xe6, // JMP SI
  4386  }
  4387  
  4388  var naclret8 = []uint8{
  4389  	0x5d, // POPL BP
  4390  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4391  	0x83,
  4392  	0xe5,
  4393  	0xe0, // ANDL $~31, BP
  4394  	0xff,
  4395  	0xe5, // JMP BP
  4396  }
  4397  
  4398  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4399  
  4400  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4401  
  4402  var naclmovs = []uint8{
  4403  	0x89,
  4404  	0xf6, // MOVL SI, SI
  4405  	0x49,
  4406  	0x8d,
  4407  	0x34,
  4408  	0x37, // LEAQ (R15)(SI*1), SI
  4409  	0x89,
  4410  	0xff, // MOVL DI, DI
  4411  	0x49,
  4412  	0x8d,
  4413  	0x3c,
  4414  	0x3f, // LEAQ (R15)(DI*1), DI
  4415  }
  4416  
  4417  var naclstos = []uint8{
  4418  	0x89,
  4419  	0xff, // MOVL DI, DI
  4420  	0x49,
  4421  	0x8d,
  4422  	0x3c,
  4423  	0x3f, // LEAQ (R15)(DI*1), DI
  4424  }
  4425  
  4426  func (asmbuf *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
  4427  	if reg >= REG_R8 {
  4428  		asmbuf.Put1(0x45)
  4429  	}
  4430  	reg = (reg - REG_AX) & 7
  4431  	asmbuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4432  }
  4433  
  4434  func (asmbuf *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4435  	asmbuf.Reset()
  4436  
  4437  	if ctxt.Headtype == obj.Hnacl && ctxt.Arch.Family == sys.I386 {
  4438  		switch p.As {
  4439  		case obj.ARET:
  4440  			asmbuf.Put(naclret8)
  4441  			return
  4442  
  4443  		case obj.ACALL,
  4444  			obj.AJMP:
  4445  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4446  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4447  			}
  4448  
  4449  		case AINT:
  4450  			asmbuf.Put1(0xf4)
  4451  			return
  4452  		}
  4453  	}
  4454  
  4455  	if ctxt.Headtype == obj.Hnacl && ctxt.Arch.Family == sys.AMD64 {
  4456  		if p.As == AREP {
  4457  			asmbuf.rep++
  4458  			return
  4459  		}
  4460  
  4461  		if p.As == AREPN {
  4462  			asmbuf.repn++
  4463  			return
  4464  		}
  4465  
  4466  		if p.As == ALOCK {
  4467  			asmbuf.lock = true
  4468  			return
  4469  		}
  4470  
  4471  		if p.As != ALEAQ && p.As != ALEAL {
  4472  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4473  				asmbuf.nacltrunc(ctxt, int(p.From.Index))
  4474  			}
  4475  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4476  				asmbuf.nacltrunc(ctxt, int(p.To.Index))
  4477  			}
  4478  		}
  4479  
  4480  		switch p.As {
  4481  		case obj.ARET:
  4482  			asmbuf.Put(naclret)
  4483  			return
  4484  
  4485  		case obj.ACALL,
  4486  			obj.AJMP:
  4487  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4488  				// ANDL $~31, reg
  4489  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4490  				// ADDQ R15, reg
  4491  				asmbuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4492  			}
  4493  
  4494  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4495  				// ANDL $~31, reg
  4496  				asmbuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4497  				// ADDQ R15, reg
  4498  				asmbuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4499  			}
  4500  
  4501  		case AINT:
  4502  			asmbuf.Put1(0xf4)
  4503  			return
  4504  
  4505  		case ASCASB,
  4506  			ASCASW,
  4507  			ASCASL,
  4508  			ASCASQ,
  4509  			ASTOSB,
  4510  			ASTOSW,
  4511  			ASTOSL,
  4512  			ASTOSQ:
  4513  			asmbuf.Put(naclstos)
  4514  
  4515  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4516  			asmbuf.Put(naclmovs)
  4517  		}
  4518  
  4519  		if asmbuf.rep != 0 {
  4520  			asmbuf.Put1(0xf3)
  4521  			asmbuf.rep = 0
  4522  		}
  4523  
  4524  		if asmbuf.repn != 0 {
  4525  			asmbuf.Put1(0xf2)
  4526  			asmbuf.repn = 0
  4527  		}
  4528  
  4529  		if asmbuf.lock {
  4530  			asmbuf.Put1(0xf0)
  4531  			asmbuf.lock = false
  4532  		}
  4533  	}
  4534  
  4535  	asmbuf.rexflag = 0
  4536  	asmbuf.vexflag = 0
  4537  	mark := asmbuf.Len()
  4538  	asmbuf.doasm(ctxt, cursym, p)
  4539  	if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4540  		/*
  4541  		 * as befits the whole approach of the architecture,
  4542  		 * the rex prefix must appear before the first opcode byte
  4543  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4544  		 * before the 0f opcode escape!), or it might be ignored.
  4545  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4546  		 */
  4547  		if ctxt.Arch.Family != sys.AMD64 {
  4548  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  4549  		}
  4550  		n := asmbuf.Len()
  4551  		var np int
  4552  		for np = mark; np < n; np++ {
  4553  			c := asmbuf.At(np)
  4554  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4555  				break
  4556  			}
  4557  		}
  4558  		asmbuf.Insert(np, byte(0x40|asmbuf.rexflag))
  4559  	}
  4560  
  4561  	n := asmbuf.Len()
  4562  	for i := len(cursym.R) - 1; i >= 0; i-- {
  4563  		r := &cursym.R[i]
  4564  		if int64(r.Off) < p.Pc {
  4565  			break
  4566  		}
  4567  		if asmbuf.rexflag != 0 {
  4568  			r.Off++
  4569  		}
  4570  		if r.Type == obj.R_PCREL {
  4571  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4572  				// PC-relative addressing is relative to the end of the instruction,
  4573  				// but the relocations applied by the linker are relative to the end
  4574  				// of the relocation. Because immediate instruction
  4575  				// arguments can follow the PC-relative memory reference in the
  4576  				// instruction encoding, the two may not coincide. In this case,
  4577  				// adjust addend so that linker can keep relocating relative to the
  4578  				// end of the relocation.
  4579  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4580  			} else if ctxt.Arch.Family == sys.I386 {
  4581  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4582  				// assumes that the previous instruction loaded the PC of the end
  4583  				// of that instruction into CX, so the adjustment is relative to
  4584  				// that.
  4585  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4586  			}
  4587  		}
  4588  		if r.Type == obj.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  4589  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4590  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4591  		}
  4592  
  4593  	}
  4594  
  4595  	if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4596  		switch p.To.Reg {
  4597  		case REG_SP:
  4598  			asmbuf.Put(naclspfix)
  4599  		case REG_BP:
  4600  			asmbuf.Put(naclbpfix)
  4601  		}
  4602  	}
  4603  }