github.com/mh-cbon/go@v0.0.0-20160603070303-9e112a3fe4c0/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"encoding/binary"
    36  	"fmt"
    37  	"log"
    38  	"strings"
    39  )
    40  
    41  // Instruction layout.
    42  
    43  const (
    44  	// Loop alignment constants:
    45  	// want to align loop entry to LoopAlign-byte boundary,
    46  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    47  	// We define a loop entry as the target of a backward jump.
    48  	//
    49  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    50  	// and it aligns all jump targets, not just backward jump targets.
    51  	//
    52  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    53  	// is very slight but negative, so the alignment is disabled by
    54  	// setting MaxLoopPad = 0. The code is here for reference and
    55  	// for future experiments.
    56  	//
    57  	LoopAlign  = 16 // loop-entry alignment boundary, in bytes
    58  	MaxLoopPad = 0  // maximum NOP padding per loop entry; 0 disables loop alignment
    59  	FuncAlign  = 16 // NOTE(review): presumably the function-start alignment in bytes; its use is outside this view
    60  )
    61  
    62  type Optab struct { // one row of the optab instruction table (as, ytab, prefix, opcode bytes)
    63  	as     obj.As // instruction this row describes, e.g. AADDL
    64  	ytab   []ytab // operand patterns accepted by the instruction, scanned line by line
    65  	prefix uint8 // one of the P* constants (Px, Pe, Pm, Pw, ...)
    66  	op     [23]uint8 // opcode bytes; a z pointer walks this array, advancing by each skipped ytab line's zoffset
    67  }
    68  
    69  type ytab struct { // one operand pattern ("ytable line") of an instruction
    70  	from    uint8 // Y* operand class for the from operand (Ynone when absent)
    71  	from3   uint8 // Y* operand class for the from3 operand (Ynone in most tables)
    72  	to      uint8 // Y* operand class for the to operand (Ynone when absent)
    73  	zcase   uint8 // Z* constant naming the encoding case for this pattern
    74  	zoffset uint8 // how far the z pointer into Optab.op advances when this line is skipped
    75  }
    76  
    77  type Movtab struct { // NOTE(review): no Movtab values are visible in this chunk; field notes below are inferred from names and types only
    78  	as   obj.As // instruction the entry applies to
    79  	ft   uint8 // presumably the operand class of from — TODO confirm
    80  	f3t  uint8 // presumably the operand class of from3 — TODO confirm
    81  	tt   uint8 // presumably the operand class of to — TODO confirm
    82  	code uint8 // meaning not visible here — TODO confirm against the code that consumes it
    83  	op   [4]uint8 // up to four bytes of encoding data
    84  }
    85  
    86  const (
        	// Operand classes ("Ytypes"). They fill the from, from3 and to
        	// fields of ytab lines and index the ycover table. The values come
        	// from iota, so declaration order is significant.
    87  	Yxxx = iota
    88  	Ynone
    89  	Yi0 // $0
    90  	Yi1 // $1
    91  	Yi8 // $x, x fits in int8
    92  	Yu8 // $x, x fits in uint8
    93  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
    94  	Ys32
    95  	Yi32
    96  	Yi64
    97  	Yiauto
    98  	Yal
    99  	Ycl
   100  	Yax
   101  	Ycx
   102  	Yrb
   103  	Yrl
   104  	Yrl32 // Yrl on 32-bit system
   105  	Yrf
   106  	Yf0
   107  	Yrx
   108  	Ymb
   109  	Yml
   110  	Ym
   111  	Ybr
   112  	Ycs
   113  	Yss
   114  	Yds
   115  	Yes
   116  	Yfs
   117  	Ygs
   118  	Ygdtr
   119  	Yidtr
   120  	Yldtr
   121  	Ymsw
   122  	Ytask
   123  	Ycr0
   124  	Ycr1
   125  	Ycr2
   126  	Ycr3
   127  	Ycr4
   128  	Ycr5
   129  	Ycr6
   130  	Ycr7
   131  	Ycr8
   132  	Ydr0
   133  	Ydr1
   134  	Ydr2
   135  	Ydr3
   136  	Ydr4
   137  	Ydr5
   138  	Ydr6
   139  	Ydr7
   140  	Ytr0
   141  	Ytr1
   142  	Ytr2
   143  	Ytr3
   144  	Ytr4
   145  	Ytr5
   146  	Ytr6
   147  	Ytr7
   148  	Ymr
   149  	Ymm
   150  	Yxr
   151  	Yxm
   152  	Yyr
   153  	Yym
   154  	Ytls
   155  	Ytextsize
   156  	Yindir
   157  	Ymax // number of operand classes; ycover has Ymax*Ymax entries
   158  )
   159  
   160  const (
        	// Encoding cases ("Ztypes"). Each ytab line names one of these in
        	// its zcase field. The values come from iota, so declaration order
        	// is significant.
   161  	Zxxx = iota
   162  	Zlit
   163  	Zlitm_r
   164  	Z_rp
   165  	Zbr
   166  	Zcall
   167  	Zcallcon
   168  	Zcallduff
   169  	Zcallind
   170  	Zcallindreg
   171  	Zib_
   172  	Zib_rp
   173  	Zibo_m
   174  	Zibo_m_xm
   175  	Zil_
   176  	Zil_rp
   177  	Ziq_rp
   178  	Zilo_m
   179  	Zjmp
   180  	Zjmpcon
   181  	Zloop
   182  	Zo_iw
   183  	Zm_o
   184  	Zm_r
   185  	Zm2_r
   186  	Zm_r_xm
   187  	Zm_r_i_xm
   188  	Zm_r_xm_nr
   189  	Zr_m_xm_nr
   190  	Zibm_r /* mmx1,mmx2/mem64,imm8 */
   191  	Zibr_m
   192  	Zmb_r
   193  	Zaut_r
   194  	Zo_m
   195  	Zo_m64
   196  	Zpseudo
   197  	Zr_m
   198  	Zr_m_xm
   199  	Zrp_
   200  	Z_ib
   201  	Z_il
   202  	Zm_ibo
   203  	Zm_ilo
   204  	Zib_rr
   205  	Zil_rr
   206  	Zclr
   207  	Zbyte
   208  	Zvex_rm_v_r
   209  	Zvex_r_v_rm
   210  	Zvex_v_rm_r
   211  	Zvex_i_rm_r
   212  	Zvex_i_r_v
   213  	Zvex_i_rm_v_r
   214  	Zmax // one past the last encoding case
   215  )
   216  
   217  const (
        	// Values for the prefix field of Optab rows. Several are real
        	// prefix or escape bytes; those marked "symbolic" are only tags.
   218  	Px   = 0
   219  	Px1  = 1    // symbolic; exact value doesn't matter
   220  	P32  = 0x32 /* 32-bit only */
   221  	Pe   = 0x66 /* operand escape */
   222  	Pm   = 0x0f /* 2byte opcode escape */
   223  	Pq   = 0xff /* both escapes: 66 0f */
   224  	Pb   = 0xfe /* byte operands */
   225  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   226  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   227  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   228  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   229  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   230  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   231  	Pw   = 0x48 /* Rex.w */
   232  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   233  	Py   = 0x80 /* defaults to 64-bit mode */
   234  	Py1  = 0x81 // symbolic; exact value doesn't matter
   235  	Py3  = 0x83 // symbolic; exact value doesn't matter
   236  	Pvex = 0x84 // symbolic: exact value doesn't matter
   237    
        	// Single-bit masks occupying bits 3..0. Rxw is the same bit that
        	// distinguishes Pw (0x48, "Rex.w") from 0x40.
   238  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   239  	Rxr = 1 << 2 /* extend modrm reg */
   240  	Rxx = 1 << 1 /* extend sib index */
   241  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   242  )
   243  
   244  const (
   245  	// Encoding for VEX prefix in tables.
   246  	// The P, L, and W fields are chosen to match
   247  	// their eventual locations in the VEX prefix bytes.
        	// As laid out below, the packed byte is: bits 0-1 = P, bit 2 = L,
        	// bits 3-4 = M, bit 7 = W.
   248  
   249  	// P field - 2 bits
   250  	vex66 = 1 << 0
   251  	vexF3 = 2 << 0
   252  	vexF2 = 3 << 0
   253  	// L field - 1 bit
        	// vexLZ, vexLIG and vex128 all encode L=0; the separate names
        	// mirror the notation of the combinations listed further down.
   254  	vexLZ  = 0 << 2
   255  	vexLIG = 0 << 2
   256  	vex128 = 0 << 2
   257  	vex256 = 1 << 2
   258  	// W field - 1 bit
        	// vexWIG and vexW0 both encode W=0.
   259  	vexWIG = 0 << 7
   260  	vexW0  = 0 << 7
   261  	vexW1  = 1 << 7
   262  	// M field - 5 bits, but mostly reserved; we can store up to 4
   263  	vex0F   = 1 << 3
   264  	vex0F38 = 2 << 3
   265  	vex0F3A = 3 << 3
   266  
   267  	// Combinations used in the manual.
        	// Each name spells out L, optional P, M and W in that order.
   268  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   269  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   270  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   271  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   272  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   273  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   274  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   275  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   276  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   277  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   278  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   279  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   280  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   281  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   282  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   283  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   284  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   285  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   286  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   287  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   288  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   289  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   290  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   291  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   292  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   293  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   294  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   295  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   296  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   297  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   298  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   299  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   300  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   301  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   302  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   303  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   304  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   305  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   306  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   307  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   308  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   309  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   310  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   311  )
   312  
   313  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means an operand of class a also counts as class b
   314  
   315  var reg [MAXREG]int // NOTE(review): per-register table; it is filled in outside this view — confirm contents there
   316  
   317  var regrex [MAXREG + 1]int // NOTE(review): per-register table with one extra slot; it is filled in outside this view
   318  
   319  var ynone = []ytab{ // no operands at all; used by AAAA, ACDQ, ACLC, ... in optab
   320  	{Ynone, Ynone, Ynone, Zlit, 1},
   321  }
   322  
   323  var ytext = []ytab{ // Zpseudo forms with a Ytextsize destination; the second line also takes a Yi32 from3
   324  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   325  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   326  }
   327  
   328  var ynop = []ytab{ // Zpseudo forms taking no operand or exactly one operand, on either side
   329  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   330  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   331  	{Ynone, Ynone, Yml, Zpseudo, 0},
   332  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   333  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   334  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   335  	{Yml, Ynone, Ynone, Zpseudo, 0},
   336  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   337  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   338  }
   339  
   340  var yfuncdata = []ytab{ // Zpseudo form: Yi32 from, Ym to
   341  	{Yi32, Ynone, Ym, Zpseudo, 0},
   342  }
   343  
   344  var ypcdata = []ytab{ // Zpseudo form: Yi32 on both sides
   345  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   346  }
   347  
   348  var yxorb = []ytab{ // used by AADCB, AADDB, AANDB in optab; zoffsets 1+2+1+1 cover their five opcode bytes
   349  	{Yi32, Ynone, Yal, Zib_, 1},
   350  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   351  	{Yrb, Ynone, Ymb, Zr_m, 1},
   352  	{Ymb, Ynone, Yrb, Zm_r, 1},
   353  }
   354  
   355  var yxorl = []ytab{ // used by AADCL/Q/W and AANDL/Q/W in optab; zoffsets 2+1+2+1+1 cover their seven opcode bytes
   356  	{Yi8, Ynone, Yml, Zibo_m, 2},
   357  	{Yi32, Ynone, Yax, Zil_, 1},
   358  	{Yi32, Ynone, Yml, Zilo_m, 2},
   359  	{Yrl, Ynone, Yml, Zr_m, 1},
   360  	{Yml, Ynone, Yrl, Zm_r, 1},
   361  }
   362  
   363  var yaddl = []ytab{ // used by AADDL/Q/W in optab; contents are identical to yxorl
   364  	{Yi8, Ynone, Yml, Zibo_m, 2},
   365  	{Yi32, Ynone, Yax, Zil_, 1},
   366  	{Yi32, Ynone, Yml, Zilo_m, 2},
   367  	{Yrl, Ynone, Yml, Zr_m, 1},
   368  	{Yml, Ynone, Yrl, Zm_r, 1},
   369  }
   370  
   371  var yincb = []ytab{ // single Ymb destination, no source
   372  	{Ynone, Ynone, Ymb, Zo_m, 2},
   373  }
   374  
   375  var yincw = []ytab{ // single Yml destination, no source
   376  	{Ynone, Ynone, Yml, Zo_m, 2},
   377  }
   378  
   379  var yincl = []ytab{ // single destination: a Yrl line (Z_rp) first, then the general Yml line (Zo_m)
   380  	{Ynone, Ynone, Yrl, Z_rp, 1},
   381  	{Ynone, Ynone, Yml, Zo_m, 2},
   382  }
   383  
   384  var yincq = []ytab{ // contents are identical to yincw
   385  	{Ynone, Ynone, Yml, Zo_m, 2},
   386  }
   387  
   388  var ycmpb = []ytab{ // byte forms in which the immediate, when present, is the to operand
   389  	{Yal, Ynone, Yi32, Z_ib, 1},
   390  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   391  	{Ymb, Ynone, Yrb, Zm_r, 1},
   392  	{Yrb, Ynone, Ymb, Zr_m, 1},
   393  }
   394  
   395  var ycmpl = []ytab{ // same shape as ycmpb, with an extra leading Yi8 line
   396  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   397  	{Yax, Ynone, Yi32, Z_il, 1},
   398  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   399  	{Yml, Ynone, Yrl, Zm_r, 1},
   400  	{Yrl, Ynone, Yml, Zr_m, 1},
   401  }
   402  
   403  var yshb = []ytab{ // from is $1 (Yi1), another immediate (Yi32) or Ycx; to is Ymb
   404  	{Yi1, Ynone, Ymb, Zo_m, 2},
   405  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   406  	{Ycx, Ynone, Ymb, Zo_m, 2},
   407  }
   408  
   409  var yshl = []ytab{ // like yshb but with a Yml destination; accepts both Ycl and Ycx as from
   410  	{Yi1, Ynone, Yml, Zo_m, 2},
   411  	{Yi32, Ynone, Yml, Zibo_m, 2},
   412  	{Ycl, Ynone, Yml, Zo_m, 2},
   413  	{Ycx, Ynone, Yml, Zo_m, 2},
   414  }
   415  
   416  var ytestb = []ytab{ // contents are identical to yxorb
   417  	{Yi32, Ynone, Yal, Zib_, 1},
   418  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   419  	{Yrb, Ynone, Ymb, Zr_m, 1},
   420  	{Ymb, Ynone, Yrb, Zm_r, 1},
   421  }
   422  
   423  var ytestl = []ytab{ // yxorl without its leading Yi8 line
   424  	{Yi32, Ynone, Yax, Zil_, 1},
   425  	{Yi32, Ynone, Yml, Zilo_m, 2},
   426  	{Yrl, Ynone, Yml, Zr_m, 1},
   427  	{Yml, Ynone, Yrl, Zm_r, 1},
   428  }
   429  
   430  var ymovb = []ytab{ // byte forms: Yrb/Ymb in both directions, then Yi32 into Yrb or Ymb
   431  	{Yrb, Ynone, Ymb, Zr_m, 1},
   432  	{Ymb, Ynone, Yrb, Zm_r, 1},
   433  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   434  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   435  }
   436  
   437  var ymbs = []ytab{ // single Ymb source, no destination
   438  	{Ymb, Ynone, Ynone, Zm_o, 2},
   439  }
   440  
   441  var ybtl = []ytab{ // used by the ABT*, ABTC*, ABTR* and ABTS* rows of optab
   442  	{Yi8, Ynone, Yml, Zibo_m, 2},
   443  	{Yrl, Ynone, Yml, Zr_m, 1},
   444  }
   445  
   446  var ymovw = []ytab{ // Yrl/Yml in both directions, $0 via Zclr, Yi32 forms, and a Yiauto source via Zaut_r
   447  	{Yrl, Ynone, Yml, Zr_m, 1},
   448  	{Yml, Ynone, Yrl, Zm_r, 1},
   449  	{Yi0, Ynone, Yrl, Zclr, 1},
   450  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   451  	{Yi32, Ynone, Yml, Zilo_m, 2},
   452  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   453  }
   454  
   455  var ymovl = []ytab{ // the ymovw lines with four MMX/XMM MOVD lines inserted before the final Yiauto line
   456  	{Yrl, Ynone, Yml, Zr_m, 1},
   457  	{Yml, Ynone, Yrl, Zm_r, 1},
   458  	{Yi0, Ynone, Yrl, Zclr, 1},
   459  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   460  	{Yi32, Ynone, Yml, Zilo_m, 2},
   461  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   462  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   463  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   464  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   465  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   466  }
   467  
   468  var yret = []ytab{ // no operand, or a single Yi32 source; both lines use Zo_iw
   469  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   470  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   471  }
   472  
   473  var ymovq = []ytab{ // the trailing comment on each line names the prefix/opcode bytes that line consumes
   474  	// valid in 32-bit mode
   475  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   476  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   477  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   478  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   479  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   480  
   481  	// valid only in 64-bit mode, usually with 64-bit prefix
   482  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   483  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   484  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   485  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   486  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   487  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   488  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   489  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   490  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   491  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   492  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   493  }
   494  
   495  var ym_rl = []ytab{ // Ym source into a Yrl destination
   496  	{Ym, Ynone, Yrl, Zm_r, 1},
   497  }
   498  
   499  var yrl_m = []ytab{ // used by ABOUNDL and ABOUNDW in optab
   500  	{Yrl, Ynone, Ym, Zr_m, 1},
   501  }
   502  
   503  var ymb_rl = []ytab{ // Ymb source into a Yrl destination, via the dedicated Zmb_r case
   504  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   505  }
   506  
   507  var yml_rl = []ytab{ // used by the ABSF*, ABSR* and ACMOVL* rows of optab
   508  	{Yml, Ynone, Yrl, Zm_r, 1},
   509  }
   510  
   511  var yrl_ml = []ytab{ // used by AARPL in optab
   512  	{Yrl, Ynone, Yml, Zr_m, 1},
   513  }
   514  
   515  var yml_mb = []ytab{ // Yrb/Ymb in both directions
   516  	{Yrb, Ynone, Ymb, Zr_m, 1},
   517  	{Ymb, Ynone, Yrb, Zm_r, 1},
   518  }
   519  
   520  var yrb_mb = []ytab{ // only the Yrb -> Ymb direction of yml_mb
   521  	{Yrb, Ynone, Ymb, Zr_m, 1},
   522  }
   523  
   524  var yxchg = []ytab{ // the Yax lines (Z_rp, Zrp_) are tried before the general Yrl/Yml lines
   525  	{Yax, Ynone, Yrl, Z_rp, 1},
   526  	{Yrl, Ynone, Yax, Zrp_, 1},
   527  	{Yrl, Ynone, Yml, Zr_m, 1},
   528  	{Yml, Ynone, Yrl, Zm_r, 1},
   529  }
   530  
   531  var ydivl = []ytab{ // single Yml source, no destination
   532  	{Yml, Ynone, Ynone, Zm_o, 2},
   533  }
   534  
   535  var ydivb = []ytab{ // single Ymb source, no destination (same contents as ymbs)
   536  	{Ymb, Ynone, Ynone, Zm_o, 2},
   537  }
   538  
   539  var yimul = []ytab{ // one-operand line first, then immediate-into-register and Yml-into-Yrl lines
   540  	{Yml, Ynone, Ynone, Zm_o, 2},
   541  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   542  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   543  	{Yml, Ynone, Yrl, Zm_r, 2},
   544  }
   545  
   546  var yimul3 = []ytab{ // three-operand line: Yi8 from, Yml from3, Yrl to
   547  	{Yi8, Yml, Yrl, Zibm_r, 2},
   548  }
   549  
   550  var ybyte = []ytab{ // used by ABYTE in optab
   551  	{Yi64, Ynone, Ynone, Zbyte, 1},
   552  }
   553  
   554  var yin = []ytab{ // either a single Yi32 source (Zib_) or no operand at all (Zlit)
   555  	{Yi32, Ynone, Ynone, Zib_, 1},
   556  	{Ynone, Ynone, Ynone, Zlit, 1},
   557  }
   558  
   559  var yint = []ytab{ // single Yi32 source only
   560  	{Yi32, Ynone, Ynone, Zib_, 1},
   561  }
   562  
   563  var ypushl = []ytab{ // single source operand: Yrl, Ym, Yi8 or Yi32
   564  	{Yrl, Ynone, Ynone, Zrp_, 1},
   565  	{Ym, Ynone, Ynone, Zm_o, 2},
   566  	{Yi8, Ynone, Ynone, Zib_, 1},
   567  	{Yi32, Ynone, Ynone, Zil_, 1},
   568  }
   569  
   570  var ypopl = []ytab{ // single destination operand: Yrl or Ym
   571  	{Ynone, Ynone, Yrl, Z_rp, 1},
   572  	{Ynone, Ynone, Ym, Zo_m, 2},
   573  }
   574  
   575  var ybswap = []ytab{ // used by ABSWAPL and ABSWAPQ in optab, whose two opcode bytes match zoffset 2
   576  	{Ynone, Ynone, Yrl, Z_rp, 2},
   577  }
   578  
   579  var yscond = []ytab{ // single Ymb destination (same contents as yincb)
   580  	{Ynone, Ynone, Ymb, Zo_m, 2},
   581  }
   582  
   583  var yjcond = []ytab{ // Ybr target; from may be absent, $0 (Yi0) or $1 (Yi1)
   584  	{Ynone, Ynone, Ybr, Zbr, 0},
   585  	{Yi0, Ynone, Ybr, Zbr, 0},
   586  	{Yi1, Ynone, Ybr, Zbr, 1},
   587  }
   588  
   589  var yloop = []ytab{ // Ybr target encoded with Zloop
   590  	{Ynone, Ynone, Ybr, Zloop, 1},
   591  }
   592  
   593  var ycall = []ytab{ // used by obj.ACALL in optab; zoffsets 0+2+2+0+1 cover its five opcode bytes
   594  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   595  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   596  	{Ynone, Ynone, Yindir, Zcallind, 2},
   597  	{Ynone, Ynone, Ybr, Zcall, 0},
   598  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   599  }
   600  
   601  var yduff = []ytab{ // single Yi32 destination encoded with Zcallduff
   602  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   603  }
   604  
   605  var yjmp = []ytab{ // destination is Yml (Zo_m64), Ybr (Zjmp) or Yi32 (Zjmpcon)
   606  	{Ynone, Ynone, Yml, Zo_m64, 2},
   607  	{Ynone, Ynone, Ybr, Zjmp, 0},
   608  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   609  }
   610  
   611  var yfmvd = []ytab{ // every line in this family pairs Yf0 with Ym or Yrf; all use zoffset 2
   612  	{Ym, Ynone, Yf0, Zm_o, 2},
   613  	{Yf0, Ynone, Ym, Zo_m, 2},
   614  	{Yrf, Ynone, Yf0, Zm_o, 2},
   615  	{Yf0, Ynone, Yrf, Zo_m, 2},
   616  }
   617  
   618  var yfmvdp = []ytab{ // only the Yf0-as-source lines of yfmvd
   619  	{Yf0, Ynone, Ym, Zo_m, 2},
   620  	{Yf0, Ynone, Yrf, Zo_m, 2},
   621  }
   622  
   623  var yfmvf = []ytab{ // only the Ym lines of yfmvd, in both directions
   624  	{Ym, Ynone, Yf0, Zm_o, 2},
   625  	{Yf0, Ynone, Ym, Zo_m, 2},
   626  }
   627  
   628  var yfmvx = []ytab{ // Ym -> Yf0 only
   629  	{Ym, Ynone, Yf0, Zm_o, 2},
   630  }
   631  
   632  var yfmvp = []ytab{ // Yf0 -> Ym only
   633  	{Yf0, Ynone, Ym, Zo_m, 2},
   634  }
   635  
   636  var yfcmv = []ytab{ // Yrf -> Yf0 only
   637  	{Yrf, Ynone, Yf0, Zm_o, 2},
   638  }
   639  
   640  var yfadd = []ytab{ // Ym or Yrf into Yf0, or Yf0 into Yrf
   641  	{Ym, Ynone, Yf0, Zm_o, 2},
   642  	{Yrf, Ynone, Yf0, Zm_o, 2},
   643  	{Yf0, Ynone, Yrf, Zo_m, 2},
   644  }
   645  
   646  var yfaddp = []ytab{ // Yf0 -> Yrf only
   647  	{Yf0, Ynone, Yrf, Zo_m, 2},
   648  }
   649  
   650  var yfxch = []ytab{ // Yf0 and Yrf in either order
   651  	{Yf0, Ynone, Yrf, Zo_m, 2},
   652  	{Yrf, Ynone, Yf0, Zm_o, 2},
   653  }
   654  
   655  var ycompp = []ytab{ // same line as yfaddp; see the original caveat on the line below
   656  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   657  }
   658  
   659  var ystsw = []ytab{ // destination is Ym (Zo_m) or Yax (Zlit)
   660  	{Ynone, Ynone, Ym, Zo_m, 2},
   661  	{Ynone, Ynone, Yax, Zlit, 1},
   662  }
   663  
   664  var ystcw = []ytab{ // a single Ym operand, accepted on either side
   665  	{Ynone, Ynone, Ym, Zo_m, 2},
   666  	{Ym, Ynone, Ynone, Zm_o, 2},
   667  }
   668  
   669  var ysvrs = []ytab{ // contents are identical to ystcw
   670  	{Ynone, Ynone, Ym, Zo_m, 2},
   671  	{Ym, Ynone, Ynone, Zm_o, 2},
   672  }
   673  
   674  var ymm = []ytab{ // Ymm -> Ymr line followed by the Yxm -> Yxr line
   675  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   676  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   677  }
   678  
   679  var yxm = []ytab{ // used by AADDPD, AADDPS, AADDSD, AADDSS, AANDPD, ... in optab
   680  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   681  }
   682  
   683  var yxm_q4 = []ytab{ // same operands as yxm but encoded with Zm_r instead of Zm_r_xm
   684  	{Yxm, Ynone, Yxr, Zm_r, 1},
   685  }
   686  
   687  var yxcvm1 = []ytab{ // Yxm source into either a Yxr or a Ymr destination
   688  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   689  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   690  }
   691  
   692  var yxcvm2 = []ytab{ // either a Yxm or a Ymm source into a Yxr destination
   693  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   694  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   695  }
   696  
   697  /*
   698  var yxmq = []ytab{
   699  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   700  }
   701  */
   702  
   703  var yxr = []ytab{ // Yxr on both sides
   704  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   705  }
   706  
   707  var yxr_ml = []ytab{ // Yxr source into a Yml destination
   708  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   709  }
   710  
   711  var ymr = []ytab{ // Ymr on both sides
   712  	{Ymr, Ynone, Ymr, Zm_r, 1},
   713  }
   714  
   715  var ymr_ml = []ytab{ // Ymr source into a Yml destination
   716  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   717  }
   718  
   719  var yxcmp = []ytab{ // contents are identical to yxm
   720  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   721  }
   722  
   723  var yxcmpi = []ytab{ // three operands with the immediate (Yi8) in the to position
   724  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   725  }
   726  
   727  var yxmov = []ytab{ // Yxm/Yxr in both directions
   728  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   729  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   730  }
   731  
   732  var yxcvfl = []ytab{ // Yxm source into a Yrl destination
   733  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   734  }
   735  
   736  var yxcvlf = []ytab{ // Yml source into a Yxr destination
   737  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   738  }
   739  
   740  var yxcvfq = []ytab{ // same operands as yxcvfl, with zoffset 2
   741  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   742  }
   743  
   744  var yxcvqf = []ytab{ // same operands as yxcvlf, with zoffset 2
   745  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   746  }
   747  
   748  var yps = []ytab{ // for each of Ymr and Yxr: a register/memory-source line, then a Yi8-source line
   749  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   750  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   751  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   752  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   753  }
   754  
   755  var yxrrl = []ytab{ // Yxr source into a Yrl destination
   756  	{Yxr, Ynone, Yrl, Zm_r, 1},
   757  }
   758  
   759  var ymrxr = []ytab{ // Ymr or Yxm source into a Yxr destination
   760  	{Ymr, Ynone, Yxr, Zm_r, 1},
   761  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   762  }
   763  
   764  var ymshuf = []ytab{ // three operands: Yi8 from, Ymm from3, Ymr to
   765  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   766  }
   767  
   768  var ymshufb = []ytab{ // Yxm -> Yxr encoded with Zm2_r
   769  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   770  }
   771  
   772  var yxshuf = []ytab{ // three operands: Yu8 from, Yxm from3, Yxr to
   773  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   774  }
   775  
   776  var yextrw = []ytab{ // three operands: Yu8 from, Yxr from3, Yrl to
   777  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   778  }
   779  
   780  var yextr = []ytab{ // three operands: Yu8 from, Yxr from3, Ymm to; uses Zibr_m rather than Zibm_r
   781  	{Yu8, Yxr, Ymm, Zibr_m, 3},
   782  }
   783  
   784  var yinsrw = []ytab{ // three operands: Yu8 from, Yml from3, Yxr to
   785  	{Yu8, Yml, Yxr, Zibm_r, 2},
   786  }
   787  
   788  var yinsr = []ytab{ // three operands: Yu8 from, Ymm from3, Yxr to
   789  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   790  }
   791  
   792  var ypsdq = []ytab{ // Yi8 source into a Yxr destination
   793  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   794  }
   795  
   796  var ymskb = []ytab{ // Yxr or Ymr source into a Yrl destination
   797  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   798  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   799  }
   800  
   801  var ycrc32l = []ytab{ // Yml -> Yrl encoded with Zlitm_r
   802  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   803  }
   804  
   805  var yprefetch = []ytab{ // single Ym source, no destination
   806  	{Ym, Ynone, Ynone, Zm_o, 2},
   807  }
   808  
   809  var yaes = []ytab{ // Yxm -> Yxr encoded with Zlitm_r
   810  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   811  }
   812  
   813  var yaes2 = []ytab{ // contents are identical to yxshuf
   814  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   815  }
   816  
   817  var yxbegin = []ytab{ // Ybr target encoded with Zjmp
   818  	{Ynone, Ynone, Ybr, Zjmp, 1},
   819  }
   820  
   821  var yxabort = []ytab{ // single Yu8 source
   822  	{Yu8, Ynone, Ynone, Zib_, 1},
   823  }
   824  
   825  var ylddqu = []ytab{ // Ym source into a Yxr destination
   826  	{Ym, Ynone, Yxr, Zm_r, 1},
   827  }
   828  
   829  // VEX instructions that come in two forms:
   830  //	VTHING xmm2/m128, xmmV, xmm1
   831  //	VTHING ymm2/m256, ymmV, ymm1
   832  // The opcode array in the corresponding Optab entry
   833  // should contain the (VEX prefixes, opcode byte) pair
   834  // for each of the two forms.
   835  // For example, the entries for VPXOR are:
   836  //
   837  //	VPXOR xmm2/m128, xmmV, xmm1
   838  //	VEX.NDS.128.66.0F.WIG EF /r
   839  //
   840  //	VPXOR ymm2/m256, ymmV, ymm1
   841  //	VEX.NDS.256.66.0F.WIG EF /r
   842  //
   843  // The NDS/NDD/DDS part can be dropped, producing this
   844  // Optab entry:
   845  //
   846  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
   847  //
        // Each line below uses zoffset 2, i.e. one (VEX prefix, opcode) pair per form.
   848  var yvex_xy3 = []ytab{
   849  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   850  	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
   851  }
   852  
   853  var yvex_ri3 = []ytab{ // Yi8 from, Ymb from3, Yrl to
   854  	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
   855  }
   856  
   857  var yvex_xyi3 = []ytab{ // Yi8 from with Yxm/Yxr and Yym/Yyr forms
   858  	{Yi8, Yxm, Yxr, Zvex_i_rm_r, 2},
   859  	{Yi8, Yym, Yyr, Zvex_i_rm_r, 2},
   860  }
   861  
   862  var yvex_yyi4 = []ytab{ //TODO don't hide 4 op, some version have xmm version
   863  	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   864  }
   865  
   866  var yvex_xyi4 = []ytab{ // like yvex_yyi4 but with a Yxm from operand
   867  	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   868  }
   869  
   870  var yvex_shift = []ytab{ // two Yi8-source lines (zoffset 3), then two Yxm-source lines (zoffset 2)
   871  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   872  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   873  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   874  	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2},
   875  }
   876  
   877  var yvex_shift_dq = []ytab{ // only the Yi8-source lines of yvex_shift
   878  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   879  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   880  }
   881  
   882  var yvex_r3 = []ytab{ // NOTE(review): both lines are identical; if the scan stops at the first matching line the second is never selected — confirm intent
   883  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   884  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   885  }
   886  
   887  var yvex_vmr3 = []ytab{ // NOTE(review): both lines are identical here as well — confirm intent
   888  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   889  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   890  }
   891  
   892  var yvex_xy2 = []ytab{ // two-operand forms: Yxm -> Yxr and Yym -> Yyr
   893  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   894  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   895  }
   896  
   897  var yvex_xyr2 = []ytab{ // Yxr or Yyr source into a Yrl destination
   898  	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
   899  	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
   900  }
   901  
   902  var yvex_vmovdqa = []ytab{ // both directions for the Yxm/Yxr pair, then both directions for the Yym/Yyr pair
   903  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   904  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   905  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   906  	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
   907  }
   908  
   909  var yvex_vmovntdq = []ytab{ // Yxr or Yyr source into a Ym destination
   910  	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
   911  	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
   912  }
   913  
   914  var yvex_vpbroadcast = []ytab{ // Yxm source into either a Yxr or a Yyr destination
   915  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   916  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   917  }
   918  
   919  var ymmxmm0f38 = []ytab{ // Zlitm_r forms: the Ymm/Ymr line uses zoffset 3, the Yxm/Yxr line uses zoffset 5
   920  	{Ymm, Ynone, Ymr, Zlitm_r, 3},
   921  	{Yxm, Ynone, Yxr, Zlitm_r, 5},
   922  }
   923  
   924  /*
   925   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   926   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   927   * the entry with the given p->as and then looks through the ytable for that
   928   * instruction (the second field in the optab struct) for a line whose first
   929   * two values match the Ytypes of the p->from and p->to operands.  The function
   930   * oclass in span.c computes the specific Ytype of an operand and then the set
   931   * of more general Ytypes that it satisfies is implied by the ycover table, set
   932   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   933   * from the more general 8-bit constants, but instinit says
   934   *
   935   *        ycover[Yi0*Ymax + Ys32] = 1;
   936   *        ycover[Yi1*Ymax + Ys32] = 1;
   937   *        ycover[Yi8*Ymax + Ys32] = 1;
   938   *
   939   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   940   * if that's what an instruction can handle.
   941   *
   942   * In parallel with the scan through the ytable for the appropriate line, there
   943   * is a z pointer that starts out pointing at the strange magic byte list in
   944   * the Optab struct (its op field).  With each step past a non-matching ytable
   945   * line, z advances by the 4th entry in the line as written in the C-style example
   946   * below (the zoffset field of the Go ytab struct).  When a matching line is found,
   947   * that z pointer has the extra data to use in laying down the instruction bytes.
   948   * The actual bytes laid down are a function of the 3rd entry in the line (that
   949   * is, the Ztype; the zcase field of the Go ytab struct) and the z bytes.
   949   *
   950   * For example, let's look at AADDL.  The optab line says:
   951   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   952   *
   953   * and yaddl says
   954   *        uchar   yaddl[] =
   955   *        {
   956   *                Yi8,    Yml,    Zibo_m, 2,
   957   *                Yi32,   Yax,    Zil_,   1,
   958   *                Yi32,   Yml,    Zilo_m, 2,
   959   *                Yrl,    Yml,    Zr_m,   1,
   960   *                Yml,    Yrl,    Zm_r,   1,
   961   *                0
   962   *        };
   963   *
   964   * so there are 5 possible types of ADDL instruction that can be laid down, and
   965   * possible states used to lay them down (Ztype and z pointer, assuming z
   966   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   967   *
   968   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   969   *        Yi32, Yax -> Zil_, z+2 (0x05)
   970   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   971   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   972   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   973   *
   974   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   975   * relatively straightforward as this program goes.
   976   *
   977   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   978   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   979   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   980   * Zilo_m is the same but a long (32-bit) immediate.
   981   */
   982  var optab =
   983  /*	as, ytab, andproto, opcode */
   984  []Optab{
   985  	{obj.AXXX, nil, 0, [23]uint8{}},
   986  	{AAAA, ynone, P32, [23]uint8{0x37}},
   987  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   988  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   989  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   990  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   991  	{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   992  	{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   993  	{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   994  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   995  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   996  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   997  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   998  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   999  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
  1000  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
  1001  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1002  	{AADJSP, nil, 0, [23]uint8{}},
  1003  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
  1004  	{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1005  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
  1006  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
  1007  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
  1008  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
  1009  	{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1010  	{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1011  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
  1012  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
  1013  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
  1014  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
  1015  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
  1016  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
  1017  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
  1018  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
  1019  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
  1020  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
  1021  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
  1022  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
  1023  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
  1024  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
  1025  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
  1026  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
  1027  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
  1028  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
  1029  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
  1030  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
  1031  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
  1032  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
  1033  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
  1034  	{ABYTE, ybyte, Px, [23]uint8{1}},
  1035  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
  1036  	{ACDQ, ynone, Px, [23]uint8{0x99}},
  1037  	{ACLC, ynone, Px, [23]uint8{0xf8}},
  1038  	{ACLD, ynone, Px, [23]uint8{0xfc}},
  1039  	{ACLI, ynone, Px, [23]uint8{0xfa}},
  1040  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
  1041  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1042  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1043  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1044  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1045  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1046  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1047  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1048  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1049  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1050  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1051  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1052  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1053  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1054  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1055  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1056  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1057  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1058  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1059  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1060  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1061  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1062  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1063  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1064  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1065  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1066  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1067  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1068  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1069  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1070  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1071  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1072  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1073  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1074  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1075  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1076  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1077  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1078  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1079  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1080  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1081  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1082  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1083  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1084  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1085  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1086  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1087  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1088  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1089  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1090  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1091  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1092  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1093  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1094  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1095  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1096  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1097  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1098  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1099  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1100  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1101  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1102  	{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
  1103  	{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
  1104  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1105  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1106  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1107  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1108  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1109  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1110  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1111  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1112  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1113  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1114  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1115  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1116  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1117  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1118  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1119  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1120  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1121  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1122  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1123  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1124  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1125  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1126  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1127  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1128  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1129  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1130  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1131  	{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
  1132  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1133  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1134  	{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
  1135  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1136  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1137  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1138  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1139  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1140  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1141  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1142  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1143  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1144  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1145  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1146  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1147  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1148  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1149  	{obj.AGLOBL, nil, 0, [23]uint8{}},
  1150  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1151  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1152  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1153  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1154  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1155  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1156  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1157  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1158  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1159  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1160  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1161  	{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
  1162  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1163  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1164  	{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
  1165  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1166  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1167  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1168  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1169  	{AINT, yint, Px, [23]uint8{0xcd}},
  1170  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1171  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1172  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1173  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1174  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1175  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1176  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1177  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1178  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1179  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1180  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1181  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1182  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1183  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1184  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1185  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1186  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1187  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1188  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1189  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1190  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1191  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1192  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1193  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1194  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1195  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1196  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1197  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1198  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1199  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1200  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1201  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1202  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1203  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1204  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1205  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1206  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1207  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1208  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1209  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1210  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1211  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1212  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1213  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1214  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1215  	{ALONG, ybyte, Px, [23]uint8{4}},
  1216  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1217  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1218  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1219  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1220  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1221  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1222  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1223  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1224  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1225  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1226  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1227  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1228  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1229  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1230  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1231  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1232  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1233  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1234  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1235  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1236  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1237  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1238  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1239  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1240  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1241  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1242  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1243  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1244  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1245  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1246  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1247  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1248  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1249  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1250  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1251  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1252  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1253  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1254  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1255  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1256  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1257  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1258  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1259  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1260  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1261  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1262  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1263  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1264  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1265  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1266  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1267  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1268  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1269  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1270  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1271  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1272  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1273  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1274  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1275  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1276  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1277  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1278  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1279  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1280  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1281  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1282  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1283  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1284  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1285  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1286  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1287  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1288  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1289  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1290  	{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1291  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1292  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1293  	{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1294  	{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1295  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1296  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1297  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1298  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1299  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1300  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1301  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1302  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1303  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1304  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1305  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1306  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1307  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1308  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1309  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1310  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1311  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1312  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1313  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1314  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1315  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1316  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1317  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1318  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1319  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1320  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1321  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1322  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1323  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1324  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1325  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1326  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1327  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1328  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1329  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1330  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1331  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1332  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1333  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1334  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1335  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1336  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1337  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1338  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1339  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1340  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1341  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1342  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1343  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1344  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1345  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1346  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1347  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1348  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1349  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1350  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1351  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1352  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1353  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1354  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1355  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1356  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1357  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1358  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1359  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1360  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1361  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1362  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1363  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1364  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1365  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1366  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1367  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1368  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1369  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1370  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1371  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1372  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1373  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1374  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1375  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1376  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1377  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1378  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1379  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1380  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1381  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1382  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1383  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1384  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1385  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1386  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1387  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1388  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1389  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1390  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1391  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1392  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1393  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1394  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1395  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1396  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1397  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1398  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1399  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1400  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1401  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1402  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1403  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1404  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1405  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1406  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1407  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1408  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1409  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1410  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1411  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1412  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1413  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1414  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1415  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1416  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1417  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1418  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1419  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1420  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1421  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1422  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1423  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1424  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1425  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1426  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1427  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1428  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1429  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1430  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1431  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1432  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1433  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1434  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1435  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1436  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1437  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1438  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1439  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1440  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1441  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1442  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1443  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1444  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1445  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1446  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1447  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1448  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1449  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1450  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1451  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1452  	{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1453  	{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1454  	{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1455  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1456  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1457  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1458  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1459  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1460  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1461  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1462  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1463  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1464  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1465  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1466  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1467  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1468  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1469  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1470  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1471  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1472  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1473  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1474  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1475  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1476  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1477  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1478  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1479  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1480  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1481  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1482  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1483  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1484  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1485  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1486  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1487  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1488  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1489  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1490  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1491  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1492  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1493  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1494  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1495  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1496  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1497  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1498  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1499  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1500  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1501  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1502  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1503  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1504  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1505  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1506  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1507  	{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1508  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1509  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1510  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1511  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1512  	{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1513  	{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1514  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1515  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1516  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1517  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1518  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1519  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1520  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1521  	{AWORD, ybyte, Px, [23]uint8{2}},
  1522  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1523  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1524  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1525  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1526  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1527  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1528  	{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1530  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1531  	{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1532  	{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1533  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1534  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1535  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1536  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1537  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1538  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1539  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1540  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1541  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1542  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1543  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1544  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1545  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1546  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1547  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1548  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1549  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1550  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1551  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1552  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1553  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1554  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1555  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1556  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1557  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1558  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1559  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1560  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1561  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1562  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1563  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1564  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1565  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1566  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1567  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1568  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1569  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1570  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1571  	{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1572  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1573  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1574  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1575  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1576  	{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1577  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1578  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1579  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1580  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1581  	{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1582  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1583  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1584  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1585  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1586  	{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1587  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1588  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1589  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1590  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1591  	{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1592  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1593  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1594  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1595  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1596  	{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1597  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1598  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1599  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1600  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1601  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1602  	{AFFREE, nil, 0, [23]uint8{}},
  1603  	{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1604  	{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1605  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1606  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1607  	{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1608  	{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1609  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1610  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1611  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1612  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1613  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1614  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1615  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1616  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1617  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1618  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1619  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1620  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1621  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1622  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1623  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1624  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1625  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1626  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1627  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1628  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1629  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1630  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1631  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1632  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1633  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1634  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1635  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1636  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1637  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1638  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1639  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1640  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1641  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1642  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1643  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1644  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1645  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1646  	{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1647  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1648  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1649  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1650  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1651  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1652  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1653  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1654  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1655  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1656  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1657  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1658  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1659  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1660  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1661  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1662  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1663  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1664  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1665  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1666  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1667  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1668  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1669  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1670  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1671  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1672  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1673  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1674  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1675  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1676  	{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1677  	{AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
  1678  	{AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
  1679  	{AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1680  	{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1681  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1682  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1683  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1684  
  1685  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1686  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1687  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1688  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1689  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1690  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1691  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1692  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1693  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1694  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1695  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1696  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1697  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1698  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1699  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1700  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1701  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1702  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1703  
  1704  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1705  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1706  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1707  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1708  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1709  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1710  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1711  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1712  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1713  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1714  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
  1715  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
  1716  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
  1717  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
  1718  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
  1719  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
  1720  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
  1721  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
  1722  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
  1723  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
  1724  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
  1725  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
  1726  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
  1727  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
  1728  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
  1729  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
  1730  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
  1731  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
  1732  
  1733  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1734  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1735  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1736  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1737  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1738  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1739  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1740  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1741  	{obj.ATYPE, nil, 0, [23]uint8{}},
  1742  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1743  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1744  	{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1745  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1746  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1747  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1748  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1749  	{obj.AEND, nil, 0, [23]uint8{}},
  1750  	{0, nil, 0, [23]uint8{}},
  1751  }
  1752  
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// It is populated by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1754  
  1755  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1756  // This happens on systems like Solaris that call .so functions instead of system calls.
  1757  // It does not seem to be necessary for any other systems. This is probably working
  1758  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1759  // what that bug is. And this does fix it.
  1760  func isextern(s *obj.LSym) bool {
  1761  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1762  	return strings.HasPrefix(s.Name, "libc_")
  1763  }
  1764  
  1765  // single-instruction no-ops of various lengths.
  1766  // constructed by hand and disassembled with gdb to verify.
  1767  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1768  var nop = [][16]uint8{
  1769  	{0x90},
  1770  	{0x66, 0x90},
  1771  	{0x0F, 0x1F, 0x00},
  1772  	{0x0F, 0x1F, 0x40, 0x00},
  1773  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1774  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1775  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1776  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1777  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1778  }
  1779  
  1780  // Native Client rejects the repeated 0x66 prefix.
  1781  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1782  func fillnop(p []byte, n int) {
  1783  	var m int
  1784  
  1785  	for n > 0 {
  1786  		m = n
  1787  		if m > len(nop) {
  1788  			m = len(nop)
  1789  		}
  1790  		copy(p[:m], nop[m-1][:m])
  1791  		p = p[m:]
  1792  		n -= m
  1793  	}
  1794  }
  1795  
  1796  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1797  	s.Grow(int64(c) + int64(pad))
  1798  	fillnop(s.P[c:], int(pad))
  1799  	return c + pad
  1800  }
  1801  
  1802  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1803  	if p.Mode != 64 || ctxt.Arch.PtrSize == 4 {
  1804  		return l
  1805  	}
  1806  	return q
  1807  }
  1808  
  1809  func span6(ctxt *obj.Link, s *obj.LSym) {
  1810  	ctxt.Cursym = s
  1811  
  1812  	if s.P != nil {
  1813  		return
  1814  	}
  1815  
  1816  	if ycover[0] == 0 {
  1817  		instinit()
  1818  	}
  1819  
  1820  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1821  		if p.To.Type == obj.TYPE_BRANCH {
  1822  			if p.Pcond == nil {
  1823  				p.Pcond = p
  1824  			}
  1825  		}
  1826  		if p.As == AADJSP {
  1827  			p.To.Type = obj.TYPE_REG
  1828  			p.To.Reg = REG_SP
  1829  			v := int32(-p.From.Offset)
  1830  			p.From.Offset = int64(v)
  1831  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1832  			if v < 0 {
  1833  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1834  				v = -v
  1835  				p.From.Offset = int64(v)
  1836  			}
  1837  
  1838  			if v == 0 {
  1839  				p.As = obj.ANOP
  1840  			}
  1841  		}
  1842  	}
  1843  
  1844  	var q *obj.Prog
  1845  	var count int64 // rough count of number of instructions
  1846  	for p := s.Text; p != nil; p = p.Link {
  1847  		count++
  1848  		p.Back = 2 // use short branches first time through
  1849  		q = p.Pcond
  1850  		if q != nil && (q.Back&2 != 0) {
  1851  			p.Back |= 1 // backward jump
  1852  			q.Back |= 4 // loop head
  1853  		}
  1854  
  1855  		if p.As == AADJSP {
  1856  			p.To.Type = obj.TYPE_REG
  1857  			p.To.Reg = REG_SP
  1858  			v := int32(-p.From.Offset)
  1859  			p.From.Offset = int64(v)
  1860  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1861  			if v < 0 {
  1862  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1863  				v = -v
  1864  				p.From.Offset = int64(v)
  1865  			}
  1866  
  1867  			if v == 0 {
  1868  				p.As = obj.ANOP
  1869  			}
  1870  		}
  1871  	}
  1872  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1873  
  1874  	n := 0
  1875  	var c int32
  1876  	errors := ctxt.Errors
  1877  	var deferreturn *obj.LSym
  1878  	if ctxt.Headtype == obj.Hnacl {
  1879  		deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1880  	}
  1881  	for {
  1882  		loop := int32(0)
  1883  		for i := range s.R {
  1884  			s.R[i] = obj.Reloc{}
  1885  		}
  1886  		s.R = s.R[:0]
  1887  		s.P = s.P[:0]
  1888  		c = 0
  1889  		for p := s.Text; p != nil; p = p.Link {
  1890  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1891  
  1892  				// pad everything to avoid crossing 32-byte boundary
  1893  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1894  					c = naclpad(ctxt, s, c, -c&31)
  1895  				}
  1896  
  1897  				// pad call deferreturn to start at 32-byte boundary
  1898  				// so that subtracting 5 in jmpdefer will jump back
  1899  				// to that boundary and rerun the call.
  1900  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1901  					c = naclpad(ctxt, s, c, -c&31)
  1902  				}
  1903  
  1904  				// pad call to end at 32-byte boundary
  1905  				if p.As == obj.ACALL {
  1906  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1907  				}
  1908  
  1909  				// the linker treats REP and STOSQ as different instructions
  1910  				// but in fact the REP is a prefix on the STOSQ.
  1911  				// make sure REP has room for 2 more bytes, so that
  1912  				// padding will not be inserted before the next instruction.
  1913  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1914  					c = naclpad(ctxt, s, c, -c&31)
  1915  				}
  1916  
  1917  				// same for LOCK.
  1918  				// various instructions follow; the longest is 4 bytes.
  1919  				// give ourselves 8 bytes so as to avoid surprises.
  1920  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1921  					c = naclpad(ctxt, s, c, -c&31)
  1922  				}
  1923  			}
  1924  
  1925  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1926  				// pad with NOPs
  1927  				v := -c & (LoopAlign - 1)
  1928  
  1929  				if v <= MaxLoopPad {
  1930  					s.Grow(int64(c) + int64(v))
  1931  					fillnop(s.P[c:], int(v))
  1932  					c += v
  1933  				}
  1934  			}
  1935  
  1936  			p.Pc = int64(c)
  1937  
  1938  			// process forward jumps to p
  1939  			for q = p.Rel; q != nil; q = q.Forwd {
  1940  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1941  				if q.Back&2 != 0 { // short
  1942  					if v > 127 {
  1943  						loop++
  1944  						q.Back ^= 2
  1945  					}
  1946  
  1947  					if q.As == AJCXZL || q.As == AXBEGIN {
  1948  						s.P[q.Pc+2] = byte(v)
  1949  					} else {
  1950  						s.P[q.Pc+1] = byte(v)
  1951  					}
  1952  				} else {
  1953  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1954  				}
  1955  			}
  1956  
  1957  			p.Rel = nil
  1958  
  1959  			p.Pc = int64(c)
  1960  			asmins(ctxt, p)
  1961  			m := ctxt.AsmBuf.Len()
  1962  			if int(p.Isize) != m {
  1963  				p.Isize = uint8(m)
  1964  				loop++
  1965  			}
  1966  
  1967  			s.Grow(p.Pc + int64(m))
  1968  			copy(s.P[p.Pc:], ctxt.AsmBuf.Bytes())
  1969  			c += int32(m)
  1970  		}
  1971  
  1972  		n++
  1973  		if n > 20 {
  1974  			ctxt.Diag("span must be looping")
  1975  			log.Fatalf("loop")
  1976  		}
  1977  		if loop == 0 {
  1978  			break
  1979  		}
  1980  		if ctxt.Errors > errors {
  1981  			return
  1982  		}
  1983  	}
  1984  
  1985  	if ctxt.Headtype == obj.Hnacl {
  1986  		c = naclpad(ctxt, s, c, -c&31)
  1987  	}
  1988  
  1989  	// Pad functions with trap instruction, to catch invalid jumps
  1990  	if c&(FuncAlign-1) != 0 {
  1991  		v := -c & (FuncAlign - 1)
  1992  		s.Grow(int64(c) + int64(v))
  1993  		for i := c; i < c+v; i++ {
  1994  			// 0xCC is INT $3 - breakpoint instruction
  1995  			s.P[i] = uint8(0xCC)
  1996  		}
  1997  		c += v
  1998  	}
  1999  	s.Size = int64(c)
  2000  
  2001  	if false { /* debug['a'] > 1 */
  2002  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  2003  		var i int
  2004  		for i = 0; i < len(s.P); i++ {
  2005  			fmt.Printf(" %.2x", s.P[i])
  2006  			if i%16 == 15 {
  2007  				fmt.Printf("\n  %.6x", uint(i+1))
  2008  			}
  2009  		}
  2010  
  2011  		if i%16 != 0 {
  2012  			fmt.Printf("\n")
  2013  		}
  2014  
  2015  		for i := 0; i < len(s.R); i++ {
  2016  			r := &s.R[i]
  2017  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  2018  		}
  2019  	}
  2020  }
  2021  
  2022  func instinit() {
  2023  	for i := 1; optab[i].as != 0; i++ {
  2024  		c := optab[i].as
  2025  		if opindex[c&obj.AMask] != nil {
  2026  			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
  2027  		}
  2028  		opindex[c&obj.AMask] = &optab[i]
  2029  	}
  2030  
  2031  	for i := 0; i < Ymax; i++ {
  2032  		ycover[i*Ymax+i] = 1
  2033  	}
  2034  
  2035  	ycover[Yi0*Ymax+Yi8] = 1
  2036  	ycover[Yi1*Ymax+Yi8] = 1
  2037  	ycover[Yu7*Ymax+Yi8] = 1
  2038  
  2039  	ycover[Yi0*Ymax+Yu7] = 1
  2040  	ycover[Yi1*Ymax+Yu7] = 1
  2041  
  2042  	ycover[Yi0*Ymax+Yu8] = 1
  2043  	ycover[Yi1*Ymax+Yu8] = 1
  2044  	ycover[Yu7*Ymax+Yu8] = 1
  2045  
  2046  	ycover[Yi0*Ymax+Ys32] = 1
  2047  	ycover[Yi1*Ymax+Ys32] = 1
  2048  	ycover[Yu7*Ymax+Ys32] = 1
  2049  	ycover[Yu8*Ymax+Ys32] = 1
  2050  	ycover[Yi8*Ymax+Ys32] = 1
  2051  
  2052  	ycover[Yi0*Ymax+Yi32] = 1
  2053  	ycover[Yi1*Ymax+Yi32] = 1
  2054  	ycover[Yu7*Ymax+Yi32] = 1
  2055  	ycover[Yu8*Ymax+Yi32] = 1
  2056  	ycover[Yi8*Ymax+Yi32] = 1
  2057  	ycover[Ys32*Ymax+Yi32] = 1
  2058  
  2059  	ycover[Yi0*Ymax+Yi64] = 1
  2060  	ycover[Yi1*Ymax+Yi64] = 1
  2061  	ycover[Yu7*Ymax+Yi64] = 1
  2062  	ycover[Yu8*Ymax+Yi64] = 1
  2063  	ycover[Yi8*Ymax+Yi64] = 1
  2064  	ycover[Ys32*Ymax+Yi64] = 1
  2065  	ycover[Yi32*Ymax+Yi64] = 1
  2066  
  2067  	ycover[Yal*Ymax+Yrb] = 1
  2068  	ycover[Ycl*Ymax+Yrb] = 1
  2069  	ycover[Yax*Ymax+Yrb] = 1
  2070  	ycover[Ycx*Ymax+Yrb] = 1
  2071  	ycover[Yrx*Ymax+Yrb] = 1
  2072  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2073  
  2074  	ycover[Ycl*Ymax+Ycx] = 1
  2075  
  2076  	ycover[Yax*Ymax+Yrx] = 1
  2077  	ycover[Ycx*Ymax+Yrx] = 1
  2078  
  2079  	ycover[Yax*Ymax+Yrl] = 1
  2080  	ycover[Ycx*Ymax+Yrl] = 1
  2081  	ycover[Yrx*Ymax+Yrl] = 1
  2082  	ycover[Yrl32*Ymax+Yrl] = 1
  2083  
  2084  	ycover[Yf0*Ymax+Yrf] = 1
  2085  
  2086  	ycover[Yal*Ymax+Ymb] = 1
  2087  	ycover[Ycl*Ymax+Ymb] = 1
  2088  	ycover[Yax*Ymax+Ymb] = 1
  2089  	ycover[Ycx*Ymax+Ymb] = 1
  2090  	ycover[Yrx*Ymax+Ymb] = 1
  2091  	ycover[Yrb*Ymax+Ymb] = 1
  2092  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2093  	ycover[Ym*Ymax+Ymb] = 1
  2094  
  2095  	ycover[Yax*Ymax+Yml] = 1
  2096  	ycover[Ycx*Ymax+Yml] = 1
  2097  	ycover[Yrx*Ymax+Yml] = 1
  2098  	ycover[Yrl*Ymax+Yml] = 1
  2099  	ycover[Yrl32*Ymax+Yml] = 1
  2100  	ycover[Ym*Ymax+Yml] = 1
  2101  
  2102  	ycover[Yax*Ymax+Ymm] = 1
  2103  	ycover[Ycx*Ymax+Ymm] = 1
  2104  	ycover[Yrx*Ymax+Ymm] = 1
  2105  	ycover[Yrl*Ymax+Ymm] = 1
  2106  	ycover[Yrl32*Ymax+Ymm] = 1
  2107  	ycover[Ym*Ymax+Ymm] = 1
  2108  	ycover[Ymr*Ymax+Ymm] = 1
  2109  
  2110  	ycover[Ym*Ymax+Yxm] = 1
  2111  	ycover[Yxr*Ymax+Yxm] = 1
  2112  
  2113  	ycover[Ym*Ymax+Yym] = 1
  2114  	ycover[Yyr*Ymax+Yym] = 1
  2115  
  2116  	for i := 0; i < MAXREG; i++ {
  2117  		reg[i] = -1
  2118  		if i >= REG_AL && i <= REG_R15B {
  2119  			reg[i] = (i - REG_AL) & 7
  2120  			if i >= REG_SPB && i <= REG_DIB {
  2121  				regrex[i] = 0x40
  2122  			}
  2123  			if i >= REG_R8B && i <= REG_R15B {
  2124  				regrex[i] = Rxr | Rxx | Rxb
  2125  			}
  2126  		}
  2127  
  2128  		if i >= REG_AH && i <= REG_BH {
  2129  			reg[i] = 4 + ((i - REG_AH) & 7)
  2130  		}
  2131  		if i >= REG_AX && i <= REG_R15 {
  2132  			reg[i] = (i - REG_AX) & 7
  2133  			if i >= REG_R8 {
  2134  				regrex[i] = Rxr | Rxx | Rxb
  2135  			}
  2136  		}
  2137  
  2138  		if i >= REG_F0 && i <= REG_F0+7 {
  2139  			reg[i] = (i - REG_F0) & 7
  2140  		}
  2141  		if i >= REG_M0 && i <= REG_M0+7 {
  2142  			reg[i] = (i - REG_M0) & 7
  2143  		}
  2144  		if i >= REG_X0 && i <= REG_X0+15 {
  2145  			reg[i] = (i - REG_X0) & 7
  2146  			if i >= REG_X0+8 {
  2147  				regrex[i] = Rxr | Rxx | Rxb
  2148  			}
  2149  		}
  2150  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2151  			reg[i] = (i - REG_Y0) & 7
  2152  			if i >= REG_Y0+8 {
  2153  				regrex[i] = Rxr | Rxx | Rxb
  2154  			}
  2155  		}
  2156  
  2157  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2158  			regrex[i] = Rxr
  2159  		}
  2160  	}
  2161  }
  2162  
// isAndroid records whether obj.Getgoos() returned "android" when the
// package was initialized. prefixof consults it when choosing the segment
// prefix for TLS references.
var isAndroid = (obj.Getgoos() == "android")
  2164  
  2165  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2166  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2167  		return 0
  2168  	}
  2169  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2170  		switch a.Reg {
  2171  		case REG_CS:
  2172  			return 0x2e
  2173  
  2174  		case REG_DS:
  2175  			return 0x3e
  2176  
  2177  		case REG_ES:
  2178  			return 0x26
  2179  
  2180  		case REG_FS:
  2181  			return 0x64
  2182  
  2183  		case REG_GS:
  2184  			return 0x65
  2185  
  2186  		case REG_TLS:
  2187  			// NOTE: Systems listed here should be only systems that
  2188  			// support direct TLS references like 8(TLS) implemented as
  2189  			// direct references from FS or GS. Systems that require
  2190  			// the initial-exec model, where you load the TLS base into
  2191  			// a register and then index from that register, do not reach
  2192  			// this code and should not be listed.
  2193  			if p.Mode == 32 {
  2194  				switch ctxt.Headtype {
  2195  				default:
  2196  					if isAndroid {
  2197  						return 0x65 // GS
  2198  					}
  2199  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2200  
  2201  				case obj.Hdarwin,
  2202  					obj.Hdragonfly,
  2203  					obj.Hfreebsd,
  2204  					obj.Hnetbsd,
  2205  					obj.Hopenbsd:
  2206  					return 0x65 // GS
  2207  				}
  2208  			}
  2209  
  2210  			switch ctxt.Headtype {
  2211  			default:
  2212  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2213  
  2214  			case obj.Hlinux:
  2215  				if isAndroid {
  2216  					return 0x64 // FS
  2217  				}
  2218  
  2219  				if ctxt.Flag_shared {
  2220  					log.Fatalf("unknown TLS base register for linux with -shared")
  2221  				} else {
  2222  					return 0x64 // FS
  2223  				}
  2224  
  2225  			case obj.Hdragonfly,
  2226  				obj.Hfreebsd,
  2227  				obj.Hnetbsd,
  2228  				obj.Hopenbsd,
  2229  				obj.Hsolaris:
  2230  				return 0x64 // FS
  2231  
  2232  			case obj.Hdarwin:
  2233  				return 0x65 // GS
  2234  			}
  2235  		}
  2236  	}
  2237  
  2238  	if p.Mode == 32 {
  2239  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2240  			// When building for inclusion into a shared library, an instruction of the form
  2241  			//     MOVL 0(CX)(TLS*1), AX
  2242  			// becomes
  2243  			//     mov %gs:(%ecx), %eax
  2244  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2245  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2246  			// a shared library the instruction it becomes
  2247  			//     mov 0x0(%ecx), $eax
  2248  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2249  			if a.Offset != 0 {
  2250  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2251  			}
  2252  			return 0x65 // GS
  2253  		}
  2254  		return 0
  2255  	}
  2256  
  2257  	switch a.Index {
  2258  	case REG_CS:
  2259  		return 0x2e
  2260  
  2261  	case REG_DS:
  2262  		return 0x3e
  2263  
  2264  	case REG_ES:
  2265  		return 0x26
  2266  
  2267  	case REG_TLS:
  2268  		if ctxt.Flag_shared {
  2269  			// When building for inclusion into a shared library, an instruction of the form
  2270  			//     MOV 0(CX)(TLS*1), AX
  2271  			// becomes
  2272  			//     mov %fs:(%rcx), %rax
  2273  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2274  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2275  			// a shared library the instruction does not require a prefix.
  2276  			if a.Offset != 0 {
  2277  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2278  			}
  2279  			return 0x64
  2280  		}
  2281  
  2282  	case REG_FS:
  2283  		return 0x64
  2284  
  2285  	case REG_GS:
  2286  		return 0x65
  2287  	}
  2288  
  2289  	return 0
  2290  }
  2291  
  2292  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2293  	switch a.Type {
  2294  	case obj.TYPE_NONE:
  2295  		return Ynone
  2296  
  2297  	case obj.TYPE_BRANCH:
  2298  		return Ybr
  2299  
  2300  	case obj.TYPE_INDIR:
  2301  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2302  			return Yindir
  2303  		}
  2304  		return Yxxx
  2305  
  2306  	case obj.TYPE_MEM:
  2307  		if a.Index == REG_SP {
  2308  			// Can't use SP as the index register
  2309  			return Yxxx
  2310  		}
  2311  		if ctxt.Asmode == 64 {
  2312  			switch a.Name {
  2313  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2314  				// Global variables can't use index registers and their
  2315  				// base register is %rip (%rip is encoded as REG_NONE).
  2316  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2317  					return Yxxx
  2318  				}
  2319  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2320  				// These names must have a base of SP.  The old compiler
  2321  				// uses 0 for the base register. SSA uses REG_SP.
  2322  				if a.Reg != REG_SP && a.Reg != 0 {
  2323  					return Yxxx
  2324  				}
  2325  			case obj.NAME_NONE:
  2326  				// everything is ok
  2327  			default:
  2328  				// unknown name
  2329  				return Yxxx
  2330  			}
  2331  		}
  2332  		return Ym
  2333  
  2334  	case obj.TYPE_ADDR:
  2335  		switch a.Name {
  2336  		case obj.NAME_GOTREF:
  2337  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2338  			return Yxxx
  2339  
  2340  		case obj.NAME_EXTERN,
  2341  			obj.NAME_STATIC:
  2342  			if a.Sym != nil && isextern(a.Sym) || (p.Mode == 32 && !ctxt.Flag_shared) {
  2343  				return Yi32
  2344  			}
  2345  			return Yiauto // use pc-relative addressing
  2346  
  2347  		case obj.NAME_AUTO,
  2348  			obj.NAME_PARAM:
  2349  			return Yiauto
  2350  		}
  2351  
  2352  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2353  		// and got Yi32 in an earlier version of this code.
  2354  		// Keep doing that until we fix yduff etc.
  2355  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2356  			return Yi32
  2357  		}
  2358  
  2359  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2360  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2361  		}
  2362  		fallthrough
  2363  
  2364  		// fall through
  2365  
  2366  	case obj.TYPE_CONST:
  2367  		if a.Sym != nil {
  2368  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2369  		}
  2370  
  2371  		v := a.Offset
  2372  		if p.Mode == 32 {
  2373  			v = int64(int32(v))
  2374  		}
  2375  		if v == 0 {
  2376  			if p.Mark&PRESERVEFLAGS != 0 {
  2377  				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
  2378  				return Yu7
  2379  			}
  2380  			return Yi0
  2381  		}
  2382  		if v == 1 {
  2383  			return Yi1
  2384  		}
  2385  		if v >= 0 && v <= 127 {
  2386  			return Yu7
  2387  		}
  2388  		if v >= 0 && v <= 255 {
  2389  			return Yu8
  2390  		}
  2391  		if v >= -128 && v <= 127 {
  2392  			return Yi8
  2393  		}
  2394  		if p.Mode == 32 {
  2395  			return Yi32
  2396  		}
  2397  		l := int32(v)
  2398  		if int64(l) == v {
  2399  			return Ys32 /* can sign extend */
  2400  		}
  2401  		if v>>32 == 0 {
  2402  			return Yi32 /* unsigned */
  2403  		}
  2404  		return Yi64
  2405  
  2406  	case obj.TYPE_TEXTSIZE:
  2407  		return Ytextsize
  2408  	}
  2409  
  2410  	if a.Type != obj.TYPE_REG {
  2411  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2412  		return Yxxx
  2413  	}
  2414  
  2415  	switch a.Reg {
  2416  	case REG_AL:
  2417  		return Yal
  2418  
  2419  	case REG_AX:
  2420  		return Yax
  2421  
  2422  		/*
  2423  			case REG_SPB:
  2424  		*/
  2425  	case REG_BPB,
  2426  		REG_SIB,
  2427  		REG_DIB,
  2428  		REG_R8B,
  2429  		REG_R9B,
  2430  		REG_R10B,
  2431  		REG_R11B,
  2432  		REG_R12B,
  2433  		REG_R13B,
  2434  		REG_R14B,
  2435  		REG_R15B:
  2436  		if ctxt.Asmode != 64 {
  2437  			return Yxxx
  2438  		}
  2439  		fallthrough
  2440  
  2441  	case REG_DL,
  2442  		REG_BL,
  2443  		REG_AH,
  2444  		REG_CH,
  2445  		REG_DH,
  2446  		REG_BH:
  2447  		return Yrb
  2448  
  2449  	case REG_CL:
  2450  		return Ycl
  2451  
  2452  	case REG_CX:
  2453  		return Ycx
  2454  
  2455  	case REG_DX, REG_BX:
  2456  		return Yrx
  2457  
  2458  	case REG_R8, /* not really Yrl */
  2459  		REG_R9,
  2460  		REG_R10,
  2461  		REG_R11,
  2462  		REG_R12,
  2463  		REG_R13,
  2464  		REG_R14,
  2465  		REG_R15:
  2466  		if ctxt.Asmode != 64 {
  2467  			return Yxxx
  2468  		}
  2469  		fallthrough
  2470  
  2471  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2472  		if p.Mode == 32 {
  2473  			return Yrl32
  2474  		}
  2475  		return Yrl
  2476  
  2477  	case REG_F0 + 0:
  2478  		return Yf0
  2479  
  2480  	case REG_F0 + 1,
  2481  		REG_F0 + 2,
  2482  		REG_F0 + 3,
  2483  		REG_F0 + 4,
  2484  		REG_F0 + 5,
  2485  		REG_F0 + 6,
  2486  		REG_F0 + 7:
  2487  		return Yrf
  2488  
  2489  	case REG_M0 + 0,
  2490  		REG_M0 + 1,
  2491  		REG_M0 + 2,
  2492  		REG_M0 + 3,
  2493  		REG_M0 + 4,
  2494  		REG_M0 + 5,
  2495  		REG_M0 + 6,
  2496  		REG_M0 + 7:
  2497  		return Ymr
  2498  
  2499  	case REG_X0 + 0,
  2500  		REG_X0 + 1,
  2501  		REG_X0 + 2,
  2502  		REG_X0 + 3,
  2503  		REG_X0 + 4,
  2504  		REG_X0 + 5,
  2505  		REG_X0 + 6,
  2506  		REG_X0 + 7,
  2507  		REG_X0 + 8,
  2508  		REG_X0 + 9,
  2509  		REG_X0 + 10,
  2510  		REG_X0 + 11,
  2511  		REG_X0 + 12,
  2512  		REG_X0 + 13,
  2513  		REG_X0 + 14,
  2514  		REG_X0 + 15:
  2515  		return Yxr
  2516  
  2517  	case REG_Y0 + 0,
  2518  		REG_Y0 + 1,
  2519  		REG_Y0 + 2,
  2520  		REG_Y0 + 3,
  2521  		REG_Y0 + 4,
  2522  		REG_Y0 + 5,
  2523  		REG_Y0 + 6,
  2524  		REG_Y0 + 7,
  2525  		REG_Y0 + 8,
  2526  		REG_Y0 + 9,
  2527  		REG_Y0 + 10,
  2528  		REG_Y0 + 11,
  2529  		REG_Y0 + 12,
  2530  		REG_Y0 + 13,
  2531  		REG_Y0 + 14,
  2532  		REG_Y0 + 15:
  2533  		return Yyr
  2534  
  2535  	case REG_CS:
  2536  		return Ycs
  2537  	case REG_SS:
  2538  		return Yss
  2539  	case REG_DS:
  2540  		return Yds
  2541  	case REG_ES:
  2542  		return Yes
  2543  	case REG_FS:
  2544  		return Yfs
  2545  	case REG_GS:
  2546  		return Ygs
  2547  	case REG_TLS:
  2548  		return Ytls
  2549  
  2550  	case REG_GDTR:
  2551  		return Ygdtr
  2552  	case REG_IDTR:
  2553  		return Yidtr
  2554  	case REG_LDTR:
  2555  		return Yldtr
  2556  	case REG_MSW:
  2557  		return Ymsw
  2558  	case REG_TASK:
  2559  		return Ytask
  2560  
  2561  	case REG_CR + 0:
  2562  		return Ycr0
  2563  	case REG_CR + 1:
  2564  		return Ycr1
  2565  	case REG_CR + 2:
  2566  		return Ycr2
  2567  	case REG_CR + 3:
  2568  		return Ycr3
  2569  	case REG_CR + 4:
  2570  		return Ycr4
  2571  	case REG_CR + 5:
  2572  		return Ycr5
  2573  	case REG_CR + 6:
  2574  		return Ycr6
  2575  	case REG_CR + 7:
  2576  		return Ycr7
  2577  	case REG_CR + 8:
  2578  		return Ycr8
  2579  
  2580  	case REG_DR + 0:
  2581  		return Ydr0
  2582  	case REG_DR + 1:
  2583  		return Ydr1
  2584  	case REG_DR + 2:
  2585  		return Ydr2
  2586  	case REG_DR + 3:
  2587  		return Ydr3
  2588  	case REG_DR + 4:
  2589  		return Ydr4
  2590  	case REG_DR + 5:
  2591  		return Ydr5
  2592  	case REG_DR + 6:
  2593  		return Ydr6
  2594  	case REG_DR + 7:
  2595  		return Ydr7
  2596  
  2597  	case REG_TR + 0:
  2598  		return Ytr0
  2599  	case REG_TR + 1:
  2600  		return Ytr1
  2601  	case REG_TR + 2:
  2602  		return Ytr2
  2603  	case REG_TR + 3:
  2604  		return Ytr3
  2605  	case REG_TR + 4:
  2606  		return Ytr4
  2607  	case REG_TR + 5:
  2608  		return Ytr5
  2609  	case REG_TR + 6:
  2610  		return Ytr6
  2611  	case REG_TR + 7:
  2612  		return Ytr7
  2613  	}
  2614  
  2615  	return Yxxx
  2616  }
  2617  
  2618  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2619  	var i int
  2620  
  2621  	switch index {
  2622  	default:
  2623  		goto bad
  2624  
  2625  	case REG_NONE:
  2626  		i = 4 << 3
  2627  		goto bas
  2628  
  2629  	case REG_R8,
  2630  		REG_R9,
  2631  		REG_R10,
  2632  		REG_R11,
  2633  		REG_R12,
  2634  		REG_R13,
  2635  		REG_R14,
  2636  		REG_R15:
  2637  		if ctxt.Asmode != 64 {
  2638  			goto bad
  2639  		}
  2640  		fallthrough
  2641  
  2642  	case REG_AX,
  2643  		REG_CX,
  2644  		REG_DX,
  2645  		REG_BX,
  2646  		REG_BP,
  2647  		REG_SI,
  2648  		REG_DI:
  2649  		i = reg[index] << 3
  2650  	}
  2651  
  2652  	switch scale {
  2653  	default:
  2654  		goto bad
  2655  
  2656  	case 1:
  2657  		break
  2658  
  2659  	case 2:
  2660  		i |= 1 << 6
  2661  
  2662  	case 4:
  2663  		i |= 2 << 6
  2664  
  2665  	case 8:
  2666  		i |= 3 << 6
  2667  	}
  2668  
  2669  bas:
  2670  	switch base {
  2671  	default:
  2672  		goto bad
  2673  
  2674  	case REG_NONE: /* must be mod=00 */
  2675  		i |= 5
  2676  
  2677  	case REG_R8,
  2678  		REG_R9,
  2679  		REG_R10,
  2680  		REG_R11,
  2681  		REG_R12,
  2682  		REG_R13,
  2683  		REG_R14,
  2684  		REG_R15:
  2685  		if ctxt.Asmode != 64 {
  2686  			goto bad
  2687  		}
  2688  		fallthrough
  2689  
  2690  	case REG_AX,
  2691  		REG_CX,
  2692  		REG_DX,
  2693  		REG_BX,
  2694  		REG_SP,
  2695  		REG_BP,
  2696  		REG_SI,
  2697  		REG_DI:
  2698  		i |= reg[base]
  2699  	}
  2700  
  2701  	ctxt.AsmBuf.Put1(byte(i))
  2702  	return
  2703  
  2704  bad:
  2705  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2706  	ctxt.AsmBuf.Put1(0)
  2707  	return
  2708  }
  2709  
  2710  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2711  	var rel obj.Reloc
  2712  
  2713  	v := vaddr(ctxt, p, a, &rel)
  2714  	if rel.Siz != 0 {
  2715  		if rel.Siz != 4 {
  2716  			ctxt.Diag("bad reloc")
  2717  		}
  2718  		r := obj.Addrel(ctxt.Cursym)
  2719  		*r = rel
  2720  		r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  2721  	}
  2722  
  2723  	ctxt.AsmBuf.PutInt32(int32(v))
  2724  }
  2725  
  2726  /*
  2727  static void
  2728  relput8(Prog *p, Addr *a)
  2729  {
  2730  	vlong v;
  2731  	Reloc rel, *r;
  2732  
  2733  	v = vaddr(ctxt, p, a, &rel);
  2734  	if(rel.siz != 0) {
  2735  		r = addrel(ctxt->cursym);
  2736  		*r = rel;
  2737  		r->siz = 8;
  2738  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2739  	}
  2740  	put8(ctxt, v);
  2741  }
  2742  */
  2743  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2744  	if r != nil {
  2745  		*r = obj.Reloc{}
  2746  	}
  2747  
  2748  	switch a.Name {
  2749  	case obj.NAME_STATIC,
  2750  		obj.NAME_GOTREF,
  2751  		obj.NAME_EXTERN:
  2752  		s := a.Sym
  2753  		if r == nil {
  2754  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2755  			log.Fatalf("reloc")
  2756  		}
  2757  
  2758  		if a.Name == obj.NAME_GOTREF {
  2759  			r.Siz = 4
  2760  			r.Type = obj.R_GOTPCREL
  2761  		} else if isextern(s) || (p.Mode != 64 && !ctxt.Flag_shared) {
  2762  			r.Siz = 4
  2763  			r.Type = obj.R_ADDR
  2764  		} else {
  2765  			r.Siz = 4
  2766  			r.Type = obj.R_PCREL
  2767  		}
  2768  
  2769  		r.Off = -1 // caller must fill in
  2770  		r.Sym = s
  2771  		r.Add = a.Offset
  2772  
  2773  		return 0
  2774  	}
  2775  
  2776  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2777  		if r == nil {
  2778  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2779  			log.Fatalf("reloc")
  2780  		}
  2781  
  2782  		if !ctxt.Flag_shared || isAndroid {
  2783  			r.Type = obj.R_TLS_LE
  2784  			r.Siz = 4
  2785  			r.Off = -1 // caller must fill in
  2786  			r.Add = a.Offset
  2787  		}
  2788  		return 0
  2789  	}
  2790  
  2791  	return a.Offset
  2792  }
  2793  
// asmandsz emits the addressing bytes for operand a into ctxt.AsmBuf: the
// mod/reg/rm byte, an optional scale/index/base byte (via asmidx), and an
// optional 8-bit or 32-bit displacement, recording a relocation on
// ctxt.Cursym when the displacement refers to a symbol or TLS.
//
// r is placed in bits 3-5 of the first byte. Only the 0x40 and Rxr bits of
// rex are kept; they are ORed into ctxt.Rexflag together with the Rxb/Rxx
// bits implied by the base and index registers. m64 is not referenced
// anywhere in this function body.
//
// Addresses that cannot be encoded are reported with
// "asmand: bad address" and nothing further is emitted.
func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Keep only the bits of the caller's rex that this function propagates.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && ctxt.Rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the (possibly truncated) 32-bit displacement; encoding continues
	// even after the diagnostic above.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here; the two checks only add a
		// more specific diagnostic before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, no displacement allowed.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ctxt.AsmBuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Indexed memory operand: always encoded with rm=4 followed by the
	// byte produced by asmidx.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// NOTE: this base is local to the block and shadows the
		// function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			// A non-extern symbol combined with an index register is
			// rejected in 64-bit mode.
			if !isextern(a.Sym) && p.Mode == 64 {
				goto bad
			}
			if p.Mode == 32 && ctxt.Flag_shared {
				base = REG_CX
			} else {
				base = REG_NONE
			}
			// vaddr fills rel; the displacement comes from the relocation.
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=00 with a 32-bit displacement.
			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement needed; BP and R13 are excluded from this form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in a signed byte: mod=01.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.AsmBuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ctxt.AsmBuf.Put1(byte(v))
			return
		}

		// Otherwise mod=10 with a 32-bit displacement.
		ctxt.AsmBuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Non-indexed memory operand (index is REG_NONE or REG_TLS).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if p.Mode == 32 && ctxt.Flag_shared {
			base = REG_CX
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ctxt.Rexflag |= regrex[base]&Rxb | rex
	// No base register, a segment register, or TLS: displacement-only forms.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Since && binds tighter than ||, the p.Mode != 64 test stands on
		// its own: outside 64-bit mode the mod=00 rm=5 form is always used.
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ctxt.AsmBuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ctxt.AsmBuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base are always followed by an asmidx byte with no
	// index register.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			ctxt.AsmBuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ctxt.AsmBuf.Put1(byte(v))
			return
		}

		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Remaining general-purpose base registers.
	if REG_AX <= base && base <= REG_R15 {
		// A TLS index in non-shared mode turns the displacement into the
		// addend of an R_TLS_LE relocation and zeroes the emitted value.
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// No displacement; BP and R13 are excluded from this form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		// Signed 8-bit displacement.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.AsmBuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		// 32-bit displacement (always used when a relocation is pending).
		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

	// putrelv writes the 32-bit displacement v, first recording rel (if
	// any) at the offset where those four bytes will land.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// This r is a *obj.Reloc local to the block; it shadows the int
		// parameter r.
		r := obj.Addrel(ctxt.Cursym)
		*r = rel
		r.Off = int32(ctxt.Curp.Pc + int64(ctxt.AsmBuf.Len()))
	}

	ctxt.AsmBuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  2990  
  2991  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2992  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2993  }
  2994  
  2995  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2996  	asmandsz(ctxt, p, a, o, 0, 0)
  2997  }
  2998  
  2999  func bytereg(a *obj.Addr, t *uint8) {
  3000  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3001  		a.Reg += REG_AL - REG_AX
  3002  		*t = 0
  3003  	}
  3004  }
  3005  
  3006  func unbytereg(a *obj.Addr, t *uint8) {
  3007  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3008  		a.Reg += REG_AX - REG_AL
  3009  		*t = 0
  3010  	}
  3011  }
  3012  
  3013  const (
  3014  	E = 0xff
  3015  )
  3016  
// ymovtab is a table of special-case encodings: push/pop of segment
// registers, moves to and from segment, control, debug and test registers,
// the descriptor-table/MSW/task-register moves, double shifts, and TLS base
// loads.
//
// Each entry is written positionally as: the instruction, three operand
// classes, a small integer code, and four opcode bytes.
// NOTE(review): the three classes appear to be the from, from3 and to
// operands, and the code selects how the opcode bytes are used; the Movtab
// definition and the code that reads this table are outside this section,
// so confirm there. The final all-zero entry presumably terminates the
// table.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3156  
  3157  func isax(a *obj.Addr) bool {
  3158  	switch a.Reg {
  3159  	case REG_AX, REG_AL, REG_AH:
  3160  		return true
  3161  	}
  3162  
  3163  	if a.Index == REG_AX {
  3164  		return true
  3165  	}
  3166  	return false
  3167  }
  3168  
  3169  func subreg(p *obj.Prog, from int, to int) {
  3170  	if false { /* debug['Q'] */
  3171  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  3172  	}
  3173  
  3174  	if int(p.From.Reg) == from {
  3175  		p.From.Reg = int16(to)
  3176  		p.Ft = 0
  3177  	}
  3178  
  3179  	if int(p.To.Reg) == from {
  3180  		p.To.Reg = int16(to)
  3181  		p.Tt = 0
  3182  	}
  3183  
  3184  	if int(p.From.Index) == from {
  3185  		p.From.Index = int16(to)
  3186  		p.Ft = 0
  3187  	}
  3188  
  3189  	if int(p.To.Index) == from {
  3190  		p.To.Index = int16(to)
  3191  		p.Tt = 0
  3192  	}
  3193  
  3194  	if false { /* debug['Q'] */
  3195  		fmt.Printf("%v\n", p)
  3196  	}
  3197  }
  3198  
  3199  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3200  	switch op {
  3201  	case Pm, Pe, Pf2, Pf3:
  3202  		if osize != 1 {
  3203  			if op != Pm {
  3204  				ctxt.AsmBuf.Put1(byte(op))
  3205  			}
  3206  			ctxt.AsmBuf.Put1(Pm)
  3207  			z++
  3208  			op = int(o.op[z])
  3209  			break
  3210  		}
  3211  		fallthrough
  3212  
  3213  	default:
  3214  		if ctxt.AsmBuf.Len() == 0 || ctxt.AsmBuf.Last() != Pm {
  3215  			ctxt.AsmBuf.Put1(Pm)
  3216  		}
  3217  	}
  3218  
  3219  	ctxt.AsmBuf.Put1(byte(op))
  3220  	return z
  3221  }
  3222  
  3223  var bpduff1 = []byte{
  3224  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3225  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3226  }
  3227  
  3228  var bpduff2 = []byte{
  3229  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3230  }
  3231  
  3232  // Emit VEX prefix and opcode byte.
  3233  // The three addresses are the r/m, vvvv, and reg fields.
  3234  // The reg and rm arguments appear in the same order as the
  3235  // arguments to asmand, which typically follows the call to asmvex.
  3236  // The final two arguments are the VEX prefix (see encoding above)
  3237  // and the opcode byte.
  3238  // For details about vex prefix see:
  3239  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3240  func asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3241  	ctxt.Vexflag = 1
  3242  	rexR := 0
  3243  	if r != nil {
  3244  		rexR = regrex[r.Reg] & Rxr
  3245  	}
  3246  	rexB := 0
  3247  	rexX := 0
  3248  	if rm != nil {
  3249  		rexB = regrex[rm.Reg] & Rxb
  3250  		rexX = regrex[rm.Index] & Rxx
  3251  	}
  3252  	vexM := (vex >> 3) & 0xF
  3253  	vexWLP := vex & 0x87
  3254  	vexV := byte(0)
  3255  	if v != nil {
  3256  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3257  	}
  3258  	vexV ^= 0xF
  3259  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3260  		// Can use 2-byte encoding.
  3261  		ctxt.AsmBuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3262  	} else {
  3263  		// Must use 3-byte encoding.
  3264  		ctxt.AsmBuf.Put3(0xc4,
  3265  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3266  			vexV<<3|vexWLP,
  3267  		)
  3268  	}
  3269  	ctxt.AsmBuf.Put1(opcode)
  3270  }
  3271  
  3272  func doasm(ctxt *obj.Link, p *obj.Prog) {
  3273  	ctxt.Curp = p // TODO
  3274  
  3275  	o := opindex[p.As&obj.AMask]
  3276  
  3277  	if o == nil {
  3278  		ctxt.Diag("asmins: missing op %v", p)
  3279  		return
  3280  	}
  3281  
  3282  	pre := prefixof(ctxt, p, &p.From)
  3283  	if pre != 0 {
  3284  		ctxt.AsmBuf.Put1(byte(pre))
  3285  	}
  3286  	pre = prefixof(ctxt, p, &p.To)
  3287  	if pre != 0 {
  3288  		ctxt.AsmBuf.Put1(byte(pre))
  3289  	}
  3290  
  3291  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3292  	// which encodes as SHRQ $32(DX*0), AX.
  3293  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3294  	// Change encoding generated by assemblers and compilers and remove.
  3295  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3296  		p.From3 = new(obj.Addr)
  3297  		p.From3.Type = obj.TYPE_REG
  3298  		p.From3.Reg = p.From.Index
  3299  		p.From.Index = 0
  3300  	}
  3301  
  3302  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3303  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3304  	switch p.As {
  3305  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3306  		if p.From3Type() == obj.TYPE_NONE {
  3307  			p.From3 = new(obj.Addr)
  3308  			*p.From3 = p.From
  3309  			p.From = obj.Addr{}
  3310  			p.From.Type = obj.TYPE_CONST
  3311  			p.From.Offset = p.To.Offset
  3312  			p.To.Offset = 0
  3313  		}
  3314  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3315  		if p.From3Type() == obj.TYPE_NONE {
  3316  			p.From3 = new(obj.Addr)
  3317  			*p.From3 = p.To
  3318  			p.To = obj.Addr{}
  3319  			p.To.Type = obj.TYPE_CONST
  3320  			p.To.Offset = p.From3.Offset
  3321  			p.From3.Offset = 0
  3322  		}
  3323  	}
  3324  
  3325  	if p.Ft == 0 {
  3326  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3327  	}
  3328  	if p.Tt == 0 {
  3329  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3330  	}
  3331  
  3332  	ft := int(p.Ft) * Ymax
  3333  	f3t := Ynone * Ymax
  3334  	if p.From3 != nil {
  3335  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3336  	}
  3337  	tt := int(p.Tt) * Ymax
  3338  
  3339  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3340  	z := 0
  3341  	var a *obj.Addr
  3342  	var l int
  3343  	var op int
  3344  	var q *obj.Prog
  3345  	var r *obj.Reloc
  3346  	var rel obj.Reloc
  3347  	var v int64
  3348  	for i := range o.ytab {
  3349  		yt := &o.ytab[i]
  3350  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3351  			switch o.prefix {
  3352  			case Px1: /* first option valid only in 32-bit mode */
  3353  				if ctxt.Mode == 64 && z == 0 {
  3354  					z += int(yt.zoffset) + xo
  3355  					continue
  3356  				}
  3357  			case Pq: /* 16 bit escape and opcode escape */
  3358  				ctxt.AsmBuf.Put2(Pe, Pm)
  3359  
  3360  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3361  				ctxt.Rexflag |= Pw
  3362  				ctxt.AsmBuf.Put2(Pe, Pm)
  3363  
  3364  			case Pq4: /*  66 0F 38 */
  3365  				ctxt.AsmBuf.Put3(0x66, 0x0F, 0x38)
  3366  
  3367  			case Pf2, /* xmm opcode escape */
  3368  				Pf3:
  3369  				ctxt.AsmBuf.Put2(o.prefix, Pm)
  3370  
  3371  			case Pef3:
  3372  				ctxt.AsmBuf.Put3(Pe, Pf3, Pm)
  3373  
  3374  			case Pfw: /* xmm opcode escape + REX.W */
  3375  				ctxt.Rexflag |= Pw
  3376  				ctxt.AsmBuf.Put2(Pf3, Pm)
  3377  
  3378  			case Pm: /* opcode escape */
  3379  				ctxt.AsmBuf.Put1(Pm)
  3380  
  3381  			case Pe: /* 16 bit escape */
  3382  				ctxt.AsmBuf.Put1(Pe)
  3383  
  3384  			case Pw: /* 64-bit escape */
  3385  				if p.Mode != 64 {
  3386  					ctxt.Diag("asmins: illegal 64: %v", p)
  3387  				}
  3388  				ctxt.Rexflag |= Pw
  3389  
  3390  			case Pw8: /* 64-bit escape if z >= 8 */
  3391  				if z >= 8 {
  3392  					if p.Mode != 64 {
  3393  						ctxt.Diag("asmins: illegal 64: %v", p)
  3394  					}
  3395  					ctxt.Rexflag |= Pw
  3396  				}
  3397  
  3398  			case Pb: /* botch */
  3399  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3400  					goto bad
  3401  				}
  3402  				// NOTE(rsc): This is probably safe to do always,
  3403  				// but when enabled it chooses different encodings
  3404  				// than the old cmd/internal/obj/i386 code did,
  3405  				// which breaks our "same bits out" checks.
  3406  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3407  				// in the original obj/i386, and it would encode
  3408  				// (using a valid, shorter form) as 3c 00 if we enabled
  3409  				// the call to bytereg here.
  3410  				if p.Mode == 64 {
  3411  					bytereg(&p.From, &p.Ft)
  3412  					bytereg(&p.To, &p.Tt)
  3413  				}
  3414  
  3415  			case P32: /* 32 bit but illegal if 64-bit mode */
  3416  				if p.Mode == 64 {
  3417  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3418  				}
  3419  
  3420  			case Py: /* 64-bit only, no prefix */
  3421  				if p.Mode != 64 {
  3422  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3423  				}
  3424  
  3425  			case Py1: /* 64-bit only if z < 1, no prefix */
  3426  				if z < 1 && p.Mode != 64 {
  3427  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3428  				}
  3429  
  3430  			case Py3: /* 64-bit only if z < 3, no prefix */
  3431  				if z < 3 && p.Mode != 64 {
  3432  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3433  				}
  3434  			}
  3435  
  3436  			if z >= len(o.op) {
  3437  				log.Fatalf("asmins bad table %v", p)
  3438  			}
  3439  			op = int(o.op[z])
  3440  			if op == 0x0f {
  3441  				ctxt.AsmBuf.Put1(byte(op))
  3442  				z++
  3443  				op = int(o.op[z])
  3444  			}
  3445  
  3446  			switch yt.zcase {
  3447  			default:
  3448  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3449  				return
  3450  
  3451  			case Zpseudo:
  3452  				break
  3453  
  3454  			case Zlit:
  3455  				for ; ; z++ {
  3456  					op = int(o.op[z])
  3457  					if op == 0 {
  3458  						break
  3459  					}
  3460  					ctxt.AsmBuf.Put1(byte(op))
  3461  				}
  3462  
  3463  			case Zlitm_r:
  3464  				for ; ; z++ {
  3465  					op = int(o.op[z])
  3466  					if op == 0 {
  3467  						break
  3468  					}
  3469  					ctxt.AsmBuf.Put1(byte(op))
  3470  				}
  3471  				asmand(ctxt, p, &p.From, &p.To)
  3472  
  3473  			case Zmb_r:
  3474  				bytereg(&p.From, &p.Ft)
  3475  				fallthrough
  3476  
  3477  			case Zm_r:
  3478  				ctxt.AsmBuf.Put1(byte(op))
  3479  				asmand(ctxt, p, &p.From, &p.To)
  3480  
  3481  			case Zm2_r:
  3482  				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
  3483  				asmand(ctxt, p, &p.From, &p.To)
  3484  
  3485  			case Zm_r_xm:
  3486  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3487  				asmand(ctxt, p, &p.From, &p.To)
  3488  
  3489  			case Zm_r_xm_nr:
  3490  				ctxt.Rexflag = 0
  3491  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3492  				asmand(ctxt, p, &p.From, &p.To)
  3493  
  3494  			case Zm_r_i_xm:
  3495  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3496  				asmand(ctxt, p, &p.From, p.From3)
  3497  				ctxt.AsmBuf.Put1(byte(p.To.Offset))
  3498  
  3499  			case Zibm_r, Zibr_m:
  3500  				for {
  3501  					tmp1 := z
  3502  					z++
  3503  					op = int(o.op[tmp1])
  3504  					if op == 0 {
  3505  						break
  3506  					}
  3507  					ctxt.AsmBuf.Put1(byte(op))
  3508  				}
  3509  				if yt.zcase == Zibr_m {
  3510  					asmand(ctxt, p, &p.To, p.From3)
  3511  				} else {
  3512  					asmand(ctxt, p, p.From3, &p.To)
  3513  				}
  3514  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3515  
  3516  			case Zaut_r:
  3517  				ctxt.AsmBuf.Put1(0x8d) // leal
  3518  				if p.From.Type != obj.TYPE_ADDR {
  3519  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3520  				}
  3521  				p.From.Type = obj.TYPE_MEM
  3522  				asmand(ctxt, p, &p.From, &p.To)
  3523  				p.From.Type = obj.TYPE_ADDR
  3524  
  3525  			case Zm_o:
  3526  				ctxt.AsmBuf.Put1(byte(op))
  3527  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3528  
  3529  			case Zr_m:
  3530  				ctxt.AsmBuf.Put1(byte(op))
  3531  				asmand(ctxt, p, &p.To, &p.From)
  3532  
  3533  			case Zvex_rm_v_r:
  3534  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3535  				asmand(ctxt, p, &p.From, &p.To)
  3536  
  3537  			case Zvex_i_r_v:
  3538  				asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
  3539  				regnum := byte(0x7)
  3540  				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
  3541  					regnum &= byte(p.From3.Reg - REG_X0)
  3542  				} else {
  3543  					regnum &= byte(p.From3.Reg - REG_Y0)
  3544  				}
  3545  				ctxt.AsmBuf.Put1(byte(o.op[z+2]) | regnum)
  3546  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3547  
  3548  			case Zvex_i_rm_v_r:
  3549  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3550  				asmand(ctxt, p, &p.From, &p.To)
  3551  				ctxt.AsmBuf.Put1(byte(p.From3.Offset))
  3552  
  3553  			case Zvex_i_rm_r:
  3554  				asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
  3555  				asmand(ctxt, p, p.From3, &p.To)
  3556  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3557  
  3558  			case Zvex_v_rm_r:
  3559  				asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3560  				asmand(ctxt, p, p.From3, &p.To)
  3561  
  3562  			case Zvex_r_v_rm:
  3563  				asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3564  				asmand(ctxt, p, &p.To, &p.From)
  3565  
  3566  			case Zr_m_xm:
  3567  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3568  				asmand(ctxt, p, &p.To, &p.From)
  3569  
  3570  			case Zr_m_xm_nr:
  3571  				ctxt.Rexflag = 0
  3572  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3573  				asmand(ctxt, p, &p.To, &p.From)
  3574  
  3575  			case Zo_m:
  3576  				ctxt.AsmBuf.Put1(byte(op))
  3577  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3578  
  3579  			case Zcallindreg:
  3580  				r = obj.Addrel(ctxt.Cursym)
  3581  				r.Off = int32(p.Pc)
  3582  				r.Type = obj.R_CALLIND
  3583  				r.Siz = 0
  3584  				fallthrough
  3585  
  3586  			case Zo_m64:
  3587  				ctxt.AsmBuf.Put1(byte(op))
  3588  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3589  
  3590  			case Zm_ibo:
  3591  				ctxt.AsmBuf.Put1(byte(op))
  3592  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3593  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3594  
  3595  			case Zibo_m:
  3596  				ctxt.AsmBuf.Put1(byte(op))
  3597  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3598  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3599  
  3600  			case Zibo_m_xm:
  3601  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3602  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3603  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3604  
  3605  			case Z_ib, Zib_:
  3606  				if yt.zcase == Zib_ {
  3607  					a = &p.From
  3608  				} else {
  3609  					a = &p.To
  3610  				}
  3611  				ctxt.AsmBuf.Put1(byte(op))
  3612  				if p.As == AXABORT {
  3613  					ctxt.AsmBuf.Put1(o.op[z+1])
  3614  				}
  3615  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3616  
  3617  			case Zib_rp:
  3618  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3619  				ctxt.AsmBuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3620  
  3621  			case Zil_rp:
  3622  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3623  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3624  				if o.prefix == Pe {
  3625  					v = vaddr(ctxt, p, &p.From, nil)
  3626  					ctxt.AsmBuf.PutInt16(int16(v))
  3627  				} else {
  3628  					relput4(ctxt, p, &p.From)
  3629  				}
  3630  
  3631  			case Zo_iw:
  3632  				ctxt.AsmBuf.Put1(byte(op))
  3633  				if p.From.Type != obj.TYPE_NONE {
  3634  					v = vaddr(ctxt, p, &p.From, nil)
  3635  					ctxt.AsmBuf.PutInt16(int16(v))
  3636  				}
  3637  
  3638  			case Ziq_rp:
  3639  				v = vaddr(ctxt, p, &p.From, &rel)
  3640  				l = int(v >> 32)
  3641  				if l == 0 && rel.Siz != 8 {
  3642  					//p->mark |= 0100;
  3643  					//print("zero: %llux %v\n", v, p);
  3644  					ctxt.Rexflag &^= (0x40 | Rxw)
  3645  
  3646  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3647  					ctxt.AsmBuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3648  					if rel.Type != 0 {
  3649  						r = obj.Addrel(ctxt.Cursym)
  3650  						*r = rel
  3651  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3652  					}
  3653  
  3654  					ctxt.AsmBuf.PutInt32(int32(v))
  3655  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3656  
  3657  					//p->mark |= 0100;
  3658  					//print("sign: %llux %v\n", v, p);
  3659  					ctxt.AsmBuf.Put1(0xc7)
  3660  					asmando(ctxt, p, &p.To, 0)
  3661  
  3662  					ctxt.AsmBuf.PutInt32(int32(v)) // need all 8
  3663  				} else {
  3664  					//print("all: %llux %v\n", v, p);
  3665  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3666  					ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3667  					if rel.Type != 0 {
  3668  						r = obj.Addrel(ctxt.Cursym)
  3669  						*r = rel
  3670  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3671  					}
  3672  
  3673  					ctxt.AsmBuf.PutInt64(v)
  3674  				}
  3675  
  3676  			case Zib_rr:
  3677  				ctxt.AsmBuf.Put1(byte(op))
  3678  				asmand(ctxt, p, &p.To, &p.To)
  3679  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3680  
  3681  			case Z_il, Zil_:
  3682  				if yt.zcase == Zil_ {
  3683  					a = &p.From
  3684  				} else {
  3685  					a = &p.To
  3686  				}
  3687  				ctxt.AsmBuf.Put1(byte(op))
  3688  				if o.prefix == Pe {
  3689  					v = vaddr(ctxt, p, a, nil)
  3690  					ctxt.AsmBuf.PutInt16(int16(v))
  3691  				} else {
  3692  					relput4(ctxt, p, a)
  3693  				}
  3694  
  3695  			case Zm_ilo, Zilo_m:
  3696  				ctxt.AsmBuf.Put1(byte(op))
  3697  				if yt.zcase == Zilo_m {
  3698  					a = &p.From
  3699  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3700  				} else {
  3701  					a = &p.To
  3702  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3703  				}
  3704  
  3705  				if o.prefix == Pe {
  3706  					v = vaddr(ctxt, p, a, nil)
  3707  					ctxt.AsmBuf.PutInt16(int16(v))
  3708  				} else {
  3709  					relput4(ctxt, p, a)
  3710  				}
  3711  
  3712  			case Zil_rr:
  3713  				ctxt.AsmBuf.Put1(byte(op))
  3714  				asmand(ctxt, p, &p.To, &p.To)
  3715  				if o.prefix == Pe {
  3716  					v = vaddr(ctxt, p, &p.From, nil)
  3717  					ctxt.AsmBuf.PutInt16(int16(v))
  3718  				} else {
  3719  					relput4(ctxt, p, &p.From)
  3720  				}
  3721  
  3722  			case Z_rp:
  3723  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3724  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3725  
  3726  			case Zrp_:
  3727  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3728  				ctxt.AsmBuf.Put1(byte(op + reg[p.From.Reg]))
  3729  
  3730  			case Zclr:
  3731  				ctxt.Rexflag &^= Pw
  3732  				ctxt.AsmBuf.Put1(byte(op))
  3733  				asmand(ctxt, p, &p.To, &p.To)
  3734  
  3735  			case Zcallcon, Zjmpcon:
  3736  				if yt.zcase == Zcallcon {
  3737  					ctxt.AsmBuf.Put1(byte(op))
  3738  				} else {
  3739  					ctxt.AsmBuf.Put1(o.op[z+1])
  3740  				}
  3741  				r = obj.Addrel(ctxt.Cursym)
  3742  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3743  				r.Type = obj.R_PCREL
  3744  				r.Siz = 4
  3745  				r.Add = p.To.Offset
  3746  				ctxt.AsmBuf.PutInt32(0)
  3747  
  3748  			case Zcallind:
  3749  				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
  3750  				r = obj.Addrel(ctxt.Cursym)
  3751  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3752  				r.Type = obj.R_ADDR
  3753  				r.Siz = 4
  3754  				r.Add = p.To.Offset
  3755  				r.Sym = p.To.Sym
  3756  				ctxt.AsmBuf.PutInt32(0)
  3757  
  3758  			case Zcall, Zcallduff:
  3759  				if p.To.Sym == nil {
  3760  					ctxt.Diag("call without target")
  3761  					log.Fatalf("bad code")
  3762  				}
  3763  
  3764  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3765  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3766  				}
  3767  
  3768  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
  3769  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3770  					// (the call jumps into the middle of the function).
  3771  					// This makes it possible to see call sites for duffcopy/duffzero in
  3772  					// BP-based profiling tools like Linux perf (which is the
  3773  					// whole point of obj.Framepointer_enabled).
  3774  					// MOVQ BP, -16(SP)
  3775  					// LEAQ -16(SP), BP
  3776  					ctxt.AsmBuf.Put(bpduff1)
  3777  				}
  3778  				ctxt.AsmBuf.Put1(byte(op))
  3779  				r = obj.Addrel(ctxt.Cursym)
  3780  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3781  				r.Sym = p.To.Sym
  3782  				r.Add = p.To.Offset
  3783  				r.Type = obj.R_CALL
  3784  				r.Siz = 4
  3785  				ctxt.AsmBuf.PutInt32(0)
  3786  
  3787  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
  3788  					// Pop BP pushed above.
  3789  					// MOVQ 0(BP), BP
  3790  					ctxt.AsmBuf.Put(bpduff2)
  3791  				}
  3792  
  3793  			// TODO: jump across functions needs reloc
  3794  			case Zbr, Zjmp, Zloop:
  3795  				if p.As == AXBEGIN {
  3796  					ctxt.AsmBuf.Put1(byte(op))
  3797  				}
  3798  				if p.To.Sym != nil {
  3799  					if yt.zcase != Zjmp {
  3800  						ctxt.Diag("branch to ATEXT")
  3801  						log.Fatalf("bad code")
  3802  					}
  3803  
  3804  					ctxt.AsmBuf.Put1(o.op[z+1])
  3805  					r = obj.Addrel(ctxt.Cursym)
  3806  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3807  					r.Sym = p.To.Sym
  3808  					r.Type = obj.R_PCREL
  3809  					r.Siz = 4
  3810  					ctxt.AsmBuf.PutInt32(0)
  3811  					break
  3812  				}
  3813  
  3814  				// Assumes q is in this function.
  3815  				// TODO: Check in input, preserve in brchain.
  3816  
  3817  				// Fill in backward jump now.
  3818  				q = p.Pcond
  3819  
  3820  				if q == nil {
  3821  					ctxt.Diag("jmp/branch/loop without target")
  3822  					log.Fatalf("bad code")
  3823  				}
  3824  
  3825  				if p.Back&1 != 0 {
  3826  					v = q.Pc - (p.Pc + 2)
  3827  					if v >= -128 && p.As != AXBEGIN {
  3828  						if p.As == AJCXZL {
  3829  							ctxt.AsmBuf.Put1(0x67)
  3830  						}
  3831  						ctxt.AsmBuf.Put2(byte(op), byte(v))
  3832  					} else if yt.zcase == Zloop {
  3833  						ctxt.Diag("loop too far: %v", p)
  3834  					} else {
  3835  						v -= 5 - 2
  3836  						if p.As == AXBEGIN {
  3837  							v--
  3838  						}
  3839  						if yt.zcase == Zbr {
  3840  							ctxt.AsmBuf.Put1(0x0f)
  3841  							v--
  3842  						}
  3843  
  3844  						ctxt.AsmBuf.Put1(o.op[z+1])
  3845  						ctxt.AsmBuf.PutInt32(int32(v))
  3846  					}
  3847  
  3848  					break
  3849  				}
  3850  
  3851  				// Annotate target; will fill in later.
  3852  				p.Forwd = q.Rel
  3853  
  3854  				q.Rel = p
  3855  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3856  					if p.As == AJCXZL {
  3857  						ctxt.AsmBuf.Put1(0x67)
  3858  					}
  3859  					ctxt.AsmBuf.Put2(byte(op), 0)
  3860  				} else if yt.zcase == Zloop {
  3861  					ctxt.Diag("loop too far: %v", p)
  3862  				} else {
  3863  					if yt.zcase == Zbr {
  3864  						ctxt.AsmBuf.Put1(0x0f)
  3865  					}
  3866  					ctxt.AsmBuf.Put1(o.op[z+1])
  3867  					ctxt.AsmBuf.PutInt32(0)
  3868  				}
  3869  
  3870  				break
  3871  
  3872  			/*
  3873  				v = q->pc - p->pc - 2;
  3874  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3875  					*ctxt->andptr++ = op;
  3876  					*ctxt->andptr++ = v;
  3877  				} else {
  3878  					v -= 5-2;
  3879  					if(yt.zcase == Zbr) {
  3880  						*ctxt->andptr++ = 0x0f;
  3881  						v--;
  3882  					}
  3883  					*ctxt->andptr++ = o->op[z+1];
  3884  					*ctxt->andptr++ = v;
  3885  					*ctxt->andptr++ = v>>8;
  3886  					*ctxt->andptr++ = v>>16;
  3887  					*ctxt->andptr++ = v>>24;
  3888  				}
  3889  			*/
  3890  
  3891  			case Zbyte:
  3892  				v = vaddr(ctxt, p, &p.From, &rel)
  3893  				if rel.Siz != 0 {
  3894  					rel.Siz = uint8(op)
  3895  					r = obj.Addrel(ctxt.Cursym)
  3896  					*r = rel
  3897  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3898  				}
  3899  
  3900  				ctxt.AsmBuf.Put1(byte(v))
  3901  				if op > 1 {
  3902  					ctxt.AsmBuf.Put1(byte(v >> 8))
  3903  					if op > 2 {
  3904  						ctxt.AsmBuf.PutInt16(int16(v >> 16))
  3905  						if op > 4 {
  3906  							ctxt.AsmBuf.PutInt32(int32(v >> 32))
  3907  						}
  3908  					}
  3909  				}
  3910  			}
  3911  
  3912  			return
  3913  		}
  3914  		z += int(yt.zoffset) + xo
  3915  	}
  3916  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3917  		var pp obj.Prog
  3918  		var t []byte
  3919  		if p.As == mo[0].as {
  3920  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3921  				t = mo[0].op[:]
  3922  				switch mo[0].code {
  3923  				default:
  3924  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3925  
  3926  				case 0: /* lit */
  3927  					for z = 0; t[z] != E; z++ {
  3928  						ctxt.AsmBuf.Put1(t[z])
  3929  					}
  3930  
  3931  				case 1: /* r,m */
  3932  					ctxt.AsmBuf.Put1(t[0])
  3933  					asmando(ctxt, p, &p.To, int(t[1]))
  3934  
  3935  				case 2: /* m,r */
  3936  					ctxt.AsmBuf.Put1(t[0])
  3937  					asmando(ctxt, p, &p.From, int(t[1]))
  3938  
  3939  				case 3: /* r,m - 2op */
  3940  					ctxt.AsmBuf.Put2(t[0], t[1])
  3941  					asmando(ctxt, p, &p.To, int(t[2]))
  3942  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3943  
  3944  				case 4: /* m,r - 2op */
  3945  					ctxt.AsmBuf.Put2(t[0], t[1])
  3946  					asmando(ctxt, p, &p.From, int(t[2]))
  3947  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3948  
  3949  				case 5: /* load full pointer, trash heap */
  3950  					if t[0] != 0 {
  3951  						ctxt.AsmBuf.Put1(t[0])
  3952  					}
  3953  					switch p.To.Index {
  3954  					default:
  3955  						goto bad
  3956  
  3957  					case REG_DS:
  3958  						ctxt.AsmBuf.Put1(0xc5)
  3959  
  3960  					case REG_SS:
  3961  						ctxt.AsmBuf.Put2(0x0f, 0xb2)
  3962  
  3963  					case REG_ES:
  3964  						ctxt.AsmBuf.Put1(0xc4)
  3965  
  3966  					case REG_FS:
  3967  						ctxt.AsmBuf.Put2(0x0f, 0xb4)
  3968  
  3969  					case REG_GS:
  3970  						ctxt.AsmBuf.Put2(0x0f, 0xb5)
  3971  					}
  3972  
  3973  					asmand(ctxt, p, &p.From, &p.To)
  3974  
  3975  				case 6: /* double shift */
  3976  					if t[0] == Pw {
  3977  						if p.Mode != 64 {
  3978  							ctxt.Diag("asmins: illegal 64: %v", p)
  3979  						}
  3980  						ctxt.Rexflag |= Pw
  3981  						t = t[1:]
  3982  					} else if t[0] == Pe {
  3983  						ctxt.AsmBuf.Put1(Pe)
  3984  						t = t[1:]
  3985  					}
  3986  
  3987  					switch p.From.Type {
  3988  					default:
  3989  						goto bad
  3990  
  3991  					case obj.TYPE_CONST:
  3992  						ctxt.AsmBuf.Put2(0x0f, t[0])
  3993  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3994  						ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3995  
  3996  					case obj.TYPE_REG:
  3997  						switch p.From.Reg {
  3998  						default:
  3999  							goto bad
  4000  
  4001  						case REG_CL, REG_CX:
  4002  							ctxt.AsmBuf.Put2(0x0f, t[1])
  4003  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4004  						}
  4005  					}
  4006  
  4007  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4008  				// where you load the TLS base register into a register and then index off that
  4009  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4010  				// are handled in prefixof above and should not be listed here.
  4011  				case 7: /* mov tls, r */
  4012  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  4013  						ctxt.Diag("invalid load of TLS: %v", p)
  4014  					}
  4015  
  4016  					if p.Mode == 32 {
  4017  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4018  						// where you load the TLS base register into a register and then index off that
  4019  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4020  						// are handled in prefixof above and should not be listed here.
  4021  						switch ctxt.Headtype {
  4022  						default:
  4023  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  4024  
  4025  						case obj.Hlinux,
  4026  							obj.Hnacl:
  4027  							if ctxt.Flag_shared {
  4028  								// Note that this is not generating the same insns as the other cases.
  4029  								//     MOV TLS, R_to
  4030  								// becomes
  4031  								//     call __x86.get_pc_thunk.cx
  4032  								//     movl (gotpc + g@gotntpoff)(%ecx),$R_To
  4033  								// which is encoded as
  4034  								//     call __x86.get_pc_thunk.cx
  4035  								//     movq 0(%ecx), R_to
  4036  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4037  								// is g, which we can't check here, but will when we assemble the second
  4038  								// instruction.
  4039  								ctxt.AsmBuf.Put1(0xe8)
  4040  								r = obj.Addrel(ctxt.Cursym)
  4041  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4042  								r.Type = obj.R_CALL
  4043  								r.Siz = 4
  4044  								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk.cx", 0)
  4045  								ctxt.AsmBuf.PutInt32(0)
  4046  
  4047  								ctxt.AsmBuf.Put2(0x8B, byte(2<<6|reg[REG_CX]|(reg[p.To.Reg]<<3)))
  4048  								r = obj.Addrel(ctxt.Cursym)
  4049  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4050  								r.Type = obj.R_TLS_IE
  4051  								r.Siz = 4
  4052  								r.Add = 2
  4053  								ctxt.AsmBuf.PutInt32(0)
  4054  							} else {
  4055  								// ELF TLS base is 0(GS).
  4056  								pp.From = p.From
  4057  
  4058  								pp.From.Type = obj.TYPE_MEM
  4059  								pp.From.Reg = REG_GS
  4060  								pp.From.Offset = 0
  4061  								pp.From.Index = REG_NONE
  4062  								pp.From.Scale = 0
  4063  								ctxt.AsmBuf.Put2(0x65, // GS
  4064  									0x8B)
  4065  								asmand(ctxt, p, &pp.From, &p.To)
  4066  							}
  4067  						case obj.Hplan9:
  4068  							if ctxt.Plan9privates == nil {
  4069  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4070  							}
  4071  							pp.From = obj.Addr{}
  4072  							pp.From.Type = obj.TYPE_MEM
  4073  							pp.From.Name = obj.NAME_EXTERN
  4074  							pp.From.Sym = ctxt.Plan9privates
  4075  							pp.From.Offset = 0
  4076  							pp.From.Index = REG_NONE
  4077  							ctxt.AsmBuf.Put1(0x8B)
  4078  							asmand(ctxt, p, &pp.From, &p.To)
  4079  
  4080  						case obj.Hwindows:
  4081  							// Windows TLS base is always 0x14(FS).
  4082  							pp.From = p.From
  4083  
  4084  							pp.From.Type = obj.TYPE_MEM
  4085  							pp.From.Reg = REG_FS
  4086  							pp.From.Offset = 0x14
  4087  							pp.From.Index = REG_NONE
  4088  							pp.From.Scale = 0
  4089  							ctxt.AsmBuf.Put2(0x64, // FS
  4090  								0x8B)
  4091  							asmand(ctxt, p, &pp.From, &p.To)
  4092  						}
  4093  						break
  4094  					}
  4095  
  4096  					switch ctxt.Headtype {
  4097  					default:
  4098  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  4099  
  4100  					case obj.Hlinux:
  4101  						if !ctxt.Flag_shared {
  4102  							log.Fatalf("unknown TLS base location for linux without -shared")
  4103  						}
  4104  						// Note that this is not generating the same insn as the other cases.
  4105  						//     MOV TLS, R_to
  4106  						// becomes
  4107  						//     movq g@gottpoff(%rip), R_to
  4108  						// which is encoded as
  4109  						//     movq 0(%rip), R_to
  4110  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4111  						// is g, which we can't check here, but will when we assemble the second
  4112  						// instruction.
  4113  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4114  
  4115  						ctxt.AsmBuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4116  						r = obj.Addrel(ctxt.Cursym)
  4117  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4118  						r.Type = obj.R_TLS_IE
  4119  						r.Siz = 4
  4120  						r.Add = -4
  4121  						ctxt.AsmBuf.PutInt32(0)
  4122  
  4123  					case obj.Hplan9:
  4124  						if ctxt.Plan9privates == nil {
  4125  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4126  						}
  4127  						pp.From = obj.Addr{}
  4128  						pp.From.Type = obj.TYPE_MEM
  4129  						pp.From.Name = obj.NAME_EXTERN
  4130  						pp.From.Sym = ctxt.Plan9privates
  4131  						pp.From.Offset = 0
  4132  						pp.From.Index = REG_NONE
  4133  						ctxt.Rexflag |= Pw
  4134  						ctxt.AsmBuf.Put1(0x8B)
  4135  						asmand(ctxt, p, &pp.From, &p.To)
  4136  
  4137  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4138  						// TLS base is 0(FS).
  4139  						pp.From = p.From
  4140  
  4141  						pp.From.Type = obj.TYPE_MEM
  4142  						pp.From.Name = obj.NAME_NONE
  4143  						pp.From.Reg = REG_NONE
  4144  						pp.From.Offset = 0
  4145  						pp.From.Index = REG_NONE
  4146  						pp.From.Scale = 0
  4147  						ctxt.Rexflag |= Pw
  4148  						ctxt.AsmBuf.Put2(0x64, // FS
  4149  							0x8B)
  4150  						asmand(ctxt, p, &pp.From, &p.To)
  4151  
  4152  					case obj.Hwindows:
  4153  						// Windows TLS base is always 0x28(GS).
  4154  						pp.From = p.From
  4155  
  4156  						pp.From.Type = obj.TYPE_MEM
  4157  						pp.From.Name = obj.NAME_NONE
  4158  						pp.From.Reg = REG_GS
  4159  						pp.From.Offset = 0x28
  4160  						pp.From.Index = REG_NONE
  4161  						pp.From.Scale = 0
  4162  						ctxt.Rexflag |= Pw
  4163  						ctxt.AsmBuf.Put2(0x65, // GS
  4164  							0x8B)
  4165  						asmand(ctxt, p, &pp.From, &p.To)
  4166  					}
  4167  				}
  4168  				return
  4169  			}
  4170  		}
  4171  	}
  4172  	goto bad
  4173  
  4174  bad:
  4175  	if p.Mode != 64 {
  4176  		/*
  4177  		 * here, the assembly has failed.
  4178  		 * if its a byte instruction that has
  4179  		 * unaddressable registers, try to
  4180  		 * exchange registers and reissue the
  4181  		 * instruction with the operands renamed.
  4182  		 */
  4183  		pp := *p
  4184  
  4185  		unbytereg(&pp.From, &pp.Ft)
  4186  		unbytereg(&pp.To, &pp.Tt)
  4187  
  4188  		z := int(p.From.Reg)
  4189  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4190  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4191  			// For now, different to keep bit-for-bit compatibility.
  4192  			if p.Mode == 32 {
  4193  				breg := byteswapreg(ctxt, &p.To)
  4194  				if breg != REG_AX {
  4195  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4196  					asmando(ctxt, p, &p.From, reg[breg])
  4197  					subreg(&pp, z, breg)
  4198  					doasm(ctxt, &pp)
  4199  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4200  					asmando(ctxt, p, &p.From, reg[breg])
  4201  				} else {
  4202  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4203  					subreg(&pp, z, REG_AX)
  4204  					doasm(ctxt, &pp)
  4205  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4206  				}
  4207  				return
  4208  			}
  4209  
  4210  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4211  				// We certainly don't want to exchange
  4212  				// with AX if the op is MUL or DIV.
  4213  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4214  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4215  				subreg(&pp, z, REG_BX)
  4216  				doasm(ctxt, &pp)
  4217  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4218  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4219  			} else {
  4220  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4221  				subreg(&pp, z, REG_AX)
  4222  				doasm(ctxt, &pp)
  4223  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4224  			}
  4225  			return
  4226  		}
  4227  
  4228  		z = int(p.To.Reg)
  4229  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4230  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4231  			// For now, different to keep bit-for-bit compatibility.
  4232  			if p.Mode == 32 {
  4233  				breg := byteswapreg(ctxt, &p.From)
  4234  				if breg != REG_AX {
  4235  					ctxt.AsmBuf.Put1(0x87) //xchg rhs,bx
  4236  					asmando(ctxt, p, &p.To, reg[breg])
  4237  					subreg(&pp, z, breg)
  4238  					doasm(ctxt, &pp)
  4239  					ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4240  					asmando(ctxt, p, &p.To, reg[breg])
  4241  				} else {
  4242  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4243  					subreg(&pp, z, REG_AX)
  4244  					doasm(ctxt, &pp)
  4245  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4246  				}
  4247  				return
  4248  			}
  4249  
  4250  			if isax(&p.From) {
  4251  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4252  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4253  				subreg(&pp, z, REG_BX)
  4254  				doasm(ctxt, &pp)
  4255  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4256  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4257  			} else {
  4258  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4259  				subreg(&pp, z, REG_AX)
  4260  				doasm(ctxt, &pp)
  4261  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4262  			}
  4263  			return
  4264  		}
  4265  	}
  4266  
  4267  	ctxt.Diag("invalid instruction: %v", p)
  4268  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4269  	return
  4270  }
  4271  
  4272  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4273  // which is not referenced in a.
  4274  // If a is empty, it returns BX to account for MULB-like instructions
  4275  // that might use DX and AX.
  4276  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4277  	cand := 1
  4278  	canc := cand
  4279  	canb := canc
  4280  	cana := canb
  4281  
  4282  	if a.Type == obj.TYPE_NONE {
  4283  		cand = 0
  4284  		cana = cand
  4285  	}
  4286  
  4287  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4288  		switch a.Reg {
  4289  		case REG_NONE:
  4290  			cand = 0
  4291  			cana = cand
  4292  
  4293  		case REG_AX, REG_AL, REG_AH:
  4294  			cana = 0
  4295  
  4296  		case REG_BX, REG_BL, REG_BH:
  4297  			canb = 0
  4298  
  4299  		case REG_CX, REG_CL, REG_CH:
  4300  			canc = 0
  4301  
  4302  		case REG_DX, REG_DL, REG_DH:
  4303  			cand = 0
  4304  		}
  4305  	}
  4306  
  4307  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4308  		switch a.Index {
  4309  		case REG_AX:
  4310  			cana = 0
  4311  
  4312  		case REG_BX:
  4313  			canb = 0
  4314  
  4315  		case REG_CX:
  4316  			canc = 0
  4317  
  4318  		case REG_DX:
  4319  			cand = 0
  4320  		}
  4321  	}
  4322  
  4323  	if cana != 0 {
  4324  		return REG_AX
  4325  	}
  4326  	if canb != 0 {
  4327  		return REG_BX
  4328  	}
  4329  	if canc != 0 {
  4330  		return REG_CX
  4331  	}
  4332  	if cand != 0 {
  4333  		return REG_DX
  4334  	}
  4335  
  4336  	ctxt.Diag("impossible byte register")
  4337  	log.Fatalf("bad code")
  4338  	return 0
  4339  }
  4340  
  4341  func isbadbyte(a *obj.Addr) bool {
  4342  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4343  }
  4344  
  4345  var naclret = []uint8{
  4346  	0x5e, // POPL SI
  4347  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4348  	0x83,
  4349  	0xe6,
  4350  	0xe0, // ANDL $~31, SI
  4351  	0x4c,
  4352  	0x01,
  4353  	0xfe, // ADDQ R15, SI
  4354  	0xff,
  4355  	0xe6, // JMP SI
  4356  }
  4357  
  4358  var naclret8 = []uint8{
  4359  	0x5d, // POPL BP
  4360  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4361  	0x83,
  4362  	0xe5,
  4363  	0xe0, // ANDL $~31, BP
  4364  	0xff,
  4365  	0xe5, // JMP BP
  4366  }
  4367  
  4368  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4369  
  4370  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4371  
  4372  var naclmovs = []uint8{
  4373  	0x89,
  4374  	0xf6, // MOVL SI, SI
  4375  	0x49,
  4376  	0x8d,
  4377  	0x34,
  4378  	0x37, // LEAQ (R15)(SI*1), SI
  4379  	0x89,
  4380  	0xff, // MOVL DI, DI
  4381  	0x49,
  4382  	0x8d,
  4383  	0x3c,
  4384  	0x3f, // LEAQ (R15)(DI*1), DI
  4385  }
  4386  
  4387  var naclstos = []uint8{
  4388  	0x89,
  4389  	0xff, // MOVL DI, DI
  4390  	0x49,
  4391  	0x8d,
  4392  	0x3c,
  4393  	0x3f, // LEAQ (R15)(DI*1), DI
  4394  }
  4395  
  4396  func nacltrunc(ctxt *obj.Link, reg int) {
  4397  	if reg >= REG_R8 {
  4398  		ctxt.AsmBuf.Put1(0x45)
  4399  	}
  4400  	reg = (reg - REG_AX) & 7
  4401  	ctxt.AsmBuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4402  }
  4403  
  4404  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4405  	ctxt.AsmBuf.Reset()
  4406  	ctxt.Asmode = int(p.Mode)
  4407  
  4408  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4409  		switch p.As {
  4410  		case obj.ARET:
  4411  			ctxt.AsmBuf.Put(naclret8)
  4412  			return
  4413  
  4414  		case obj.ACALL,
  4415  			obj.AJMP:
  4416  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4417  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4418  			}
  4419  
  4420  		case AINT:
  4421  			ctxt.AsmBuf.Put1(0xf4)
  4422  			return
  4423  		}
  4424  	}
  4425  
  4426  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4427  		if p.As == AREP {
  4428  			ctxt.Rep++
  4429  			return
  4430  		}
  4431  
  4432  		if p.As == AREPN {
  4433  			ctxt.Repn++
  4434  			return
  4435  		}
  4436  
  4437  		if p.As == ALOCK {
  4438  			ctxt.Lock++
  4439  			return
  4440  		}
  4441  
  4442  		if p.As != ALEAQ && p.As != ALEAL {
  4443  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4444  				nacltrunc(ctxt, int(p.From.Index))
  4445  			}
  4446  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4447  				nacltrunc(ctxt, int(p.To.Index))
  4448  			}
  4449  		}
  4450  
  4451  		switch p.As {
  4452  		case obj.ARET:
  4453  			ctxt.AsmBuf.Put(naclret)
  4454  			return
  4455  
  4456  		case obj.ACALL,
  4457  			obj.AJMP:
  4458  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4459  				// ANDL $~31, reg
  4460  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4461  				// ADDQ R15, reg
  4462  				ctxt.AsmBuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4463  			}
  4464  
  4465  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4466  				// ANDL $~31, reg
  4467  				ctxt.AsmBuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4468  				// ADDQ R15, reg
  4469  				ctxt.AsmBuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4470  			}
  4471  
  4472  		case AINT:
  4473  			ctxt.AsmBuf.Put1(0xf4)
  4474  			return
  4475  
  4476  		case ASCASB,
  4477  			ASCASW,
  4478  			ASCASL,
  4479  			ASCASQ,
  4480  			ASTOSB,
  4481  			ASTOSW,
  4482  			ASTOSL,
  4483  			ASTOSQ:
  4484  			ctxt.AsmBuf.Put(naclstos)
  4485  
  4486  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4487  			ctxt.AsmBuf.Put(naclmovs)
  4488  		}
  4489  
  4490  		if ctxt.Rep != 0 {
  4491  			ctxt.AsmBuf.Put1(0xf3)
  4492  			ctxt.Rep = 0
  4493  		}
  4494  
  4495  		if ctxt.Repn != 0 {
  4496  			ctxt.AsmBuf.Put1(0xf2)
  4497  			ctxt.Repn = 0
  4498  		}
  4499  
  4500  		if ctxt.Lock != 0 {
  4501  			ctxt.AsmBuf.Put1(0xf0)
  4502  			ctxt.Lock = 0
  4503  		}
  4504  	}
  4505  
  4506  	ctxt.Rexflag = 0
  4507  	ctxt.Vexflag = 0
  4508  	mark := ctxt.AsmBuf.Len()
  4509  	ctxt.Asmode = int(p.Mode)
  4510  	doasm(ctxt, p)
  4511  	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
  4512  		/*
  4513  		 * as befits the whole approach of the architecture,
  4514  		 * the rex prefix must appear before the first opcode byte
  4515  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4516  		 * before the 0f opcode escape!), or it might be ignored.
  4517  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4518  		 */
  4519  		if p.Mode != 64 {
  4520  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4521  		}
  4522  		n := ctxt.AsmBuf.Len()
  4523  		var np int
  4524  		for np = mark; np < n; np++ {
  4525  			c := ctxt.AsmBuf.Peek(np)
  4526  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4527  				break
  4528  			}
  4529  		}
  4530  		ctxt.AsmBuf.Insert(np, byte(0x40|ctxt.Rexflag))
  4531  	}
  4532  
  4533  	n := ctxt.AsmBuf.Len()
  4534  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4535  		r := &ctxt.Cursym.R[i]
  4536  		if int64(r.Off) < p.Pc {
  4537  			break
  4538  		}
  4539  		if ctxt.Rexflag != 0 {
  4540  			r.Off++
  4541  		}
  4542  		if r.Type == obj.R_PCREL {
  4543  			if p.Mode == 64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4544  				// PC-relative addressing is relative to the end of the instruction,
  4545  				// but the relocations applied by the linker are relative to the end
  4546  				// of the relocation. Because immediate instruction
  4547  				// arguments can follow the PC-relative memory reference in the
  4548  				// instruction encoding, the two may not coincide. In this case,
  4549  				// adjust addend so that linker can keep relocating relative to the
  4550  				// end of the relocation.
  4551  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4552  			} else if p.Mode == 32 {
  4553  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4554  				// assumes that the previous instruction loaded the PC of the end
  4555  				// of that instruction into CX, so the adjustment is relative to
  4556  				// that.
  4557  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4558  			}
  4559  		}
  4560  		if r.Type == obj.R_GOTPCREL && p.Mode == 32 {
  4561  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4562  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4563  		}
  4564  
  4565  	}
  4566  
  4567  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4568  		switch p.To.Reg {
  4569  		case REG_SP:
  4570  			ctxt.AsmBuf.Put(naclspfix)
  4571  		case REG_BP:
  4572  			ctxt.AsmBuf.Put(naclbpfix)
  4573  		}
  4574  	}
  4575  }