github.com/sbinet/go@v0.0.0-20160827155028-54d7de7dd62b/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"encoding/binary"
    36  	"fmt"
    37  	"log"
    38  	"strings"
    39  )
    40  
    41  // Instruction layout.
    42  
    43  const (
    44  	// Loop alignment constants:
    45  	// want to align loop entry to LoopAlign-byte boundary,
    46  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    47  	// We define a loop entry as the target of a backward jump.
    48  	//
    49  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    50  	// and it aligns all jump targets, not just backward jump targets.
    51  	//
    52  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    53  	// is very slight but negative, so the alignment is disabled by
    54  	// setting MaxLoopPad = 0. The code is here for reference and
    55  	// for future experiments.
    56  	//
    57  	LoopAlign  = 16 // byte boundary a loop entry would be aligned to
    58  	MaxLoopPad = 0  // maximum NOP bytes inserted per loop entry; 0 disables loop alignment
    59  	FuncAlign  = 16 // NOTE(review): presumably the function alignment in bytes; its use is not visible in this part of the file
    60  )
    61  
    62  type Optab struct { // Optab is one row of the optab table: an instruction and the data needed to encode it.
    63  	as     obj.As // instruction this row describes (e.g. AADDL)
    64  	ytab   []ytab // operand patterns the instruction accepts, scanned in order
    65  	prefix uint8 // one of the P* constants (Px, Pe, Pw, ...); controls the prefix bytes to emit
    66  	op     [23]uint8 // opcode bytes; the zoffset values of the ytab lines step through this array
    67  }
    68  
    69  type ytab struct { // ytab is one line of an operand-pattern table: three operand classes plus an encoding form.
    70  	from    uint8 // Y* class required of the from operand
    71  	from3   uint8 // Y* class of the middle operand in three-operand forms (e.g. xmmV in the VEX tables); Ynone otherwise
    72  	to      uint8 // Y* class required of the to operand
    73  	zcase   uint8 // Z* constant naming the encoding form used when this line matches
    74  	zoffset uint8 // how far z advances through Optab.op when the scan steps past this line; 0 lets the next line reuse the same bytes
    75  }
    76  
    77  type Movtab struct { // NOTE(review): the table built from Movtab and the code reading it are outside this part of the file.
    78  	as   obj.As // instruction the entry applies to
    79  	ft   uint8 // presumably the from operand class (cf. ytab.from) — confirm against the user of Movtab
    80  	f3t  uint8 // presumably the from3 operand class (cf. ytab.from3) — confirm
    81  	tt   uint8 // presumably the to operand class (cf. ytab.to) — confirm
    82  	code uint8 // NOTE(review): meaning not visible here
    83  	op   [4]uint8 // up to four bytes of per-entry data; interpretation not visible here
    84  }
    85  
    86  const ( // Operand classes ("Ytypes"): the values stored in ytab.from/from3/to and used to index ycover.
    87  	Yxxx = iota
    88  	Ynone // operand slot unused (every slot of the ynone table is Ynone)
    89  	Yi0 // $0
    90  	Yi1 // $1
    91  	Yi8 // $x, x fits in int8
    92  	Yu8 // $x, x fits in uint8
    93  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
    94  	Ys32
    95  	Yi32
    96  	Yi64
    97  	Yiauto
    98  	Yal
    99  	Ycl
   100  	Yax
   101  	Ycx
   102  	Yrb
   103  	Yrl
   104  	Yrl32 // Yrl on 32-bit system
   105  	Yrf
   106  	Yf0
   107  	Yrx
   108  	Ymb
   109  	Yml
   110  	Ym
   111  	Ybr
   112  	Ycs
   113  	Yss
   114  	Yds
   115  	Yes
   116  	Yfs
   117  	Ygs
   118  	Ygdtr
   119  	Yidtr
   120  	Yldtr
   121  	Ymsw
   122  	Ytask
   123  	Ycr0
   124  	Ycr1
   125  	Ycr2
   126  	Ycr3
   127  	Ycr4
   128  	Ycr5
   129  	Ycr6
   130  	Ycr7
   131  	Ycr8
   132  	Ydr0
   133  	Ydr1
   134  	Ydr2
   135  	Ydr3
   136  	Ydr4
   137  	Ydr5
   138  	Ydr6
   139  	Ydr7
   140  	Ytr0
   141  	Ytr1
   142  	Ytr2
   143  	Ytr3
   144  	Ytr4
   145  	Ytr5
   146  	Ytr6
   147  	Ytr7
   148  	Ymr
   149  	Ymm
   150  	Yxr
   151  	Yxm
   152  	Yyr
   153  	Yym
   154  	Ytls
   155  	Ytextsize
   156  	Yindir
   157  	Ymax // number of classes; ycover is declared with Ymax*Ymax entries
   158  )
   159  
   160  const ( // Encoding forms ("Ztypes"): the values stored in ytab.zcase.
   161  	Zxxx = iota
   162  	Zlit
   163  	Zlitm_r
   164  	Z_rp
   165  	Zbr
   166  	Zcall
   167  	Zcallcon
   168  	Zcallduff
   169  	Zcallind
   170  	Zcallindreg
   171  	Zib_
   172  	Zib_rp
   173  	Zibo_m // opcode byte, encoded addressing mode, then one immediate byte (per the comment above optab)
   174  	Zibo_m_xm
   175  	Zil_
   176  	Zil_rp
   177  	Ziq_rp
   178  	Zilo_m // as Zibo_m, but with a long (32-bit) immediate (per the comment above optab)
   179  	Zjmp
   180  	Zjmpcon
   181  	Zloop
   182  	Zo_iw
   183  	Zm_o
   184  	Zm_r
   185  	Zm2_r
   186  	Zm_r_xm
   187  	Zm_r_i_xm
   188  	Zm_r_xm_nr
   189  	Zr_m_xm_nr
   190  	Zibm_r /* mmx1,mmx2/mem64,imm8 */
   191  	Zibr_m
   192  	Zmb_r
   193  	Zaut_r
   194  	Zo_m
   195  	Zo_m64
   196  	Zpseudo
   197  	Zr_m
   198  	Zr_m_xm
   199  	Zrp_
   200  	Z_ib
   201  	Z_il
   202  	Zm_ibo
   203  	Zm_ilo
   204  	Zib_rr
   205  	Zil_rr
   206  	Zclr
   207  	Zbyte
   208  	Zvex_rm_v_r
   209  	Zvex_r_v_rm
   210  	Zvex_v_rm_r
   211  	Zvex_i_rm_r
   212  	Zvex_i_r_v
   213  	Zvex_i_rm_v_r
   214  	Zmax // last iota value: one past the final Z case
   215  )
   216  
   217  const ( // Values for Optab.prefix (P*), followed by the Rx* bit masks.
   218  	Px   = 0 // NOTE(review): appears to mean "no prefix bytes" (used by single-byte rows such as CLC) — confirm in the encoder
   219  	Px1  = 1    // symbolic; exact value doesn't matter
   220  	P32  = 0x32 /* 32-bit only */
   221  	Pe   = 0x66 /* operand escape */
   222  	Pm   = 0x0f /* 2byte opcode escape */
   223  	Pq   = 0xff /* both escapes: 66 0f */
   224  	Pb   = 0xfe /* byte operands */
   225  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   226  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   227  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   228  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   229  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   230  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   231  	Pw   = 0x48 /* Rex.w */
   232  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   233  	Py   = 0x80 /* defaults to 64-bit mode */
   234  	Py1  = 0x81 // symbolic; exact value doesn't matter
   235  	Py3  = 0x83 // symbolic; exact value doesn't matter
   236  	Pvex = 0x84 // symbolic: exact value doesn't matter
   237  
   238  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   239  	Rxr = 1 << 2 /* extend modrm reg */
   240  	Rxx = 1 << 1 /* extend sib index */
   241  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   242  )
   243  
   244  const (
   245  	// Encoding for VEX prefix in tables.
   246  	// The P, L, and W fields are chosen to match
   247  	// their eventual locations in the VEX prefix bytes.
   248  
   249  	// P field - 2 bits
   250  	vex66 = 1 << 0
   251  	vexF3 = 2 << 0
   252  	vexF2 = 3 << 0
   253  	// L field - 1 bit
   254  	vexLZ  = 0 << 2 // same value as vex128: L bit clear
   255  	vexLIG = 0 << 2 // same value as vex128: L bit clear
   256  	vex128 = 0 << 2
   257  	vex256 = 1 << 2
   258  	// W field - 1 bit
   259  	vexWIG = 0 << 7 // same value as vexW0: W bit clear
   260  	vexW0  = 0 << 7
   261  	vexW1  = 1 << 7
   262  	// M field - 5 bits, but mostly reserved; we can store up to 4
   263  	vex0F   = 1 << 3
   264  	vex0F38 = 2 << 3
   265  	vex0F3A = 3 << 3
   266  
   267  	// Combinations used in the manual.
   268  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   269  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   270  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   271  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   272  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   273  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   274  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   275  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   276  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   277  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   278  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   279  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   280  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   281  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   282  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   283  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   284  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   285  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   286  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   287  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   288  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   289  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   290  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   291  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   292  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   293  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   294  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   295  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   296  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   297  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   298  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   299  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   300  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   301  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   302  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   303  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   304  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   305  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   306  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   307  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   308  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   309  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   310  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   311  )
   312  
   313  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] set means class a also counts as class b; per the optab comment it is set up in instinit
   314  
   315  var reg [MAXREG]int // NOTE(review): per-register table; MAXREG and the code that fills it are outside this part of the file
   316  
   317  var regrex [MAXREG + 1]int // NOTE(review): per-register table with one extra slot; contents are set up outside this part of the file
   318  
   319  var ynone = []ytab{ // all three operand slots empty; used in optab by rows such as AAA, CDQ, CLC, CLD
   320  	{Ynone, Ynone, Ynone, Zlit, 1},
   321  }
   322  
   323  var ytext = []ytab{ // Zpseudo lines: Ymb from, Ytextsize to, with an optional Yi32 from3
   324  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   325  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   326  }
   327  
   328  var ynop = []ytab{ // Zpseudo lines accepting at most one operand, in either the to or the from slot
   329  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   330  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   331  	{Ynone, Ynone, Yml, Zpseudo, 0},
   332  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   333  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   334  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   335  	{Yml, Ynone, Ynone, Zpseudo, 0},
   336  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   337  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   338  }
   339  
   340  var yfuncdata = []ytab{ // Zpseudo: Yi32 from, Ym to
   341  	{Yi32, Ynone, Ym, Zpseudo, 0},
   342  }
   343  
   344  var ypcdata = []ytab{ // Zpseudo: Yi32 from, Yi32 to
   345  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   346  }
   347  
   348  var yxorb = []ytab{ // used in optab by ADCB, ADDB and ANDB (rows with the Pb "byte operands" prefix)
   349  	{Yi32, Ynone, Yal, Zib_, 1},
   350  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   351  	{Yrb, Ynone, Ymb, Zr_m, 1},
   352  	{Ymb, Ynone, Yrb, Zm_r, 1},
   353  }
   354  
   355  var yxorl = []ytab{ // used in optab by ADCL/ADCQ/ADCW and ANDL/ANDQ/ANDW
   356  	{Yi8, Ynone, Yml, Zibo_m, 2}, // listed first, so it is tried before the Yi32 lines
   357  	{Yi32, Ynone, Yax, Zil_, 1},
   358  	{Yi32, Ynone, Yml, Zilo_m, 2},
   359  	{Yrl, Ynone, Yml, Zr_m, 1},
   360  	{Yml, Ynone, Yrl, Zm_r, 1},
   361  }
   362  
   363  var yaddl = []ytab{ // line-for-line identical to yxorl; used by ADDL/ADDQ/ADDW and walked through in the comment above optab
   364  	{Yi8, Ynone, Yml, Zibo_m, 2},
   365  	{Yi32, Ynone, Yax, Zil_, 1},
   366  	{Yi32, Ynone, Yml, Zilo_m, 2},
   367  	{Yrl, Ynone, Yml, Zr_m, 1},
   368  	{Yml, Ynone, Yrl, Zm_r, 1},
   369  }
   370  
   371  var yincb = []ytab{ // single to operand of class Ymb
   372  	{Ynone, Ynone, Ymb, Zo_m, 2},
   373  }
   374  
   375  var yincw = []ytab{ // single to operand of class Yml
   376  	{Ynone, Ynone, Yml, Zo_m, 2},
   377  }
   378  
   379  var yincl = []ytab{ // single to operand: Yrl via Z_rp, otherwise Yml via Zo_m
   380  	{Ynone, Ynone, Yrl, Z_rp, 1},
   381  	{Ynone, Ynone, Yml, Zo_m, 2},
   382  }
   383  
   384  var yincq = []ytab{ // identical to yincw
   385  	{Ynone, Ynone, Yml, Zo_m, 2},
   386  }
   387  
   388  var ycmpb = []ytab{ // same shapes as yxorb with the operand order reversed: the immediate is the to operand
   389  	{Yal, Ynone, Yi32, Z_ib, 1},
   390  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   391  	{Ymb, Ynone, Yrb, Zm_r, 1},
   392  	{Yrb, Ynone, Ymb, Zr_m, 1},
   393  }
   394  
   395  var ycmpl = []ytab{ // same shapes as yxorl with the operand order reversed: the immediate is the to operand
   396  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   397  	{Yax, Ynone, Yi32, Z_il, 1},
   398  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   399  	{Yml, Ynone, Yrl, Zm_r, 1},
   400  	{Yrl, Ynone, Yml, Zr_m, 1},
   401  }
   402  
   403  var yshb = []ytab{ // from is $1 (Yi1), an immediate (Yi32) or Ycx; to is Ymb
   404  	{Yi1, Ynone, Ymb, Zo_m, 2},
   405  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   406  	{Ycx, Ynone, Ymb, Zo_m, 2},
   407  }
   408  
   409  var yshl = []ytab{ // like yshb with Yml as the to operand, plus a Ycl line
   410  	{Yi1, Ynone, Yml, Zo_m, 2},
   411  	{Yi32, Ynone, Yml, Zibo_m, 2},
   412  	{Ycl, Ynone, Yml, Zo_m, 2},
   413  	{Ycx, Ynone, Yml, Zo_m, 2},
   414  }
   415  
   416  var ytestb = []ytab{ // line-for-line identical to yxorb
   417  	{Yi32, Ynone, Yal, Zib_, 1},
   418  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   419  	{Yrb, Ynone, Ymb, Zr_m, 1},
   420  	{Ymb, Ynone, Yrb, Zm_r, 1},
   421  }
   422  
   423  var ytestl = []ytab{ // yxorl without its leading Yi8 line
   424  	{Yi32, Ynone, Yax, Zil_, 1},
   425  	{Yi32, Ynone, Yml, Zilo_m, 2},
   426  	{Yrl, Ynone, Yml, Zr_m, 1},
   427  	{Yml, Ynone, Yrl, Zm_r, 1},
   428  }
   429  
   430  var ymovb = []ytab{ // byte move shapes: Yrb<->Ymb in both directions, then immediate to Yrb or Ymb
   431  	{Yrb, Ynone, Ymb, Zr_m, 1},
   432  	{Ymb, Ynone, Yrb, Zm_r, 1},
   433  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   434  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   435  }
   436  
   437  var ymbs = []ytab{ // single from operand of class Ymb
   438  	{Ymb, Ynone, Ynone, Zm_o, 2},
   439  }
   440  
   441  var ybtl = []ytab{ // used in optab by the BT*, BTC*, BTR* and BTS* rows
   442  	{Yi8, Ynone, Yml, Zibo_m, 2},
   443  	{Yrl, Ynone, Yml, Zr_m, 1},
   444  }
   445  
   446  var ymovw = []ytab{ // ymovl without its MMX/XMM MOVD lines
   447  	{Yrl, Ynone, Yml, Zr_m, 1},
   448  	{Yml, Ynone, Yrl, Zm_r, 1},
   449  	{Yi0, Ynone, Yrl, Zclr, 1},
   450  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   451  	{Yi32, Ynone, Yml, Zilo_m, 2},
   452  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   453  }
   454  
   455  var ymovl = []ytab{ // integer shapes first, then the MMX and XMM MOVD shapes, then Yiauto
   456  	{Yrl, Ynone, Yml, Zr_m, 1},
   457  	{Yml, Ynone, Yrl, Zm_r, 1},
   458  	{Yi0, Ynone, Yrl, Zclr, 1},
   459  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   460  	{Yi32, Ynone, Yml, Zilo_m, 2},
   461  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   462  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   463  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   464  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   465  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   466  }
   467  
   468  var yret = []ytab{ // no operand, or a single Yi32 from operand; both lines use Zo_iw
   469  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   470  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   471  }
   472  
   473  var ymovq = []ytab{ // the trailing comment on each line names the prefix/opcode that line selects
   474  	// valid in 32-bit mode
   475  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   476  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   477  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   478  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   479  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   480  
   481  	// valid only in 64-bit mode, usually with 64-bit prefix
   482  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   483  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   484  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   485  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   486  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   487  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   488  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   489  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   490  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   491  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   492  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   493  }
   494  
   495  var ym_rl = []ytab{ // Ym from, Yrl to
   496  	{Ym, Ynone, Yrl, Zm_r, 1},
   497  }
   498  
   499  var yrl_m = []ytab{ // Yrl from, Ym to; used in optab by BOUNDL and BOUNDW
   500  	{Yrl, Ynone, Ym, Zr_m, 1},
   501  }
   502  
   503  var ymb_rl = []ytab{ // Ymb from, Yrl to, encoded with Zmb_r
   504  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   505  }
   506  
   507  var yml_rl = []ytab{ // Yml from, Yrl to; used in optab by the BSF*, BSR* and CMOVL* rows
   508  	{Yml, Ynone, Yrl, Zm_r, 1},
   509  }
   510  
   511  var yrl_ml = []ytab{ // Yrl from, Yml to; used in optab by ARPL
   512  	{Yrl, Ynone, Yml, Zr_m, 1},
   513  }
   514  
   515  var yml_mb = []ytab{ // NOTE(review): despite the name, both lines use the byte classes Yrb/Ymb
   516  	{Yrb, Ynone, Ymb, Zr_m, 1},
   517  	{Ymb, Ynone, Yrb, Zm_r, 1},
   518  }
   519  
   520  var yrb_mb = []ytab{ // Yrb from, Ymb to
   521  	{Yrb, Ynone, Ymb, Zr_m, 1},
   522  }
   523  
   524  var yxchg = []ytab{ // Yax paired with Yrl in either order, then the general Yrl/Yml shapes
   525  	{Yax, Ynone, Yrl, Z_rp, 1},
   526  	{Yrl, Ynone, Yax, Zrp_, 1},
   527  	{Yrl, Ynone, Yml, Zr_m, 1},
   528  	{Yml, Ynone, Yrl, Zm_r, 1},
   529  }
   530  
   531  var ydivl = []ytab{ // single from operand of class Yml
   532  	{Yml, Ynone, Ynone, Zm_o, 2},
   533  }
   534  
   535  var ydivb = []ytab{ // single from operand of class Ymb (identical to ymbs)
   536  	{Ymb, Ynone, Ynone, Zm_o, 2},
   537  }
   538  
   539  var yimul = []ytab{ // one-operand shape (same line as ydivl) followed by the two-operand shapes
   540  	{Yml, Ynone, Ynone, Zm_o, 2},
   541  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   542  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   543  	{Yml, Ynone, Yrl, Zm_r, 2},
   544  }
   545  
   546  var yimul3 = []ytab{ // three-operand shape: Yi8 from, Yml from3, Yrl to
   547  	{Yi8, Yml, Yrl, Zibm_r, 2},
   548  }
   549  
   550  var ybyte = []ytab{ // single Yi64 from operand; used in optab by BYTE
   551  	{Yi64, Ynone, Ynone, Zbyte, 1},
   552  }
   553  
   554  var yin = []ytab{ // a Yi32 from operand (Zib_), or no operand at all (Zlit)
   555  	{Yi32, Ynone, Ynone, Zib_, 1},
   556  	{Ynone, Ynone, Ynone, Zlit, 1},
   557  }
   558  
   559  var yint = []ytab{ // single Yi32 from operand, encoded with Zib_
   560  	{Yi32, Ynone, Ynone, Zib_, 1},
   561  }
   562  
   563  var ypushl = []ytab{ // single from operand: Yrl, Ym, Yi8 or Yi32
   564  	{Yrl, Ynone, Ynone, Zrp_, 1},
   565  	{Ym, Ynone, Ynone, Zm_o, 2},
   566  	{Yi8, Ynone, Ynone, Zib_, 1},
   567  	{Yi32, Ynone, Ynone, Zil_, 1},
   568  }
   569  
   570  var ypopl = []ytab{ // single to operand: Yrl or Ym
   571  	{Ynone, Ynone, Yrl, Z_rp, 1},
   572  	{Ynone, Ynone, Ym, Zo_m, 2},
   573  }
   574  
   575  var ybswap = []ytab{ // single Yrl to operand; used in optab by BSWAPL and BSWAPQ
   576  	{Ynone, Ynone, Yrl, Z_rp, 2},
   577  }
   578  
   579  var yscond = []ytab{ // single Ymb to operand (identical to yincb)
   580  	{Ynone, Ynone, Ymb, Zo_m, 2},
   581  }
   582  
   583  var yjcond = []ytab{ // Ybr target with from absent, $0 or $1; all three lines use Zbr and, via zoffset 0, the same op bytes
   584  	{Ynone, Ynone, Ybr, Zbr, 0},
   585  	{Yi0, Ynone, Ybr, Zbr, 0},
   586  	{Yi1, Ynone, Ybr, Zbr, 1},
   587  }
   588  
   589  var yloop = []ytab{ // Ybr target encoded with Zloop
   590  	{Ynone, Ynone, Ybr, Zloop, 1},
   591  }
   592  
   593  var ycall = []ytab{ // used in optab by obj.ACALL with op bytes {0xff, 02, 0xff, 0x15, 0xe8}
   594  	{Ynone, Ynone, Yml, Zcallindreg, 0}, // zoffset 0: shares 0xff, 02 with the next line
   595  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   596  	{Ynone, Ynone, Yindir, Zcallind, 2}, // 0xff, 0x15
   597  	{Ynone, Ynone, Ybr, Zcall, 0}, // zoffset 0: shares 0xe8 with the next line
   598  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   599  }
   600  
   601  var yduff = []ytab{ // single Yi32 to operand encoded with Zcallduff
   602  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   603  }
   604  
   605  var yjmp = []ytab{ // to operand is Yml (Zo_m64), Ybr (Zjmp) or Yi32 (Zjmpcon); the last two share op bytes
   606  	{Ynone, Ynone, Yml, Zo_m64, 2},
   607  	{Ynone, Ynone, Ybr, Zjmp, 0},
   608  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   609  }
   610  
   611  var yfmvd = []ytab{ // NOTE(review): this and the following yf*/yst*/ysvrs tables use only Yf0, Yrf, Ym and Yax; their optab users are outside this part of the file
   612  	{Ym, Ynone, Yf0, Zm_o, 2},
   613  	{Yf0, Ynone, Ym, Zo_m, 2},
   614  	{Yrf, Ynone, Yf0, Zm_o, 2},
   615  	{Yf0, Ynone, Yrf, Zo_m, 2},
   616  }
   617  
   618  var yfmvdp = []ytab{ // the two yfmvd lines whose from operand is Yf0
   619  	{Yf0, Ynone, Ym, Zo_m, 2},
   620  	{Yf0, Ynone, Yrf, Zo_m, 2},
   621  }
   622  
   623  var yfmvf = []ytab{ // the first two lines of yfmvd (Ym <-> Yf0)
   624  	{Ym, Ynone, Yf0, Zm_o, 2},
   625  	{Yf0, Ynone, Ym, Zo_m, 2},
   626  }
   627  
   628  var yfmvx = []ytab{ // Ym from, Yf0 to
   629  	{Ym, Ynone, Yf0, Zm_o, 2},
   630  }
   631  
   632  var yfmvp = []ytab{ // Yf0 from, Ym to
   633  	{Yf0, Ynone, Ym, Zo_m, 2},
   634  }
   635  
   636  var yfcmv = []ytab{ // Yrf from, Yf0 to
   637  	{Yrf, Ynone, Yf0, Zm_o, 2},
   638  }
   639  
   640  var yfadd = []ytab{ // Ym or Yrf into Yf0, or Yf0 into Yrf
   641  	{Ym, Ynone, Yf0, Zm_o, 2},
   642  	{Yrf, Ynone, Yf0, Zm_o, 2},
   643  	{Yf0, Ynone, Yrf, Zo_m, 2},
   644  }
   645  
   646  var yfaddp = []ytab{ // Yf0 from, Yrf to
   647  	{Yf0, Ynone, Yrf, Zo_m, 2},
   648  }
   649  
   650  var yfxch = []ytab{ // Yf0 and Yrf in either order
   651  	{Yf0, Ynone, Yrf, Zo_m, 2},
   652  	{Yrf, Ynone, Yf0, Zm_o, 2},
   653  }
   654  
   655  var ycompp = []ytab{ // same line as yfaddp; see the caveat on the entry
   656  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   657  }
   658  
   659  var ystsw = []ytab{ // to operand is Ym (Zo_m) or Yax (Zlit)
   660  	{Ynone, Ynone, Ym, Zo_m, 2},
   661  	{Ynone, Ynone, Yax, Zlit, 1},
   662  }
   663  
   664  var ystcw = []ytab{ // a single Ym operand in either the to or the from slot
   665  	{Ynone, Ynone, Ym, Zo_m, 2},
   666  	{Ym, Ynone, Ynone, Zm_o, 2},
   667  }
   668  
   669  var ysvrs = []ytab{ // line-for-line identical to ystcw
   670  	{Ynone, Ynone, Ym, Zo_m, 2},
   671  	{Ym, Ynone, Ynone, Zm_o, 2},
   672  }
   673  
   674  var ymm = []ytab{ // Ymm->Ymr line first, then the Yxm->Yxr line
   675  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   676  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   677  }
   678  
   679  var yxm = []ytab{ // Yxm from, Yxr to; used in optab by ADDPD/ADDPS/ADDSD/ADDSS and the ANDPD/ANDPS/ANDNPD/ANDNPS rows
   680  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   681  }
   682  
   683  var yxm_q4 = []ytab{ // same operands as yxm, but encoded with Zm_r instead of Zm_r_xm
   684  	{Yxm, Ynone, Yxr, Zm_r, 1},
   685  }
   686  
   687  var yxcvm1 = []ytab{ // Yxm from; to is Yxr or Ymr
   688  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   689  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   690  }
   691  
   692  var yxcvm2 = []ytab{ // Yxr to; from is Yxm or Ymm
   693  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   694  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   695  }
   696  
   697  /*
   698  var yxmq = []ytab{
   699  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   700  }
   701  */
   702  
   703  var yxr = []ytab{ // Yxr from, Yxr to
   704  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   705  }
   706  
   707  var yxr_ml = []ytab{ // Yxr from, Yml to
   708  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   709  }
   710  
   711  var ymr = []ytab{ // Ymr from, Ymr to
   712  	{Ymr, Ynone, Ymr, Zm_r, 1},
   713  }
   714  
   715  var ymr_ml = []ytab{ // Ymr from, Yml to
   716  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   717  }
   718  
   719  var yxcmp = []ytab{ // line-for-line identical to yxm
   720  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   721  }
   722  
   723  var yxcmpi = []ytab{ // three-operand shape: Yxm from, Yxr from3, Yi8 to
   724  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   725  }
   726  
   727  var yxmov = []ytab{ // Yxm->Yxr and Yxr->Yxm
   728  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   729  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   730  }
   731  
   732  var yxcvfl = []ytab{ // Yxm from, Yrl to
   733  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   734  }
   735  
   736  var yxcvlf = []ytab{ // Yml from, Yxr to
   737  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   738  }
   739  
   740  var yxcvfq = []ytab{ // same operands as yxcvfl with zoffset 2
   741  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   742  }
   743  
   744  var yxcvqf = []ytab{ // same operands as yxcvlf with zoffset 2
   745  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   746  }
   747  
   748  var yps = []ytab{ // for each of the Ymr and Yxr destinations: a Ymm/Yxm source line and a Yi8 immediate line
   749  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   750  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   751  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   752  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   753  }
   754  
   755  var yxrrl = []ytab{ // Yxr from, Yrl to
   756  	{Yxr, Ynone, Yrl, Zm_r, 1},
   757  }
   758  
   759  var ymrxr = []ytab{ // Yxr to; from is Ymr (Zm_r) or Yxm (Zm_r_xm)
   760  	{Ymr, Ynone, Yxr, Zm_r, 1},
   761  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   762  }
   763  
   764  var ymshuf = []ytab{ // Yi8 from, Ymm from3, Ymr to (Zibm_r is annotated "mmx1,mmx2/mem64,imm8")
   765  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   766  }
   767  
   768  var ymshufb = []ytab{ // Yxm from, Yxr to, encoded with Zm2_r
   769  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   770  }
   771  
   772  var yxshuf = []ytab{ // Yu8 from, Yxm from3, Yxr to
   773  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   774  }
   775  
   776  var yextrw = []ytab{ // Yu8 from, Yxr from3, Yrl to
   777  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   778  }
   779  
   780  var yextr = []ytab{ // Yu8 from, Yxr from3, Ymm to, encoded with Zibr_m
   781  	{Yu8, Yxr, Ymm, Zibr_m, 3},
   782  }
   783  
   784  var yinsrw = []ytab{ // Yu8 from, Yml from3, Yxr to
   785  	{Yu8, Yml, Yxr, Zibm_r, 2},
   786  }
   787  
   788  var yinsr = []ytab{ // Yu8 from, Ymm from3, Yxr to
   789  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   790  }
   791  
   792  var ypsdq = []ytab{ // Yi8 from, Yxr to
   793  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   794  }
   795  
   796  var ymskb = []ytab{ // Yrl to; from is Yxr or Ymr
   797  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   798  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   799  }
   800  
   801  var ycrc32l = []ytab{ // Yml from, Yrl to, encoded with Zlitm_r
   802  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   803  }
   804  
   805  var yprefetch = []ytab{ // single Ym from operand
   806  	{Ym, Ynone, Ynone, Zm_o, 2},
   807  }
   808  
   809  var yaes = []ytab{ // Yxm from, Yxr to, encoded with Zlitm_r
   810  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   811  }
   812  
   813  var yaes2 = []ytab{ // line-for-line identical to yxshuf
   814  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   815  }
   816  
   817  var yxbegin = []ytab{ // Ybr target encoded with Zjmp
   818  	{Ynone, Ynone, Ybr, Zjmp, 1},
   819  }
   820  
   821  var yxabort = []ytab{ // single Yu8 from operand
   822  	{Yu8, Ynone, Ynone, Zib_, 1},
   823  }
   824  
   825  var ylddqu = []ytab{ // Ym from, Yxr to
   826  	{Ym, Ynone, Yxr, Zm_r, 1},
   827  }
   828  
   829  // VEX instructions that come in two forms:
   830  //	VTHING xmm2/m128, xmmV, xmm1
   831  //	VTHING ymm2/m256, ymmV, ymm1
   832  // The opcode array in the corresponding Optab entry
   833  // should contain the (VEX prefixes, opcode byte) pair
   834  // for each of the two forms.
   835  // For example, the entries for VPXOR are:
   836  //
   837  //	VPXOR xmm2/m128, xmmV, xmm1
   838  //	VEX.NDS.128.66.0F.WIG EF /r
   839  //
   840  //	VPXOR ymm2/m256, ymmV, ymm1
   841  //	VEX.NDS.256.66.0F.WIG EF /r
   842  //
   843  // The NDS/NDD/DDS part can be dropped, producing this
   844  // Optab entry:
   845  //
   846  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
   847  //
   848  var yvex_xy3 = []ytab{ // zoffset 2 on each line steps over one (VEX prefix, opcode) pair
   849  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   850  	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
   851  }
   852  
   853  var yvex_ri3 = []ytab{ // Yi8 from, Ymb from3, Yrl to
   854  	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
   855  }
   856  
   857  var yvex_xyi3 = []ytab{ // Yu8 immediate with an xmm line and a ymm line
   858  	{Yu8, Yxm, Yxr, Zvex_i_rm_r, 2},
   859  	{Yu8, Yym, Yyr, Zvex_i_rm_r, 2},
   860  }
   861  
   862  var yvex_yyi4 = []ytab{ //TODO don't hide 4 op, some versions have an xmm version
   863  	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   864  }
   865  
   866  var yvex_xyi4 = []ytab{ // like yvex_yyi4 but with a Yxm from operand
   867  	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   868  }
   869  
   870  var yvex_shift = []ytab{ // immediate-count lines (zoffset 3) first, then lines whose from operand is Yxm
   871  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   872  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   873  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   874  	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2}, // from stays Yxm even for the Yyr form
   875  }
   876  
   877  var yvex_shift_dq = []ytab{ // the two immediate-count lines of yvex_shift
   878  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   879  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   880  }
   881  
   882  var yvex_r3 = []ytab{ // NOTE(review): both lines are identical, so an in-order scan would always stop at the first — confirm intent
   883  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   884  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   885  }
   886  
   887  var yvex_vmr3 = []ytab{ // NOTE(review): both lines are identical here too — confirm intent
   888  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   889  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   890  }
   891  
   892  var yvex_xy2 = []ytab{ // two-operand variant of yvex_xy3 (from3 is Ynone)
   893  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   894  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   895  }
   896  
   897  var yvex_xyr2 = []ytab{ // Yxr or Yyr from, Yrl to
   898  	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
   899  	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
   900  }
   901  
   902  var yvex_vmovdqa = []ytab{ // both directions for the xmm classes, then both directions for the ymm classes
   903  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   904  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   905  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   906  	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
   907  }
   908  
   909  var yvex_vmovntdq = []ytab{ // Yxr or Yyr from, Ym to
   910  	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
   911  	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
   912  }
   913  
   914  var yvex_vpbroadcast = []ytab{ // Yxm from for both the Yxr and the Yyr destination
   915  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   916  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   917  }
   918  
   919  var ymmxmm0f38 = []ytab{ // Ymm->Ymr line (zoffset 3) and Yxm->Yxr line (zoffset 5), both Zlitm_r
   920  	{Ymm, Ynone, Ymr, Zlitm_r, 3},
   921  	{Yxm, Ynone, Yxr, Zlitm_r, 5},
   922  }
   923  
   924  /*
   925   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   926   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   927   * the entry with the given p->as and then looks through the ytable for that
   928   * instruction (the second field in the optab struct) for a line whose first
   929   * two values match the Ytypes of the p->from and p->to operands.  The function
   930   * oclass in span.c computes the specific Ytype of an operand and then the set
   931   * of more general Ytypes that it satisfies is implied by the ycover table, set
   932   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   933   * from the more general 8-bit constants, but instinit says
   934   *
   935   *        ycover[Yi0*Ymax + Ys32] = 1;
   936   *        ycover[Yi1*Ymax + Ys32] = 1;
   937   *        ycover[Yi8*Ymax + Ys32] = 1;
   938   *
   939   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   940   * if that's what an instruction can handle.
   941   *
   942   * In parallel with the scan through the ytable for the appropriate line, there
   943   * is a z pointer that starts out pointing at the strange magic byte list in
   944   * the Optab struct (its op field).  With each step past a non-matching ytable
   945   * line, z advances by that line's zoffset (the last entry; the 4th entry in the
   946   * C-era layout shown below).  When a matching line is found, that z pointer
   947   * has the extra data to use in laying down the instruction bytes.  The actual
   948   * bytes laid down are a function of the line's Ztype (zcase) and the z bytes.
   949   *
   950   * For example, let's look at AADDL.  The optab line says:
   951   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   952   *
   953   * and yaddl says
   954   *        uchar   yaddl[] =
   955   *        {
   956   *                Yi8,    Yml,    Zibo_m, 2,
   957   *                Yi32,   Yax,    Zil_,   1,
   958   *                Yi32,   Yml,    Zilo_m, 2,
   959   *                Yrl,    Yml,    Zr_m,   1,
   960   *                Yml,    Yrl,    Zm_r,   1,
   961   *                0
   962   *        };
   963   *
   964   * so there are 5 possible types of ADDL instruction that can be laid down, and
   965   * possible states used to lay them down (Ztype and z pointer, assuming z
   966   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   967   *
   968   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   969   *        Yi32, Yax -> Zil_, z+2 (0x05)
   970   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   971   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   972   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   973   *
   974   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   975   * relatively straightforward as this program goes.
   976   *
   977   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   978   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   979   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   980   * Zilo_m is the same but a long (32-bit) immediate.
   981   */
   982  var optab =
   983  /*	as, ytab, andproto, opcode */
   984  []Optab{
   985  	{obj.AXXX, nil, 0, [23]uint8{}},
   986  	{AAAA, ynone, P32, [23]uint8{0x37}},
   987  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   988  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   989  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   990  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   991  	{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   992  	{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   993  	{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   994  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   995  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   996  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   997  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   998  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   999  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
  1000  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
  1001  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1002  	{AADJSP, nil, 0, [23]uint8{}},
  1003  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
  1004  	{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1005  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
  1006  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
  1007  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
  1008  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
  1009  	{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1010  	{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1011  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
  1012  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
  1013  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
  1014  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
  1015  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
  1016  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
  1017  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
  1018  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
  1019  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
  1020  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
  1021  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
  1022  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
  1023  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
  1024  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
  1025  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
  1026  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
  1027  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
  1028  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
  1029  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
  1030  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
  1031  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
  1032  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
  1033  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
  1034  	{ABYTE, ybyte, Px, [23]uint8{1}},
  1035  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
  1036  	{ACDQ, ynone, Px, [23]uint8{0x99}},
  1037  	{ACLC, ynone, Px, [23]uint8{0xf8}},
  1038  	{ACLD, ynone, Px, [23]uint8{0xfc}},
  1039  	{ACLI, ynone, Px, [23]uint8{0xfa}},
  1040  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
  1041  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1042  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1043  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1044  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1045  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1046  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1047  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1048  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1049  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1050  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1051  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1052  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1053  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1054  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1055  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1056  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1057  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1058  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1059  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1060  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1061  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1062  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1063  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1064  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1065  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1066  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1067  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1068  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1069  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1070  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1071  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1072  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1073  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1074  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1075  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1076  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1077  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1078  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1079  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1080  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1081  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1082  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1083  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1084  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1085  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1086  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1087  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1088  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1089  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1090  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1091  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1092  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1093  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1094  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1095  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1096  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1097  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1098  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1099  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1100  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1101  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1102  	{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
  1103  	{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
  1104  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1105  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1106  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1107  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1108  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1109  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1110  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1111  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1112  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1113  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1114  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1115  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1116  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1117  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1118  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1119  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1120  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1121  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1122  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1123  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1124  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1125  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1126  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1127  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1128  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1129  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1130  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1131  	{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
  1132  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1133  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1134  	{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
  1135  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1136  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1137  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1138  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1139  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1140  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1141  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1142  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1143  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1144  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1145  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1146  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1147  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1148  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1149  	{obj.AGLOBL, nil, 0, [23]uint8{}},
  1150  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1151  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1152  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1153  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1154  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1155  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1156  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1157  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1158  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1159  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1160  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1161  	{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
  1162  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1163  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1164  	{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
  1165  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1166  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1167  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1168  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1169  	{AINT, yint, Px, [23]uint8{0xcd}},
  1170  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1171  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1172  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1173  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1174  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1175  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1176  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1177  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1178  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1179  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1180  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1181  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1182  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1183  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1184  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1185  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1186  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1187  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1188  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1189  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1190  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1191  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1192  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1193  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1194  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1195  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1196  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1197  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1198  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1199  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1200  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1201  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1202  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1203  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1204  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1205  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1206  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1207  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1208  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1209  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1210  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1211  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1212  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1213  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1214  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1215  	{ALONG, ybyte, Px, [23]uint8{4}},
  1216  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1217  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1218  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1219  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1220  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1221  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1222  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1223  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1224  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1225  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1226  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1227  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1228  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1229  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1230  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1231  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1232  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1233  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1234  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1235  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1236  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1237  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1238  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1239  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1240  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1241  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1242  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1243  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1244  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1245  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1246  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1247  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1248  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1249  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1250  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1251  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1252  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1253  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1254  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1255  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1256  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1257  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1258  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1259  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1260  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1261  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1262  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1263  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1264  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1265  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1266  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1267  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1268  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1269  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1270  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1271  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1272  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1273  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1274  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1275  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1276  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1277  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1278  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1279  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1280  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1281  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1282  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1283  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1284  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1285  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1286  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1287  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1288  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1289  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1290  	{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1291  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1292  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1293  	{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1294  	{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1295  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1296  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1297  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1298  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1299  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1300  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1301  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1302  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1303  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1304  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1305  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1306  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1307  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1308  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1309  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1310  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1311  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1312  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1313  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1314  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1315  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1316  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1317  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1318  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1319  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1320  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1321  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1322  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1323  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1324  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1325  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1326  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1327  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1328  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1329  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1330  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1331  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1332  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1333  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1334  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1335  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1336  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1337  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1338  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1339  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1340  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1341  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1342  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1343  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1344  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1345  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1346  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1347  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1348  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1349  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1350  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1351  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1352  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1353  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1354  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1355  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1356  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1357  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1358  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1359  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1360  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1361  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1362  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1363  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1364  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1365  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1366  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1367  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1368  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1369  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1370  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1371  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1372  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1373  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1374  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1375  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1376  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1377  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1378  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1379  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1380  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1381  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1382  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1383  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1384  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1385  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1386  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1387  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1388  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1389  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1390  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1391  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1392  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1393  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1394  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1395  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1396  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1397  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1398  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1399  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1400  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1401  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1402  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1403  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1404  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1405  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1406  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1407  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1408  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1409  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1410  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1411  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1412  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1413  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1414  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1415  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1416  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1417  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1418  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1419  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1420  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1421  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1422  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1423  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1424  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1425  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1426  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1427  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1428  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1429  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1430  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1431  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1432  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1433  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1434  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1435  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1436  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1437  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1438  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1439  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1440  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1441  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1442  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1443  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1444  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1445  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1446  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1447  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1448  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1449  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1450  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1451  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1452  	{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1453  	{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1454  	{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1455  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1456  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1457  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1458  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1459  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1460  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1461  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1462  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1463  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1464  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1465  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1466  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1467  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1468  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1469  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1470  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1471  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1472  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1473  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1474  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1475  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1476  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1477  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1478  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1479  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1480  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1481  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1482  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1483  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1484  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1485  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1486  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1487  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1488  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1489  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1490  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1491  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1492  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1493  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1494  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1495  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1496  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1497  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1498  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1499  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1500  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1501  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1502  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1503  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1504  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1505  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1506  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1507  	{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1508  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1509  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1510  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1511  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1512  	{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1513  	{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1514  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1515  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1516  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1517  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1518  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1519  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1520  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1521  	{AWORD, ybyte, Px, [23]uint8{2}},
  1522  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1523  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1524  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1525  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1526  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1527  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1528  	{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1530  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1531  	{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1532  	{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1533  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1534  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1535  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1536  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1537  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1538  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1539  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1540  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1541  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1542  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1543  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1544  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1545  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1546  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1547  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1548  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1549  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1550  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1551  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1552  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1553  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1554  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1555  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1556  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1557  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1558  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1559  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1560  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1561  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1562  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1563  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1564  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1565  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1566  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1567  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1568  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1569  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1570  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1571  	{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1572  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1573  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1574  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1575  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1576  	{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1577  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1578  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1579  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1580  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1581  	{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1582  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1583  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1584  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1585  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1586  	{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1587  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1588  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1589  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1590  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1591  	{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1592  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1593  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1594  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1595  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1596  	{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1597  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1598  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1599  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1600  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1601  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1602  	{AFFREE, nil, 0, [23]uint8{}},
  1603  	{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1604  	{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1605  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1606  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1607  	{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1608  	{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1609  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1610  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1611  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1612  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1613  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1614  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1615  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1616  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1617  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1618  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1619  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1620  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1621  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1622  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1623  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1624  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1625  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1626  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1627  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1628  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1629  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1630  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1631  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1632  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1633  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1634  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1635  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1636  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1637  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1638  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1639  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1640  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1641  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1642  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1643  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1644  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1645  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1646  	{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1647  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1648  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1649  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1650  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1651  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1652  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1653  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1654  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1655  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1656  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1657  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1658  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1659  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1660  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1661  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1662  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1663  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1664  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1665  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1666  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1667  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1668  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1669  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1670  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1671  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1672  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1673  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1674  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1675  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1676  	{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1677  	{AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
  1678  	{AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
  1679  	{AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1680  	{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1681  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1682  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1683  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1684  
  1685  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1686  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1687  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1688  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1689  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1690  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1691  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1692  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1693  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1694  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1695  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1696  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1697  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1698  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1699  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1700  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1701  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1702  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1703  
  1704  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1705  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1706  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1707  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1708  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1709  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1710  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1711  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1712  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1713  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1714  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
  1715  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
  1716  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
  1717  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
  1718  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
  1719  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
  1720  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
  1721  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
  1722  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
  1723  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
  1724  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
  1725  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
  1726  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
  1727  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
  1728  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
  1729  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
  1730  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
  1731  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
  1732  
  1733  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1734  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1735  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1736  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1737  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1738  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1739  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1740  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1741  	{obj.ATYPE, nil, 0, [23]uint8{}},
  1742  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1743  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1744  	{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1745  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1746  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1747  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1748  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1749  	{obj.AEND, nil, 0, [23]uint8{}},
  1750  	{0, nil, 0, [23]uint8{}},
  1751  }
  1752  
  // opindex maps an opcode, masked with obj.AMask, to its optab entry.
  // instinit populates it; opcodes that have no optab entry keep a nil pointer.
  1753  var opindex [(ALAST + 1) & obj.AMask]*Optab
  1754  
  1755  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1756  // This happens on systems like Solaris that call .so functions instead of system calls.
  1757  // It does not seem to be necessary for any other systems. This is probably working
  1758  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1759  // what that bug is. And this does fix it.
  1760  func isextern(s *obj.LSym) bool {
  1761  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1762  	return strings.HasPrefix(s.Name, "libc_")
  1763  }
  1764  
  // nop holds single-instruction no-ops of increasing length: nop[i] is the
  // encoding that occupies i+1 bytes (only the first i+1 bytes of each entry
  // are meaningful). The encodings were constructed by hand and disassembled
  // with gdb to verify.
  // See http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  var nop = [][16]uint8{
  	{0x90},
  	{0x66, 0x90},
  	{0x0F, 0x1F, 0x00},
  	{0x0F, 0x1F, 0x40, 0x00},
  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  	// No 10-byte entry: Native Client rejects the repeated 0x66 prefix.
  	// {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  }

  // fillnop overwrites the first n bytes of p with no-op instructions, using
  // the longest encoding from nop that still fits for each chunk. It does
  // nothing when n <= 0. p must hold at least n bytes; a shorter slice makes
  // the slicing below panic.
  func fillnop(p []byte, n int) {
  	for remaining := n; remaining > 0; {
  		// Emit the largest available no-op, capped at what is left to fill.
  		size := remaining
  		if size > len(nop) {
  			size = len(nop)
  		}
  		copy(p[:size], nop[size-1][:size])
  		p = p[size:]
  		remaining -= size
  	}
  }
  1795  
  1796  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1797  	s.Grow(int64(c) + int64(pad))
  1798  	fillnop(s.P[c:], int(pad))
  1799  	return c + pad
  1800  }
  1801  
  1802  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1803  	if p.Mode != 64 || ctxt.Arch.PtrSize == 4 {
  1804  		return l
  1805  	}
  1806  	return q
  1807  }
  1808  
  1809  func span6(ctxt *obj.Link, s *obj.LSym) {
  1810  	ctxt.Cursym = s
  1811  
  1812  	if s.P != nil {
  1813  		return
  1814  	}
  1815  
  1816  	if ycover[0] == 0 {
  1817  		instinit()
  1818  	}
  1819  
  1820  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1821  		if p.To.Type == obj.TYPE_BRANCH {
  1822  			if p.Pcond == nil {
  1823  				p.Pcond = p
  1824  			}
  1825  		}
  1826  		if p.As == AADJSP {
  1827  			p.To.Type = obj.TYPE_REG
  1828  			p.To.Reg = REG_SP
  1829  			v := int32(-p.From.Offset)
  1830  			p.From.Offset = int64(v)
  1831  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1832  			if v < 0 {
  1833  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1834  				v = -v
  1835  				p.From.Offset = int64(v)
  1836  			}
  1837  
  1838  			if v == 0 {
  1839  				p.As = obj.ANOP
  1840  			}
  1841  		}
  1842  	}
  1843  
  1844  	var q *obj.Prog
  1845  	var count int64 // rough count of number of instructions
  1846  	for p := s.Text; p != nil; p = p.Link {
  1847  		count++
  1848  		p.Back = 2 // use short branches first time through
  1849  		q = p.Pcond
  1850  		if q != nil && (q.Back&2 != 0) {
  1851  			p.Back |= 1 // backward jump
  1852  			q.Back |= 4 // loop head
  1853  		}
  1854  
  1855  		if p.As == AADJSP {
  1856  			p.To.Type = obj.TYPE_REG
  1857  			p.To.Reg = REG_SP
  1858  			v := int32(-p.From.Offset)
  1859  			p.From.Offset = int64(v)
  1860  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1861  			if v < 0 {
  1862  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1863  				v = -v
  1864  				p.From.Offset = int64(v)
  1865  			}
  1866  
  1867  			if v == 0 {
  1868  				p.As = obj.ANOP
  1869  			}
  1870  		}
  1871  	}
  1872  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1873  
  1874  	n := 0
  1875  	var c int32
  1876  	errors := ctxt.Errors
  1877  	var deferreturn *obj.LSym
  1878  	if ctxt.Headtype == obj.Hnacl {
  1879  		deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1880  	}
  1881  	for {
  1882  		loop := int32(0)
  1883  		for i := range s.R {
  1884  			s.R[i] = obj.Reloc{}
  1885  		}
  1886  		s.R = s.R[:0]
  1887  		s.P = s.P[:0]
  1888  		c = 0
  1889  		for p := s.Text; p != nil; p = p.Link {
  1890  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1891  
  1892  				// pad everything to avoid crossing 32-byte boundary
  1893  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1894  					c = naclpad(ctxt, s, c, -c&31)
  1895  				}
  1896  
  1897  				// pad call deferreturn to start at 32-byte boundary
  1898  				// so that subtracting 5 in jmpdefer will jump back
  1899  				// to that boundary and rerun the call.
  1900  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1901  					c = naclpad(ctxt, s, c, -c&31)
  1902  				}
  1903  
  1904  				// pad call to end at 32-byte boundary
  1905  				if p.As == obj.ACALL {
  1906  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1907  				}
  1908  
  1909  				// the linker treats REP and STOSQ as different instructions
  1910  				// but in fact the REP is a prefix on the STOSQ.
  1911  				// make sure REP has room for 2 more bytes, so that
  1912  				// padding will not be inserted before the next instruction.
  1913  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1914  					c = naclpad(ctxt, s, c, -c&31)
  1915  				}
  1916  
  1917  				// same for LOCK.
  1918  				// various instructions follow; the longest is 4 bytes.
  1919  				// give ourselves 8 bytes so as to avoid surprises.
  1920  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1921  					c = naclpad(ctxt, s, c, -c&31)
  1922  				}
  1923  			}
  1924  
  1925  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1926  				// pad with NOPs
  1927  				v := -c & (LoopAlign - 1)
  1928  
  1929  				if v <= MaxLoopPad {
  1930  					s.Grow(int64(c) + int64(v))
  1931  					fillnop(s.P[c:], int(v))
  1932  					c += v
  1933  				}
  1934  			}
  1935  
  1936  			p.Pc = int64(c)
  1937  
  1938  			// process forward jumps to p
  1939  			for q = p.Rel; q != nil; q = q.Forwd {
  1940  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1941  				if q.Back&2 != 0 { // short
  1942  					if v > 127 {
  1943  						loop++
  1944  						q.Back ^= 2
  1945  					}
  1946  
  1947  					if q.As == AJCXZL || q.As == AXBEGIN {
  1948  						s.P[q.Pc+2] = byte(v)
  1949  					} else {
  1950  						s.P[q.Pc+1] = byte(v)
  1951  					}
  1952  				} else {
  1953  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1954  				}
  1955  			}
  1956  
  1957  			p.Rel = nil
  1958  
  1959  			p.Pc = int64(c)
  1960  			asmins(ctxt, p)
  1961  			m := ctxt.AsmBuf.Len()
  1962  			if int(p.Isize) != m {
  1963  				p.Isize = uint8(m)
  1964  				loop++
  1965  			}
  1966  
  1967  			s.Grow(p.Pc + int64(m))
  1968  			copy(s.P[p.Pc:], ctxt.AsmBuf.Bytes())
  1969  			c += int32(m)
  1970  		}
  1971  
  1972  		n++
  1973  		if n > 20 {
  1974  			ctxt.Diag("span must be looping")
  1975  			log.Fatalf("loop")
  1976  		}
  1977  		if loop == 0 {
  1978  			break
  1979  		}
  1980  		if ctxt.Errors > errors {
  1981  			return
  1982  		}
  1983  	}
  1984  
  1985  	if ctxt.Headtype == obj.Hnacl {
  1986  		c = naclpad(ctxt, s, c, -c&31)
  1987  	}
  1988  
  1989  	s.Size = int64(c)
  1990  
  1991  	if false { /* debug['a'] > 1 */
  1992  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1993  		var i int
  1994  		for i = 0; i < len(s.P); i++ {
  1995  			fmt.Printf(" %.2x", s.P[i])
  1996  			if i%16 == 15 {
  1997  				fmt.Printf("\n  %.6x", uint(i+1))
  1998  			}
  1999  		}
  2000  
  2001  		if i%16 != 0 {
  2002  			fmt.Printf("\n")
  2003  		}
  2004  
  2005  		for i := 0; i < len(s.R); i++ {
  2006  			r := &s.R[i]
  2007  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  2008  		}
  2009  	}
  2010  }
  2011  
  2012  func instinit() {
  2013  	for i := 1; optab[i].as != 0; i++ {
  2014  		c := optab[i].as
  2015  		if opindex[c&obj.AMask] != nil {
  2016  			log.Fatalf("phase error in optab: %d (%v)", i, c)
  2017  		}
  2018  		opindex[c&obj.AMask] = &optab[i]
  2019  	}
  2020  
  2021  	for i := 0; i < Ymax; i++ {
  2022  		ycover[i*Ymax+i] = 1
  2023  	}
  2024  
  2025  	ycover[Yi0*Ymax+Yi8] = 1
  2026  	ycover[Yi1*Ymax+Yi8] = 1
  2027  	ycover[Yu7*Ymax+Yi8] = 1
  2028  
  2029  	ycover[Yi0*Ymax+Yu7] = 1
  2030  	ycover[Yi1*Ymax+Yu7] = 1
  2031  
  2032  	ycover[Yi0*Ymax+Yu8] = 1
  2033  	ycover[Yi1*Ymax+Yu8] = 1
  2034  	ycover[Yu7*Ymax+Yu8] = 1
  2035  
  2036  	ycover[Yi0*Ymax+Ys32] = 1
  2037  	ycover[Yi1*Ymax+Ys32] = 1
  2038  	ycover[Yu7*Ymax+Ys32] = 1
  2039  	ycover[Yu8*Ymax+Ys32] = 1
  2040  	ycover[Yi8*Ymax+Ys32] = 1
  2041  
  2042  	ycover[Yi0*Ymax+Yi32] = 1
  2043  	ycover[Yi1*Ymax+Yi32] = 1
  2044  	ycover[Yu7*Ymax+Yi32] = 1
  2045  	ycover[Yu8*Ymax+Yi32] = 1
  2046  	ycover[Yi8*Ymax+Yi32] = 1
  2047  	ycover[Ys32*Ymax+Yi32] = 1
  2048  
  2049  	ycover[Yi0*Ymax+Yi64] = 1
  2050  	ycover[Yi1*Ymax+Yi64] = 1
  2051  	ycover[Yu7*Ymax+Yi64] = 1
  2052  	ycover[Yu8*Ymax+Yi64] = 1
  2053  	ycover[Yi8*Ymax+Yi64] = 1
  2054  	ycover[Ys32*Ymax+Yi64] = 1
  2055  	ycover[Yi32*Ymax+Yi64] = 1
  2056  
  2057  	ycover[Yal*Ymax+Yrb] = 1
  2058  	ycover[Ycl*Ymax+Yrb] = 1
  2059  	ycover[Yax*Ymax+Yrb] = 1
  2060  	ycover[Ycx*Ymax+Yrb] = 1
  2061  	ycover[Yrx*Ymax+Yrb] = 1
  2062  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2063  
  2064  	ycover[Ycl*Ymax+Ycx] = 1
  2065  
  2066  	ycover[Yax*Ymax+Yrx] = 1
  2067  	ycover[Ycx*Ymax+Yrx] = 1
  2068  
  2069  	ycover[Yax*Ymax+Yrl] = 1
  2070  	ycover[Ycx*Ymax+Yrl] = 1
  2071  	ycover[Yrx*Ymax+Yrl] = 1
  2072  	ycover[Yrl32*Ymax+Yrl] = 1
  2073  
  2074  	ycover[Yf0*Ymax+Yrf] = 1
  2075  
  2076  	ycover[Yal*Ymax+Ymb] = 1
  2077  	ycover[Ycl*Ymax+Ymb] = 1
  2078  	ycover[Yax*Ymax+Ymb] = 1
  2079  	ycover[Ycx*Ymax+Ymb] = 1
  2080  	ycover[Yrx*Ymax+Ymb] = 1
  2081  	ycover[Yrb*Ymax+Ymb] = 1
  2082  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2083  	ycover[Ym*Ymax+Ymb] = 1
  2084  
  2085  	ycover[Yax*Ymax+Yml] = 1
  2086  	ycover[Ycx*Ymax+Yml] = 1
  2087  	ycover[Yrx*Ymax+Yml] = 1
  2088  	ycover[Yrl*Ymax+Yml] = 1
  2089  	ycover[Yrl32*Ymax+Yml] = 1
  2090  	ycover[Ym*Ymax+Yml] = 1
  2091  
  2092  	ycover[Yax*Ymax+Ymm] = 1
  2093  	ycover[Ycx*Ymax+Ymm] = 1
  2094  	ycover[Yrx*Ymax+Ymm] = 1
  2095  	ycover[Yrl*Ymax+Ymm] = 1
  2096  	ycover[Yrl32*Ymax+Ymm] = 1
  2097  	ycover[Ym*Ymax+Ymm] = 1
  2098  	ycover[Ymr*Ymax+Ymm] = 1
  2099  
  2100  	ycover[Ym*Ymax+Yxm] = 1
  2101  	ycover[Yxr*Ymax+Yxm] = 1
  2102  
  2103  	ycover[Ym*Ymax+Yym] = 1
  2104  	ycover[Yyr*Ymax+Yym] = 1
  2105  
  2106  	for i := 0; i < MAXREG; i++ {
  2107  		reg[i] = -1
  2108  		if i >= REG_AL && i <= REG_R15B {
  2109  			reg[i] = (i - REG_AL) & 7
  2110  			if i >= REG_SPB && i <= REG_DIB {
  2111  				regrex[i] = 0x40
  2112  			}
  2113  			if i >= REG_R8B && i <= REG_R15B {
  2114  				regrex[i] = Rxr | Rxx | Rxb
  2115  			}
  2116  		}
  2117  
  2118  		if i >= REG_AH && i <= REG_BH {
  2119  			reg[i] = 4 + ((i - REG_AH) & 7)
  2120  		}
  2121  		if i >= REG_AX && i <= REG_R15 {
  2122  			reg[i] = (i - REG_AX) & 7
  2123  			if i >= REG_R8 {
  2124  				regrex[i] = Rxr | Rxx | Rxb
  2125  			}
  2126  		}
  2127  
  2128  		if i >= REG_F0 && i <= REG_F0+7 {
  2129  			reg[i] = (i - REG_F0) & 7
  2130  		}
  2131  		if i >= REG_M0 && i <= REG_M0+7 {
  2132  			reg[i] = (i - REG_M0) & 7
  2133  		}
  2134  		if i >= REG_X0 && i <= REG_X0+15 {
  2135  			reg[i] = (i - REG_X0) & 7
  2136  			if i >= REG_X0+8 {
  2137  				regrex[i] = Rxr | Rxx | Rxb
  2138  			}
  2139  		}
  2140  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2141  			reg[i] = (i - REG_Y0) & 7
  2142  			if i >= REG_Y0+8 {
  2143  				regrex[i] = Rxr | Rxx | Rxb
  2144  			}
  2145  		}
  2146  
  2147  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2148  			regrex[i] = Rxr
  2149  		}
  2150  	}
  2151  }
  2152  
  2153  var isAndroid = (obj.Getgoos() == "android")
  2154  
  2155  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2156  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2157  		return 0
  2158  	}
  2159  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2160  		switch a.Reg {
  2161  		case REG_CS:
  2162  			return 0x2e
  2163  
  2164  		case REG_DS:
  2165  			return 0x3e
  2166  
  2167  		case REG_ES:
  2168  			return 0x26
  2169  
  2170  		case REG_FS:
  2171  			return 0x64
  2172  
  2173  		case REG_GS:
  2174  			return 0x65
  2175  
  2176  		case REG_TLS:
  2177  			// NOTE: Systems listed here should be only systems that
  2178  			// support direct TLS references like 8(TLS) implemented as
  2179  			// direct references from FS or GS. Systems that require
  2180  			// the initial-exec model, where you load the TLS base into
  2181  			// a register and then index from that register, do not reach
  2182  			// this code and should not be listed.
  2183  			if p.Mode == 32 {
  2184  				switch ctxt.Headtype {
  2185  				default:
  2186  					if isAndroid {
  2187  						return 0x65 // GS
  2188  					}
  2189  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2190  
  2191  				case obj.Hdarwin,
  2192  					obj.Hdragonfly,
  2193  					obj.Hfreebsd,
  2194  					obj.Hnetbsd,
  2195  					obj.Hopenbsd:
  2196  					return 0x65 // GS
  2197  				}
  2198  			}
  2199  
  2200  			switch ctxt.Headtype {
  2201  			default:
  2202  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2203  
  2204  			case obj.Hlinux:
  2205  				if isAndroid {
  2206  					return 0x64 // FS
  2207  				}
  2208  
  2209  				if ctxt.Flag_shared {
  2210  					log.Fatalf("unknown TLS base register for linux with -shared")
  2211  				} else {
  2212  					return 0x64 // FS
  2213  				}
  2214  
  2215  			case obj.Hdragonfly,
  2216  				obj.Hfreebsd,
  2217  				obj.Hnetbsd,
  2218  				obj.Hopenbsd,
  2219  				obj.Hsolaris:
  2220  				return 0x64 // FS
  2221  
  2222  			case obj.Hdarwin:
  2223  				return 0x65 // GS
  2224  			}
  2225  		}
  2226  	}
  2227  
  2228  	if p.Mode == 32 {
  2229  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2230  			// When building for inclusion into a shared library, an instruction of the form
  2231  			//     MOVL 0(CX)(TLS*1), AX
  2232  			// becomes
  2233  			//     mov %gs:(%ecx), %eax
  2234  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2235  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2236  			// a shared library the instruction it becomes
  2237  			//     mov 0x0(%ecx), $eax
  2238  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2239  			if a.Offset != 0 {
  2240  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2241  			}
  2242  			return 0x65 // GS
  2243  		}
  2244  		return 0
  2245  	}
  2246  
  2247  	switch a.Index {
  2248  	case REG_CS:
  2249  		return 0x2e
  2250  
  2251  	case REG_DS:
  2252  		return 0x3e
  2253  
  2254  	case REG_ES:
  2255  		return 0x26
  2256  
  2257  	case REG_TLS:
  2258  		if ctxt.Flag_shared {
  2259  			// When building for inclusion into a shared library, an instruction of the form
  2260  			//     MOV 0(CX)(TLS*1), AX
  2261  			// becomes
  2262  			//     mov %fs:(%rcx), %rax
  2263  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2264  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2265  			// a shared library the instruction does not require a prefix.
  2266  			if a.Offset != 0 {
  2267  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2268  			}
  2269  			return 0x64
  2270  		}
  2271  
  2272  	case REG_FS:
  2273  		return 0x64
  2274  
  2275  	case REG_GS:
  2276  		return 0x65
  2277  	}
  2278  
  2279  	return 0
  2280  }
  2281  
  2282  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2283  	switch a.Type {
  2284  	case obj.TYPE_NONE:
  2285  		return Ynone
  2286  
  2287  	case obj.TYPE_BRANCH:
  2288  		return Ybr
  2289  
  2290  	case obj.TYPE_INDIR:
  2291  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2292  			return Yindir
  2293  		}
  2294  		return Yxxx
  2295  
  2296  	case obj.TYPE_MEM:
  2297  		if a.Index == REG_SP {
  2298  			// Can't use SP as the index register
  2299  			return Yxxx
  2300  		}
  2301  		if ctxt.Asmode == 64 {
  2302  			switch a.Name {
  2303  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2304  				// Global variables can't use index registers and their
  2305  				// base register is %rip (%rip is encoded as REG_NONE).
  2306  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2307  					return Yxxx
  2308  				}
  2309  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2310  				// These names must have a base of SP.  The old compiler
  2311  				// uses 0 for the base register. SSA uses REG_SP.
  2312  				if a.Reg != REG_SP && a.Reg != 0 {
  2313  					return Yxxx
  2314  				}
  2315  			case obj.NAME_NONE:
  2316  				// everything is ok
  2317  			default:
  2318  				// unknown name
  2319  				return Yxxx
  2320  			}
  2321  		}
  2322  		return Ym
  2323  
  2324  	case obj.TYPE_ADDR:
  2325  		switch a.Name {
  2326  		case obj.NAME_GOTREF:
  2327  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2328  			return Yxxx
  2329  
  2330  		case obj.NAME_EXTERN,
  2331  			obj.NAME_STATIC:
  2332  			if a.Sym != nil && isextern(a.Sym) || (p.Mode == 32 && !ctxt.Flag_shared) {
  2333  				return Yi32
  2334  			}
  2335  			return Yiauto // use pc-relative addressing
  2336  
  2337  		case obj.NAME_AUTO,
  2338  			obj.NAME_PARAM:
  2339  			return Yiauto
  2340  		}
  2341  
  2342  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2343  		// and got Yi32 in an earlier version of this code.
  2344  		// Keep doing that until we fix yduff etc.
  2345  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2346  			return Yi32
  2347  		}
  2348  
  2349  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2350  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2351  		}
  2352  		fallthrough
  2353  
  2354  		// fall through
  2355  
  2356  	case obj.TYPE_CONST:
  2357  		if a.Sym != nil {
  2358  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2359  		}
  2360  
  2361  		v := a.Offset
  2362  		if p.Mode == 32 {
  2363  			v = int64(int32(v))
  2364  		}
  2365  		if v == 0 {
  2366  			if p.Mark&PRESERVEFLAGS != 0 {
  2367  				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
  2368  				return Yu7
  2369  			}
  2370  			return Yi0
  2371  		}
  2372  		if v == 1 {
  2373  			return Yi1
  2374  		}
  2375  		if v >= 0 && v <= 127 {
  2376  			return Yu7
  2377  		}
  2378  		if v >= 0 && v <= 255 {
  2379  			return Yu8
  2380  		}
  2381  		if v >= -128 && v <= 127 {
  2382  			return Yi8
  2383  		}
  2384  		if p.Mode == 32 {
  2385  			return Yi32
  2386  		}
  2387  		l := int32(v)
  2388  		if int64(l) == v {
  2389  			return Ys32 /* can sign extend */
  2390  		}
  2391  		if v>>32 == 0 {
  2392  			return Yi32 /* unsigned */
  2393  		}
  2394  		return Yi64
  2395  
  2396  	case obj.TYPE_TEXTSIZE:
  2397  		return Ytextsize
  2398  	}
  2399  
  2400  	if a.Type != obj.TYPE_REG {
  2401  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2402  		return Yxxx
  2403  	}
  2404  
  2405  	switch a.Reg {
  2406  	case REG_AL:
  2407  		return Yal
  2408  
  2409  	case REG_AX:
  2410  		return Yax
  2411  
  2412  		/*
  2413  			case REG_SPB:
  2414  		*/
  2415  	case REG_BPB,
  2416  		REG_SIB,
  2417  		REG_DIB,
  2418  		REG_R8B,
  2419  		REG_R9B,
  2420  		REG_R10B,
  2421  		REG_R11B,
  2422  		REG_R12B,
  2423  		REG_R13B,
  2424  		REG_R14B,
  2425  		REG_R15B:
  2426  		if ctxt.Asmode != 64 {
  2427  			return Yxxx
  2428  		}
  2429  		fallthrough
  2430  
  2431  	case REG_DL,
  2432  		REG_BL,
  2433  		REG_AH,
  2434  		REG_CH,
  2435  		REG_DH,
  2436  		REG_BH:
  2437  		return Yrb
  2438  
  2439  	case REG_CL:
  2440  		return Ycl
  2441  
  2442  	case REG_CX:
  2443  		return Ycx
  2444  
  2445  	case REG_DX, REG_BX:
  2446  		return Yrx
  2447  
  2448  	case REG_R8, /* not really Yrl */
  2449  		REG_R9,
  2450  		REG_R10,
  2451  		REG_R11,
  2452  		REG_R12,
  2453  		REG_R13,
  2454  		REG_R14,
  2455  		REG_R15:
  2456  		if ctxt.Asmode != 64 {
  2457  			return Yxxx
  2458  		}
  2459  		fallthrough
  2460  
  2461  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2462  		if p.Mode == 32 {
  2463  			return Yrl32
  2464  		}
  2465  		return Yrl
  2466  
  2467  	case REG_F0 + 0:
  2468  		return Yf0
  2469  
  2470  	case REG_F0 + 1,
  2471  		REG_F0 + 2,
  2472  		REG_F0 + 3,
  2473  		REG_F0 + 4,
  2474  		REG_F0 + 5,
  2475  		REG_F0 + 6,
  2476  		REG_F0 + 7:
  2477  		return Yrf
  2478  
  2479  	case REG_M0 + 0,
  2480  		REG_M0 + 1,
  2481  		REG_M0 + 2,
  2482  		REG_M0 + 3,
  2483  		REG_M0 + 4,
  2484  		REG_M0 + 5,
  2485  		REG_M0 + 6,
  2486  		REG_M0 + 7:
  2487  		return Ymr
  2488  
  2489  	case REG_X0 + 0,
  2490  		REG_X0 + 1,
  2491  		REG_X0 + 2,
  2492  		REG_X0 + 3,
  2493  		REG_X0 + 4,
  2494  		REG_X0 + 5,
  2495  		REG_X0 + 6,
  2496  		REG_X0 + 7,
  2497  		REG_X0 + 8,
  2498  		REG_X0 + 9,
  2499  		REG_X0 + 10,
  2500  		REG_X0 + 11,
  2501  		REG_X0 + 12,
  2502  		REG_X0 + 13,
  2503  		REG_X0 + 14,
  2504  		REG_X0 + 15:
  2505  		return Yxr
  2506  
  2507  	case REG_Y0 + 0,
  2508  		REG_Y0 + 1,
  2509  		REG_Y0 + 2,
  2510  		REG_Y0 + 3,
  2511  		REG_Y0 + 4,
  2512  		REG_Y0 + 5,
  2513  		REG_Y0 + 6,
  2514  		REG_Y0 + 7,
  2515  		REG_Y0 + 8,
  2516  		REG_Y0 + 9,
  2517  		REG_Y0 + 10,
  2518  		REG_Y0 + 11,
  2519  		REG_Y0 + 12,
  2520  		REG_Y0 + 13,
  2521  		REG_Y0 + 14,
  2522  		REG_Y0 + 15:
  2523  		return Yyr
  2524  
  2525  	case REG_CS:
  2526  		return Ycs
  2527  	case REG_SS:
  2528  		return Yss
  2529  	case REG_DS:
  2530  		return Yds
  2531  	case REG_ES:
  2532  		return Yes
  2533  	case REG_FS:
  2534  		return Yfs
  2535  	case REG_GS:
  2536  		return Ygs
  2537  	case REG_TLS:
  2538  		return Ytls
  2539  
  2540  	case REG_GDTR:
  2541  		return Ygdtr
  2542  	case REG_IDTR:
  2543  		return Yidtr
  2544  	case REG_LDTR:
  2545  		return Yldtr
  2546  	case REG_MSW:
  2547  		return Ymsw
  2548  	case REG_TASK:
  2549  		return Ytask
  2550  
  2551  	case REG_CR + 0:
  2552  		return Ycr0
  2553  	case REG_CR + 1:
  2554  		return Ycr1
  2555  	case REG_CR + 2:
  2556  		return Ycr2
  2557  	case REG_CR + 3:
  2558  		return Ycr3
  2559  	case REG_CR + 4:
  2560  		return Ycr4
  2561  	case REG_CR + 5:
  2562  		return Ycr5
  2563  	case REG_CR + 6:
  2564  		return Ycr6
  2565  	case REG_CR + 7:
  2566  		return Ycr7
  2567  	case REG_CR + 8:
  2568  		return Ycr8
  2569  
  2570  	case REG_DR + 0:
  2571  		return Ydr0
  2572  	case REG_DR + 1:
  2573  		return Ydr1
  2574  	case REG_DR + 2:
  2575  		return Ydr2
  2576  	case REG_DR + 3:
  2577  		return Ydr3
  2578  	case REG_DR + 4:
  2579  		return Ydr4
  2580  	case REG_DR + 5:
  2581  		return Ydr5
  2582  	case REG_DR + 6:
  2583  		return Ydr6
  2584  	case REG_DR + 7:
  2585  		return Ydr7
  2586  
  2587  	case REG_TR + 0:
  2588  		return Ytr0
  2589  	case REG_TR + 1:
  2590  		return Ytr1
  2591  	case REG_TR + 2:
  2592  		return Ytr2
  2593  	case REG_TR + 3:
  2594  		return Ytr3
  2595  	case REG_TR + 4:
  2596  		return Ytr4
  2597  	case REG_TR + 5:
  2598  		return Ytr5
  2599  	case REG_TR + 6:
  2600  		return Ytr6
  2601  	case REG_TR + 7:
  2602  		return Ytr7
  2603  	}
  2604  
  2605  	return Yxxx
  2606  }
  2607  
  2608  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2609  	var i int
  2610  
  2611  	switch index {
  2612  	default:
  2613  		goto bad
  2614  
  2615  	case REG_NONE:
  2616  		i = 4 << 3
  2617  		goto bas
  2618  
  2619  	case REG_R8,
  2620  		REG_R9,
  2621  		REG_R10,
  2622  		REG_R11,
  2623  		REG_R12,
  2624  		REG_R13,
  2625  		REG_R14,
  2626  		REG_R15:
  2627  		if ctxt.Asmode != 64 {
  2628  			goto bad
  2629  		}
  2630  		fallthrough
  2631  
  2632  	case REG_AX,
  2633  		REG_CX,
  2634  		REG_DX,
  2635  		REG_BX,
  2636  		REG_BP,
  2637  		REG_SI,
  2638  		REG_DI:
  2639  		i = reg[index] << 3
  2640  	}
  2641  
  2642  	switch scale {
  2643  	default:
  2644  		goto bad
  2645  
  2646  	case 1:
  2647  		break
  2648  
  2649  	case 2:
  2650  		i |= 1 << 6
  2651  
  2652  	case 4:
  2653  		i |= 2 << 6
  2654  
  2655  	case 8:
  2656  		i |= 3 << 6
  2657  	}
  2658  
  2659  bas:
  2660  	switch base {
  2661  	default:
  2662  		goto bad
  2663  
  2664  	case REG_NONE: /* must be mod=00 */
  2665  		i |= 5
  2666  
  2667  	case REG_R8,
  2668  		REG_R9,
  2669  		REG_R10,
  2670  		REG_R11,
  2671  		REG_R12,
  2672  		REG_R13,
  2673  		REG_R14,
  2674  		REG_R15:
  2675  		if ctxt.Asmode != 64 {
  2676  			goto bad
  2677  		}
  2678  		fallthrough
  2679  
  2680  	case REG_AX,
  2681  		REG_CX,
  2682  		REG_DX,
  2683  		REG_BX,
  2684  		REG_SP,
  2685  		REG_BP,
  2686  		REG_SI,
  2687  		REG_DI:
  2688  		i |= reg[base]
  2689  	}
  2690  
  2691  	ctxt.AsmBuf.Put1(byte(i))
  2692  	return
  2693  
  2694  bad:
  2695  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2696  	ctxt.AsmBuf.Put1(0)
  2697  	return
  2698  }
  2699  
  2700  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2701  	var rel obj.Reloc
  2702  
  2703  	v := vaddr(ctxt, p, a, &rel)
  2704  	if rel.Siz != 0 {
  2705  		if rel.Siz != 4 {
  2706  			ctxt.Diag("bad reloc")
  2707  		}
  2708  		r := obj.Addrel(ctxt.Cursym)
  2709  		*r = rel
  2710  		r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  2711  	}
  2712  
  2713  	ctxt.AsmBuf.PutInt32(int32(v))
  2714  }
  2715  
  2716  /*
  2717  static void
  2718  relput8(Prog *p, Addr *a)
  2719  {
  2720  	vlong v;
  2721  	Reloc rel, *r;
  2722  
  2723  	v = vaddr(ctxt, p, a, &rel);
  2724  	if(rel.siz != 0) {
  2725  		r = addrel(ctxt->cursym);
  2726  		*r = rel;
  2727  		r->siz = 8;
  2728  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2729  	}
  2730  	put8(ctxt, v);
  2731  }
  2732  */
  2733  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2734  	if r != nil {
  2735  		*r = obj.Reloc{}
  2736  	}
  2737  
  2738  	switch a.Name {
  2739  	case obj.NAME_STATIC,
  2740  		obj.NAME_GOTREF,
  2741  		obj.NAME_EXTERN:
  2742  		s := a.Sym
  2743  		if r == nil {
  2744  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2745  			log.Fatalf("reloc")
  2746  		}
  2747  
  2748  		if a.Name == obj.NAME_GOTREF {
  2749  			r.Siz = 4
  2750  			r.Type = obj.R_GOTPCREL
  2751  		} else if isextern(s) || (p.Mode != 64 && !ctxt.Flag_shared) {
  2752  			r.Siz = 4
  2753  			r.Type = obj.R_ADDR
  2754  		} else {
  2755  			r.Siz = 4
  2756  			r.Type = obj.R_PCREL
  2757  		}
  2758  
  2759  		r.Off = -1 // caller must fill in
  2760  		r.Sym = s
  2761  		r.Add = a.Offset
  2762  
  2763  		return 0
  2764  	}
  2765  
  2766  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2767  		if r == nil {
  2768  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2769  			log.Fatalf("reloc")
  2770  		}
  2771  
  2772  		if !ctxt.Flag_shared || isAndroid {
  2773  			r.Type = obj.R_TLS_LE
  2774  			r.Siz = 4
  2775  			r.Off = -1 // caller must fill in
  2776  			r.Add = a.Offset
  2777  		}
  2778  		return 0
  2779  	}
  2780  
  2781  	return a.Offset
  2782  }
  2783  
// asmandsz emits the ModR/M byte for operand a, followed where needed by a
// SIB byte (via asmidx) and an 8- or 32-bit displacement, and records any
// relocation that the displacement requires.
//
// r is placed in bits 3-5 of the ModR/M byte. rex carries the caller's REX
// bits; only 0x40 and Rxr are kept, and they are merged into ctxt.Rexflag
// together with the Rxb/Rxx bits required by the base and index registers
// of a. m64 is not used by this function.
//
// Operands that cannot be encoded are reported with ctxt.Diag
// ("asmand: bad address ...") and the function returns without emitting
// anything more.
func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Keep only the bits of the caller's rex that describe the reg field.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && ctxt.Rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement; it is truncated to 32 bits even when the
	// diagnostic above fired.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here; the two checks only add more
		// specific diagnostics before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, no displacement allowed.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ctxt.AsmBuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: always encoded with
	// rm=4 and a SIB byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// NOTE: this declaration shadows the function-level base for the
		// remainder of this block.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !isextern(a.Sym) && p.Mode == 64 {
				goto bad
			}
			if p.Mode == 32 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			// For symbol references vaddr returns 0 and fills in rel.
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// mod=00 with no base: a 32-bit displacement always follows.
			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// mod=00, no displacement. BP and R13 are excluded and fall
		// through to the displacement forms below.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// mod=01, 8-bit displacement.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.AsmBuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ctxt.AsmBuf.Put1(byte(v))
			return
		}

		// mod=10, 32-bit displacement (possibly relocated).
		ctxt.AsmBuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (a.Index is REG_NONE or
	// REG_TLS).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if p.Mode == 32 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		// vaddr returns 0 for TLS-based operands and may fill in rel.
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ctxt.Rexflag |= regrex[base]&Rxb | rex
	// No usable base register (none, a segment register, or TLS): the
	// operand is encoded as a bare 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Note the precedence: the rm=5 form is used whenever p.Mode != 64,
		// or for a base-less symbol reference that is nil or not extern.
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			// mod=00, rm=5: 32-bit displacement with no SIB byte.
			ctxt.AsmBuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ctxt.AsmBuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base are always followed by a SIB byte with no index.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			ctxt.AsmBuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ctxt.AsmBuf.Put1(byte(v))
			return
		}

		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: plain ModR/M, no SIB.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// Turn the displacement into the addend of an R_TLS_LE
			// relocation; the encoded displacement itself becomes 0.
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// mod=00, no displacement (not available for BP and R13).
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		// mod=01, 8-bit displacement.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.AsmBuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		// mod=10, 32-bit displacement.
		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

	// putrelv emits the 32-bit displacement v, first recording rel (if
	// any) at the buffer position where the displacement will be written.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// This r is a *obj.Reloc that shadows the int parameter r.
		r := obj.Addrel(ctxt.Cursym)
		*r = rel
		r.Off = int32(ctxt.Curp.Pc + int64(ctxt.AsmBuf.Len()))
	}

	ctxt.AsmBuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  2984  
  2985  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2986  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2987  }
  2988  
  2989  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2990  	asmandsz(ctxt, p, a, o, 0, 0)
  2991  }
  2992  
  2993  func bytereg(a *obj.Addr, t *uint8) {
  2994  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2995  		a.Reg += REG_AL - REG_AX
  2996  		*t = 0
  2997  	}
  2998  }
  2999  
  3000  func unbytereg(a *obj.Addr, t *uint8) {
  3001  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3002  		a.Reg += REG_AX - REG_AL
  3003  		*t = 0
  3004  	}
  3005  }
  3006  
  3007  const (
  3008  	E = 0xff
  3009  )
  3010  
// ymovtab lists special-case encodings for push/pop/mov of segment, control,
// debug and test registers, descriptor-table and task-register moves,
// double shifts, and loading the TLS base.
//
// Each entry holds an instruction, three operand classes, a small integer
// and up to four bytes.
// NOTE(review): judging from the entries, the three classes are the from,
// from3 and to operands, the integer selects how the bytes are used, and
// the bytes are opcode bytes (optionally preceded by a Pe/Pw prefix and
// followed by E or a register number). Confirm against the Movtab
// declaration and the code that walks this table.
//
// The final all-zero entry presumably terminates the table.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3150  
  3151  func isax(a *obj.Addr) bool {
  3152  	switch a.Reg {
  3153  	case REG_AX, REG_AL, REG_AH:
  3154  		return true
  3155  	}
  3156  
  3157  	if a.Index == REG_AX {
  3158  		return true
  3159  	}
  3160  	return false
  3161  }
  3162  
  3163  func subreg(p *obj.Prog, from int, to int) {
  3164  	if false { /* debug['Q'] */
  3165  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  3166  	}
  3167  
  3168  	if int(p.From.Reg) == from {
  3169  		p.From.Reg = int16(to)
  3170  		p.Ft = 0
  3171  	}
  3172  
  3173  	if int(p.To.Reg) == from {
  3174  		p.To.Reg = int16(to)
  3175  		p.Tt = 0
  3176  	}
  3177  
  3178  	if int(p.From.Index) == from {
  3179  		p.From.Index = int16(to)
  3180  		p.Ft = 0
  3181  	}
  3182  
  3183  	if int(p.To.Index) == from {
  3184  		p.To.Index = int16(to)
  3185  		p.Tt = 0
  3186  	}
  3187  
  3188  	if false { /* debug['Q'] */
  3189  		fmt.Printf("%v\n", p)
  3190  	}
  3191  }
  3192  
  3193  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3194  	switch op {
  3195  	case Pm, Pe, Pf2, Pf3:
  3196  		if osize != 1 {
  3197  			if op != Pm {
  3198  				ctxt.AsmBuf.Put1(byte(op))
  3199  			}
  3200  			ctxt.AsmBuf.Put1(Pm)
  3201  			z++
  3202  			op = int(o.op[z])
  3203  			break
  3204  		}
  3205  		fallthrough
  3206  
  3207  	default:
  3208  		if ctxt.AsmBuf.Len() == 0 || ctxt.AsmBuf.Last() != Pm {
  3209  			ctxt.AsmBuf.Put1(Pm)
  3210  		}
  3211  	}
  3212  
  3213  	ctxt.AsmBuf.Put1(byte(op))
  3214  	return z
  3215  }
  3216  
  3217  var bpduff1 = []byte{
  3218  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3219  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3220  }
  3221  
  3222  var bpduff2 = []byte{
  3223  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3224  }
  3225  
  3226  // Emit VEX prefix and opcode byte.
  3227  // The three addresses are the r/m, vvvv, and reg fields.
  3228  // The reg and rm arguments appear in the same order as the
  3229  // arguments to asmand, which typically follows the call to asmvex.
  3230  // The final two arguments are the VEX prefix (see encoding above)
  3231  // and the opcode byte.
  3232  // For details about vex prefix see:
  3233  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3234  func asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3235  	ctxt.Vexflag = 1
  3236  	rexR := 0
  3237  	if r != nil {
  3238  		rexR = regrex[r.Reg] & Rxr
  3239  	}
  3240  	rexB := 0
  3241  	rexX := 0
  3242  	if rm != nil {
  3243  		rexB = regrex[rm.Reg] & Rxb
  3244  		rexX = regrex[rm.Index] & Rxx
  3245  	}
  3246  	vexM := (vex >> 3) & 0xF
  3247  	vexWLP := vex & 0x87
  3248  	vexV := byte(0)
  3249  	if v != nil {
  3250  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3251  	}
  3252  	vexV ^= 0xF
  3253  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3254  		// Can use 2-byte encoding.
  3255  		ctxt.AsmBuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3256  	} else {
  3257  		// Must use 3-byte encoding.
  3258  		ctxt.AsmBuf.Put3(0xc4,
  3259  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3260  			vexV<<3|vexWLP,
  3261  		)
  3262  	}
  3263  	ctxt.AsmBuf.Put1(opcode)
  3264  }
  3265  
  3266  func doasm(ctxt *obj.Link, p *obj.Prog) {
  3267  	ctxt.Curp = p // TODO
  3268  
  3269  	o := opindex[p.As&obj.AMask]
  3270  
  3271  	if o == nil {
  3272  		ctxt.Diag("asmins: missing op %v", p)
  3273  		return
  3274  	}
  3275  
  3276  	pre := prefixof(ctxt, p, &p.From)
  3277  	if pre != 0 {
  3278  		ctxt.AsmBuf.Put1(byte(pre))
  3279  	}
  3280  	pre = prefixof(ctxt, p, &p.To)
  3281  	if pre != 0 {
  3282  		ctxt.AsmBuf.Put1(byte(pre))
  3283  	}
  3284  
  3285  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3286  	// which encodes as SHRQ $32(DX*0), AX.
  3287  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3288  	// Change encoding generated by assemblers and compilers and remove.
  3289  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3290  		p.From3 = new(obj.Addr)
  3291  		p.From3.Type = obj.TYPE_REG
  3292  		p.From3.Reg = p.From.Index
  3293  		p.From.Index = 0
  3294  	}
  3295  
  3296  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3297  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3298  	switch p.As {
  3299  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3300  		if p.From3Type() == obj.TYPE_NONE {
  3301  			p.From3 = new(obj.Addr)
  3302  			*p.From3 = p.From
  3303  			p.From = obj.Addr{}
  3304  			p.From.Type = obj.TYPE_CONST
  3305  			p.From.Offset = p.To.Offset
  3306  			p.To.Offset = 0
  3307  		}
  3308  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3309  		if p.From3Type() == obj.TYPE_NONE {
  3310  			p.From3 = new(obj.Addr)
  3311  			*p.From3 = p.To
  3312  			p.To = obj.Addr{}
  3313  			p.To.Type = obj.TYPE_CONST
  3314  			p.To.Offset = p.From3.Offset
  3315  			p.From3.Offset = 0
  3316  		}
  3317  	}
  3318  
  3319  	if p.Ft == 0 {
  3320  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3321  	}
  3322  	if p.Tt == 0 {
  3323  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3324  	}
  3325  
  3326  	ft := int(p.Ft) * Ymax
  3327  	f3t := Ynone * Ymax
  3328  	if p.From3 != nil {
  3329  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3330  	}
  3331  	tt := int(p.Tt) * Ymax
  3332  
  3333  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3334  	z := 0
  3335  	var a *obj.Addr
  3336  	var l int
  3337  	var op int
  3338  	var q *obj.Prog
  3339  	var r *obj.Reloc
  3340  	var rel obj.Reloc
  3341  	var v int64
  3342  	for i := range o.ytab {
  3343  		yt := &o.ytab[i]
  3344  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3345  			switch o.prefix {
  3346  			case Px1: /* first option valid only in 32-bit mode */
  3347  				if ctxt.Mode == 64 && z == 0 {
  3348  					z += int(yt.zoffset) + xo
  3349  					continue
  3350  				}
  3351  			case Pq: /* 16 bit escape and opcode escape */
  3352  				ctxt.AsmBuf.Put2(Pe, Pm)
  3353  
  3354  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3355  				ctxt.Rexflag |= Pw
  3356  				ctxt.AsmBuf.Put2(Pe, Pm)
  3357  
  3358  			case Pq4: /*  66 0F 38 */
  3359  				ctxt.AsmBuf.Put3(0x66, 0x0F, 0x38)
  3360  
  3361  			case Pf2, /* xmm opcode escape */
  3362  				Pf3:
  3363  				ctxt.AsmBuf.Put2(o.prefix, Pm)
  3364  
  3365  			case Pef3:
  3366  				ctxt.AsmBuf.Put3(Pe, Pf3, Pm)
  3367  
  3368  			case Pfw: /* xmm opcode escape + REX.W */
  3369  				ctxt.Rexflag |= Pw
  3370  				ctxt.AsmBuf.Put2(Pf3, Pm)
  3371  
  3372  			case Pm: /* opcode escape */
  3373  				ctxt.AsmBuf.Put1(Pm)
  3374  
  3375  			case Pe: /* 16 bit escape */
  3376  				ctxt.AsmBuf.Put1(Pe)
  3377  
  3378  			case Pw: /* 64-bit escape */
  3379  				if p.Mode != 64 {
  3380  					ctxt.Diag("asmins: illegal 64: %v", p)
  3381  				}
  3382  				ctxt.Rexflag |= Pw
  3383  
  3384  			case Pw8: /* 64-bit escape if z >= 8 */
  3385  				if z >= 8 {
  3386  					if p.Mode != 64 {
  3387  						ctxt.Diag("asmins: illegal 64: %v", p)
  3388  					}
  3389  					ctxt.Rexflag |= Pw
  3390  				}
  3391  
  3392  			case Pb: /* botch */
  3393  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3394  					goto bad
  3395  				}
  3396  				// NOTE(rsc): This is probably safe to do always,
  3397  				// but when enabled it chooses different encodings
  3398  				// than the old cmd/internal/obj/i386 code did,
  3399  				// which breaks our "same bits out" checks.
  3400  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3401  				// in the original obj/i386, and it would encode
  3402  				// (using a valid, shorter form) as 3c 00 if we enabled
  3403  				// the call to bytereg here.
  3404  				if p.Mode == 64 {
  3405  					bytereg(&p.From, &p.Ft)
  3406  					bytereg(&p.To, &p.Tt)
  3407  				}
  3408  
  3409  			case P32: /* 32 bit but illegal if 64-bit mode */
  3410  				if p.Mode == 64 {
  3411  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3412  				}
  3413  
  3414  			case Py: /* 64-bit only, no prefix */
  3415  				if p.Mode != 64 {
  3416  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3417  				}
  3418  
  3419  			case Py1: /* 64-bit only if z < 1, no prefix */
  3420  				if z < 1 && p.Mode != 64 {
  3421  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3422  				}
  3423  
  3424  			case Py3: /* 64-bit only if z < 3, no prefix */
  3425  				if z < 3 && p.Mode != 64 {
  3426  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3427  				}
  3428  			}
  3429  
  3430  			if z >= len(o.op) {
  3431  				log.Fatalf("asmins bad table %v", p)
  3432  			}
  3433  			op = int(o.op[z])
  3434  			if op == 0x0f {
  3435  				ctxt.AsmBuf.Put1(byte(op))
  3436  				z++
  3437  				op = int(o.op[z])
  3438  			}
  3439  
  3440  			switch yt.zcase {
  3441  			default:
  3442  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3443  				return
  3444  
  3445  			case Zpseudo:
  3446  				break
  3447  
  3448  			case Zlit:
  3449  				for ; ; z++ {
  3450  					op = int(o.op[z])
  3451  					if op == 0 {
  3452  						break
  3453  					}
  3454  					ctxt.AsmBuf.Put1(byte(op))
  3455  				}
  3456  
  3457  			case Zlitm_r:
  3458  				for ; ; z++ {
  3459  					op = int(o.op[z])
  3460  					if op == 0 {
  3461  						break
  3462  					}
  3463  					ctxt.AsmBuf.Put1(byte(op))
  3464  				}
  3465  				asmand(ctxt, p, &p.From, &p.To)
  3466  
  3467  			case Zmb_r:
  3468  				bytereg(&p.From, &p.Ft)
  3469  				fallthrough
  3470  
  3471  			case Zm_r:
  3472  				ctxt.AsmBuf.Put1(byte(op))
  3473  				asmand(ctxt, p, &p.From, &p.To)
  3474  
  3475  			case Zm2_r:
  3476  				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
  3477  				asmand(ctxt, p, &p.From, &p.To)
  3478  
  3479  			case Zm_r_xm:
  3480  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3481  				asmand(ctxt, p, &p.From, &p.To)
  3482  
  3483  			case Zm_r_xm_nr:
  3484  				ctxt.Rexflag = 0
  3485  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3486  				asmand(ctxt, p, &p.From, &p.To)
  3487  
  3488  			case Zm_r_i_xm:
  3489  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3490  				asmand(ctxt, p, &p.From, p.From3)
  3491  				ctxt.AsmBuf.Put1(byte(p.To.Offset))
  3492  
  3493  			case Zibm_r, Zibr_m:
  3494  				for {
  3495  					tmp1 := z
  3496  					z++
  3497  					op = int(o.op[tmp1])
  3498  					if op == 0 {
  3499  						break
  3500  					}
  3501  					ctxt.AsmBuf.Put1(byte(op))
  3502  				}
  3503  				if yt.zcase == Zibr_m {
  3504  					asmand(ctxt, p, &p.To, p.From3)
  3505  				} else {
  3506  					asmand(ctxt, p, p.From3, &p.To)
  3507  				}
  3508  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3509  
  3510  			case Zaut_r:
  3511  				ctxt.AsmBuf.Put1(0x8d) // leal
  3512  				if p.From.Type != obj.TYPE_ADDR {
  3513  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3514  				}
  3515  				p.From.Type = obj.TYPE_MEM
  3516  				asmand(ctxt, p, &p.From, &p.To)
  3517  				p.From.Type = obj.TYPE_ADDR
  3518  
  3519  			case Zm_o:
  3520  				ctxt.AsmBuf.Put1(byte(op))
  3521  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3522  
  3523  			case Zr_m:
  3524  				ctxt.AsmBuf.Put1(byte(op))
  3525  				asmand(ctxt, p, &p.To, &p.From)
  3526  
  3527  			case Zvex_rm_v_r:
  3528  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3529  				asmand(ctxt, p, &p.From, &p.To)
  3530  
  3531  			case Zvex_i_r_v:
  3532  				asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
  3533  				regnum := byte(0x7)
  3534  				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
  3535  					regnum &= byte(p.From3.Reg - REG_X0)
  3536  				} else {
  3537  					regnum &= byte(p.From3.Reg - REG_Y0)
  3538  				}
  3539  				ctxt.AsmBuf.Put1(byte(o.op[z+2]) | regnum)
  3540  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3541  
  3542  			case Zvex_i_rm_v_r:
  3543  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3544  				asmand(ctxt, p, &p.From, &p.To)
  3545  				ctxt.AsmBuf.Put1(byte(p.From3.Offset))
  3546  
  3547  			case Zvex_i_rm_r:
  3548  				asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
  3549  				asmand(ctxt, p, p.From3, &p.To)
  3550  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3551  
  3552  			case Zvex_v_rm_r:
  3553  				asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3554  				asmand(ctxt, p, p.From3, &p.To)
  3555  
  3556  			case Zvex_r_v_rm:
  3557  				asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3558  				asmand(ctxt, p, &p.To, &p.From)
  3559  
  3560  			case Zr_m_xm:
  3561  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3562  				asmand(ctxt, p, &p.To, &p.From)
  3563  
  3564  			case Zr_m_xm_nr:
  3565  				ctxt.Rexflag = 0
  3566  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3567  				asmand(ctxt, p, &p.To, &p.From)
  3568  
  3569  			case Zo_m:
  3570  				ctxt.AsmBuf.Put1(byte(op))
  3571  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3572  
  3573  			case Zcallindreg:
  3574  				r = obj.Addrel(ctxt.Cursym)
  3575  				r.Off = int32(p.Pc)
  3576  				r.Type = obj.R_CALLIND
  3577  				r.Siz = 0
  3578  				fallthrough
  3579  
  3580  			case Zo_m64:
  3581  				ctxt.AsmBuf.Put1(byte(op))
  3582  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3583  
  3584  			case Zm_ibo:
  3585  				ctxt.AsmBuf.Put1(byte(op))
  3586  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3587  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3588  
  3589  			case Zibo_m:
  3590  				ctxt.AsmBuf.Put1(byte(op))
  3591  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3592  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3593  
  3594  			case Zibo_m_xm:
  3595  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3596  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3597  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3598  
  3599  			case Z_ib, Zib_:
  3600  				if yt.zcase == Zib_ {
  3601  					a = &p.From
  3602  				} else {
  3603  					a = &p.To
  3604  				}
  3605  				ctxt.AsmBuf.Put1(byte(op))
  3606  				if p.As == AXABORT {
  3607  					ctxt.AsmBuf.Put1(o.op[z+1])
  3608  				}
  3609  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3610  
  3611  			case Zib_rp:
  3612  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3613  				ctxt.AsmBuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3614  
  3615  			case Zil_rp:
  3616  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3617  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3618  				if o.prefix == Pe {
  3619  					v = vaddr(ctxt, p, &p.From, nil)
  3620  					ctxt.AsmBuf.PutInt16(int16(v))
  3621  				} else {
  3622  					relput4(ctxt, p, &p.From)
  3623  				}
  3624  
  3625  			case Zo_iw:
  3626  				ctxt.AsmBuf.Put1(byte(op))
  3627  				if p.From.Type != obj.TYPE_NONE {
  3628  					v = vaddr(ctxt, p, &p.From, nil)
  3629  					ctxt.AsmBuf.PutInt16(int16(v))
  3630  				}
  3631  
  3632  			case Ziq_rp:
  3633  				v = vaddr(ctxt, p, &p.From, &rel)
  3634  				l = int(v >> 32)
  3635  				if l == 0 && rel.Siz != 8 {
  3636  					//p->mark |= 0100;
  3637  					//print("zero: %llux %v\n", v, p);
  3638  					ctxt.Rexflag &^= (0x40 | Rxw)
  3639  
  3640  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3641  					ctxt.AsmBuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3642  					if rel.Type != 0 {
  3643  						r = obj.Addrel(ctxt.Cursym)
  3644  						*r = rel
  3645  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3646  					}
  3647  
  3648  					ctxt.AsmBuf.PutInt32(int32(v))
  3649  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3650  
  3651  					//p->mark |= 0100;
  3652  					//print("sign: %llux %v\n", v, p);
  3653  					ctxt.AsmBuf.Put1(0xc7)
  3654  					asmando(ctxt, p, &p.To, 0)
  3655  
  3656  					ctxt.AsmBuf.PutInt32(int32(v)) // need all 8
  3657  				} else {
  3658  					//print("all: %llux %v\n", v, p);
  3659  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3660  					ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3661  					if rel.Type != 0 {
  3662  						r = obj.Addrel(ctxt.Cursym)
  3663  						*r = rel
  3664  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3665  					}
  3666  
  3667  					ctxt.AsmBuf.PutInt64(v)
  3668  				}
  3669  
  3670  			case Zib_rr:
  3671  				ctxt.AsmBuf.Put1(byte(op))
  3672  				asmand(ctxt, p, &p.To, &p.To)
  3673  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3674  
  3675  			case Z_il, Zil_:
  3676  				if yt.zcase == Zil_ {
  3677  					a = &p.From
  3678  				} else {
  3679  					a = &p.To
  3680  				}
  3681  				ctxt.AsmBuf.Put1(byte(op))
  3682  				if o.prefix == Pe {
  3683  					v = vaddr(ctxt, p, a, nil)
  3684  					ctxt.AsmBuf.PutInt16(int16(v))
  3685  				} else {
  3686  					relput4(ctxt, p, a)
  3687  				}
  3688  
  3689  			case Zm_ilo, Zilo_m:
  3690  				ctxt.AsmBuf.Put1(byte(op))
  3691  				if yt.zcase == Zilo_m {
  3692  					a = &p.From
  3693  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3694  				} else {
  3695  					a = &p.To
  3696  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3697  				}
  3698  
  3699  				if o.prefix == Pe {
  3700  					v = vaddr(ctxt, p, a, nil)
  3701  					ctxt.AsmBuf.PutInt16(int16(v))
  3702  				} else {
  3703  					relput4(ctxt, p, a)
  3704  				}
  3705  
  3706  			case Zil_rr:
  3707  				ctxt.AsmBuf.Put1(byte(op))
  3708  				asmand(ctxt, p, &p.To, &p.To)
  3709  				if o.prefix == Pe {
  3710  					v = vaddr(ctxt, p, &p.From, nil)
  3711  					ctxt.AsmBuf.PutInt16(int16(v))
  3712  				} else {
  3713  					relput4(ctxt, p, &p.From)
  3714  				}
  3715  
  3716  			case Z_rp:
  3717  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3718  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3719  
  3720  			case Zrp_:
  3721  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3722  				ctxt.AsmBuf.Put1(byte(op + reg[p.From.Reg]))
  3723  
  3724  			case Zclr:
  3725  				ctxt.Rexflag &^= Pw
  3726  				ctxt.AsmBuf.Put1(byte(op))
  3727  				asmand(ctxt, p, &p.To, &p.To)
  3728  
  3729  			case Zcallcon, Zjmpcon:
  3730  				if yt.zcase == Zcallcon {
  3731  					ctxt.AsmBuf.Put1(byte(op))
  3732  				} else {
  3733  					ctxt.AsmBuf.Put1(o.op[z+1])
  3734  				}
  3735  				r = obj.Addrel(ctxt.Cursym)
  3736  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3737  				r.Type = obj.R_PCREL
  3738  				r.Siz = 4
  3739  				r.Add = p.To.Offset
  3740  				ctxt.AsmBuf.PutInt32(0)
  3741  
  3742  			case Zcallind:
  3743  				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
  3744  				r = obj.Addrel(ctxt.Cursym)
  3745  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3746  				r.Type = obj.R_ADDR
  3747  				r.Siz = 4
  3748  				r.Add = p.To.Offset
  3749  				r.Sym = p.To.Sym
  3750  				ctxt.AsmBuf.PutInt32(0)
  3751  
  3752  			case Zcall, Zcallduff:
  3753  				if p.To.Sym == nil {
  3754  					ctxt.Diag("call without target")
  3755  					log.Fatalf("bad code")
  3756  				}
  3757  
  3758  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3759  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3760  				}
  3761  
  3762  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
  3763  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3764  					// (the call jumps into the middle of the function).
  3765  					// This makes it possible to see call sites for duffcopy/duffzero in
  3766  					// BP-based profiling tools like Linux perf (which is the
  3767  					// whole point of obj.Framepointer_enabled).
  3768  					// MOVQ BP, -16(SP)
  3769  					// LEAQ -16(SP), BP
  3770  					ctxt.AsmBuf.Put(bpduff1)
  3771  				}
  3772  				ctxt.AsmBuf.Put1(byte(op))
  3773  				r = obj.Addrel(ctxt.Cursym)
  3774  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3775  				r.Sym = p.To.Sym
  3776  				r.Add = p.To.Offset
  3777  				r.Type = obj.R_CALL
  3778  				r.Siz = 4
  3779  				ctxt.AsmBuf.PutInt32(0)
  3780  
  3781  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
  3782  					// Pop BP pushed above.
  3783  					// MOVQ 0(BP), BP
  3784  					ctxt.AsmBuf.Put(bpduff2)
  3785  				}
  3786  
  3787  			// TODO: jump across functions needs reloc
  3788  			case Zbr, Zjmp, Zloop:
  3789  				if p.As == AXBEGIN {
  3790  					ctxt.AsmBuf.Put1(byte(op))
  3791  				}
  3792  				if p.To.Sym != nil {
  3793  					if yt.zcase != Zjmp {
  3794  						ctxt.Diag("branch to ATEXT")
  3795  						log.Fatalf("bad code")
  3796  					}
  3797  
  3798  					ctxt.AsmBuf.Put1(o.op[z+1])
  3799  					r = obj.Addrel(ctxt.Cursym)
  3800  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3801  					r.Sym = p.To.Sym
  3802  					r.Type = obj.R_PCREL
  3803  					r.Siz = 4
  3804  					ctxt.AsmBuf.PutInt32(0)
  3805  					break
  3806  				}
  3807  
  3808  				// Assumes q is in this function.
  3809  				// TODO: Check in input, preserve in brchain.
  3810  
  3811  				// Fill in backward jump now.
  3812  				q = p.Pcond
  3813  
  3814  				if q == nil {
  3815  					ctxt.Diag("jmp/branch/loop without target")
  3816  					log.Fatalf("bad code")
  3817  				}
  3818  
  3819  				if p.Back&1 != 0 {
  3820  					v = q.Pc - (p.Pc + 2)
  3821  					if v >= -128 && p.As != AXBEGIN {
  3822  						if p.As == AJCXZL {
  3823  							ctxt.AsmBuf.Put1(0x67)
  3824  						}
  3825  						ctxt.AsmBuf.Put2(byte(op), byte(v))
  3826  					} else if yt.zcase == Zloop {
  3827  						ctxt.Diag("loop too far: %v", p)
  3828  					} else {
  3829  						v -= 5 - 2
  3830  						if p.As == AXBEGIN {
  3831  							v--
  3832  						}
  3833  						if yt.zcase == Zbr {
  3834  							ctxt.AsmBuf.Put1(0x0f)
  3835  							v--
  3836  						}
  3837  
  3838  						ctxt.AsmBuf.Put1(o.op[z+1])
  3839  						ctxt.AsmBuf.PutInt32(int32(v))
  3840  					}
  3841  
  3842  					break
  3843  				}
  3844  
  3845  				// Annotate target; will fill in later.
  3846  				p.Forwd = q.Rel
  3847  
  3848  				q.Rel = p
  3849  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3850  					if p.As == AJCXZL {
  3851  						ctxt.AsmBuf.Put1(0x67)
  3852  					}
  3853  					ctxt.AsmBuf.Put2(byte(op), 0)
  3854  				} else if yt.zcase == Zloop {
  3855  					ctxt.Diag("loop too far: %v", p)
  3856  				} else {
  3857  					if yt.zcase == Zbr {
  3858  						ctxt.AsmBuf.Put1(0x0f)
  3859  					}
  3860  					ctxt.AsmBuf.Put1(o.op[z+1])
  3861  					ctxt.AsmBuf.PutInt32(0)
  3862  				}
  3863  
  3864  				break
  3865  
  3866  			/*
  3867  				v = q->pc - p->pc - 2;
  3868  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3869  					*ctxt->andptr++ = op;
  3870  					*ctxt->andptr++ = v;
  3871  				} else {
  3872  					v -= 5-2;
  3873  					if(yt.zcase == Zbr) {
  3874  						*ctxt->andptr++ = 0x0f;
  3875  						v--;
  3876  					}
  3877  					*ctxt->andptr++ = o->op[z+1];
  3878  					*ctxt->andptr++ = v;
  3879  					*ctxt->andptr++ = v>>8;
  3880  					*ctxt->andptr++ = v>>16;
  3881  					*ctxt->andptr++ = v>>24;
  3882  				}
  3883  			*/
  3884  
  3885  			case Zbyte:
  3886  				v = vaddr(ctxt, p, &p.From, &rel)
  3887  				if rel.Siz != 0 {
  3888  					rel.Siz = uint8(op)
  3889  					r = obj.Addrel(ctxt.Cursym)
  3890  					*r = rel
  3891  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3892  				}
  3893  
  3894  				ctxt.AsmBuf.Put1(byte(v))
  3895  				if op > 1 {
  3896  					ctxt.AsmBuf.Put1(byte(v >> 8))
  3897  					if op > 2 {
  3898  						ctxt.AsmBuf.PutInt16(int16(v >> 16))
  3899  						if op > 4 {
  3900  							ctxt.AsmBuf.PutInt32(int32(v >> 32))
  3901  						}
  3902  					}
  3903  				}
  3904  			}
  3905  
  3906  			return
  3907  		}
  3908  		z += int(yt.zoffset) + xo
  3909  	}
  3910  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3911  		var pp obj.Prog
  3912  		var t []byte
  3913  		if p.As == mo[0].as {
  3914  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3915  				t = mo[0].op[:]
  3916  				switch mo[0].code {
  3917  				default:
  3918  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3919  
  3920  				case 0: /* lit */
  3921  					for z = 0; t[z] != E; z++ {
  3922  						ctxt.AsmBuf.Put1(t[z])
  3923  					}
  3924  
  3925  				case 1: /* r,m */
  3926  					ctxt.AsmBuf.Put1(t[0])
  3927  					asmando(ctxt, p, &p.To, int(t[1]))
  3928  
  3929  				case 2: /* m,r */
  3930  					ctxt.AsmBuf.Put1(t[0])
  3931  					asmando(ctxt, p, &p.From, int(t[1]))
  3932  
  3933  				case 3: /* r,m - 2op */
  3934  					ctxt.AsmBuf.Put2(t[0], t[1])
  3935  					asmando(ctxt, p, &p.To, int(t[2]))
  3936  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3937  
  3938  				case 4: /* m,r - 2op */
  3939  					ctxt.AsmBuf.Put2(t[0], t[1])
  3940  					asmando(ctxt, p, &p.From, int(t[2]))
  3941  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3942  
  3943  				case 5: /* load full pointer, trash heap */
  3944  					if t[0] != 0 {
  3945  						ctxt.AsmBuf.Put1(t[0])
  3946  					}
  3947  					switch p.To.Index {
  3948  					default:
  3949  						goto bad
  3950  
  3951  					case REG_DS:
  3952  						ctxt.AsmBuf.Put1(0xc5)
  3953  
  3954  					case REG_SS:
  3955  						ctxt.AsmBuf.Put2(0x0f, 0xb2)
  3956  
  3957  					case REG_ES:
  3958  						ctxt.AsmBuf.Put1(0xc4)
  3959  
  3960  					case REG_FS:
  3961  						ctxt.AsmBuf.Put2(0x0f, 0xb4)
  3962  
  3963  					case REG_GS:
  3964  						ctxt.AsmBuf.Put2(0x0f, 0xb5)
  3965  					}
  3966  
  3967  					asmand(ctxt, p, &p.From, &p.To)
  3968  
  3969  				case 6: /* double shift */
  3970  					if t[0] == Pw {
  3971  						if p.Mode != 64 {
  3972  							ctxt.Diag("asmins: illegal 64: %v", p)
  3973  						}
  3974  						ctxt.Rexflag |= Pw
  3975  						t = t[1:]
  3976  					} else if t[0] == Pe {
  3977  						ctxt.AsmBuf.Put1(Pe)
  3978  						t = t[1:]
  3979  					}
  3980  
  3981  					switch p.From.Type {
  3982  					default:
  3983  						goto bad
  3984  
  3985  					case obj.TYPE_CONST:
  3986  						ctxt.AsmBuf.Put2(0x0f, t[0])
  3987  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3988  						ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3989  
  3990  					case obj.TYPE_REG:
  3991  						switch p.From.Reg {
  3992  						default:
  3993  							goto bad
  3994  
  3995  						case REG_CL, REG_CX:
  3996  							ctxt.AsmBuf.Put2(0x0f, t[1])
  3997  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3998  						}
  3999  					}
  4000  
  4001  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4002  				// where you load the TLS base register into a register and then index off that
  4003  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4004  				// are handled in prefixof above and should not be listed here.
  4005  				case 7: /* mov tls, r */
  4006  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  4007  						ctxt.Diag("invalid load of TLS: %v", p)
  4008  					}
  4009  
  4010  					if p.Mode == 32 {
  4011  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4012  						// where you load the TLS base register into a register and then index off that
  4013  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4014  						// are handled in prefixof above and should not be listed here.
  4015  						switch ctxt.Headtype {
  4016  						default:
  4017  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  4018  
  4019  						case obj.Hlinux,
  4020  							obj.Hnacl:
  4021  							if ctxt.Flag_shared {
  4022  								// Note that this is not generating the same insns as the other cases.
  4023  								//     MOV TLS, dst
  4024  								// becomes
  4025  								//     call __x86.get_pc_thunk.dst
  4026  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4027  								// which is encoded as
  4028  								//     call __x86.get_pc_thunk.dst
  4029  								//     movq 0(dst), dst
  4030  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4031  								// is g, which we can't check here, but will when we assemble the second
  4032  								// instruction.
  4033  								dst := p.To.Reg
  4034  								ctxt.AsmBuf.Put1(0xe8)
  4035  								r = obj.Addrel(ctxt.Cursym)
  4036  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4037  								r.Type = obj.R_CALL
  4038  								r.Siz = 4
  4039  								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk."+strings.ToLower(Rconv(int(dst))), 0)
  4040  								ctxt.AsmBuf.PutInt32(0)
  4041  
  4042  								ctxt.AsmBuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4043  								r = obj.Addrel(ctxt.Cursym)
  4044  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4045  								r.Type = obj.R_TLS_IE
  4046  								r.Siz = 4
  4047  								r.Add = 2
  4048  								ctxt.AsmBuf.PutInt32(0)
  4049  							} else {
  4050  								// ELF TLS base is 0(GS).
  4051  								pp.From = p.From
  4052  
  4053  								pp.From.Type = obj.TYPE_MEM
  4054  								pp.From.Reg = REG_GS
  4055  								pp.From.Offset = 0
  4056  								pp.From.Index = REG_NONE
  4057  								pp.From.Scale = 0
  4058  								ctxt.AsmBuf.Put2(0x65, // GS
  4059  									0x8B)
  4060  								asmand(ctxt, p, &pp.From, &p.To)
  4061  							}
  4062  						case obj.Hplan9:
  4063  							if ctxt.Plan9privates == nil {
  4064  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4065  							}
  4066  							pp.From = obj.Addr{}
  4067  							pp.From.Type = obj.TYPE_MEM
  4068  							pp.From.Name = obj.NAME_EXTERN
  4069  							pp.From.Sym = ctxt.Plan9privates
  4070  							pp.From.Offset = 0
  4071  							pp.From.Index = REG_NONE
  4072  							ctxt.AsmBuf.Put1(0x8B)
  4073  							asmand(ctxt, p, &pp.From, &p.To)
  4074  
  4075  						case obj.Hwindows:
  4076  							// Windows TLS base is always 0x14(FS).
  4077  							pp.From = p.From
  4078  
  4079  							pp.From.Type = obj.TYPE_MEM
  4080  							pp.From.Reg = REG_FS
  4081  							pp.From.Offset = 0x14
  4082  							pp.From.Index = REG_NONE
  4083  							pp.From.Scale = 0
  4084  							ctxt.AsmBuf.Put2(0x64, // FS
  4085  								0x8B)
  4086  							asmand(ctxt, p, &pp.From, &p.To)
  4087  						}
  4088  						break
  4089  					}
  4090  
  4091  					switch ctxt.Headtype {
  4092  					default:
  4093  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  4094  
  4095  					case obj.Hlinux:
  4096  						if !ctxt.Flag_shared {
  4097  							log.Fatalf("unknown TLS base location for linux without -shared")
  4098  						}
  4099  						// Note that this is not generating the same insn as the other cases.
  4100  						//     MOV TLS, R_to
  4101  						// becomes
  4102  						//     movq g@gottpoff(%rip), R_to
  4103  						// which is encoded as
  4104  						//     movq 0(%rip), R_to
  4105  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4106  						// is g, which we can't check here, but will when we assemble the second
  4107  						// instruction.
  4108  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4109  
  4110  						ctxt.AsmBuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4111  						r = obj.Addrel(ctxt.Cursym)
  4112  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4113  						r.Type = obj.R_TLS_IE
  4114  						r.Siz = 4
  4115  						r.Add = -4
  4116  						ctxt.AsmBuf.PutInt32(0)
  4117  
  4118  					case obj.Hplan9:
  4119  						if ctxt.Plan9privates == nil {
  4120  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4121  						}
  4122  						pp.From = obj.Addr{}
  4123  						pp.From.Type = obj.TYPE_MEM
  4124  						pp.From.Name = obj.NAME_EXTERN
  4125  						pp.From.Sym = ctxt.Plan9privates
  4126  						pp.From.Offset = 0
  4127  						pp.From.Index = REG_NONE
  4128  						ctxt.Rexflag |= Pw
  4129  						ctxt.AsmBuf.Put1(0x8B)
  4130  						asmand(ctxt, p, &pp.From, &p.To)
  4131  
  4132  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4133  						// TLS base is 0(FS).
  4134  						pp.From = p.From
  4135  
  4136  						pp.From.Type = obj.TYPE_MEM
  4137  						pp.From.Name = obj.NAME_NONE
  4138  						pp.From.Reg = REG_NONE
  4139  						pp.From.Offset = 0
  4140  						pp.From.Index = REG_NONE
  4141  						pp.From.Scale = 0
  4142  						ctxt.Rexflag |= Pw
  4143  						ctxt.AsmBuf.Put2(0x64, // FS
  4144  							0x8B)
  4145  						asmand(ctxt, p, &pp.From, &p.To)
  4146  
  4147  					case obj.Hwindows:
  4148  						// Windows TLS base is always 0x28(GS).
  4149  						pp.From = p.From
  4150  
  4151  						pp.From.Type = obj.TYPE_MEM
  4152  						pp.From.Name = obj.NAME_NONE
  4153  						pp.From.Reg = REG_GS
  4154  						pp.From.Offset = 0x28
  4155  						pp.From.Index = REG_NONE
  4156  						pp.From.Scale = 0
  4157  						ctxt.Rexflag |= Pw
  4158  						ctxt.AsmBuf.Put2(0x65, // GS
  4159  							0x8B)
  4160  						asmand(ctxt, p, &pp.From, &p.To)
  4161  					}
  4162  				}
  4163  				return
  4164  			}
  4165  		}
  4166  	}
  4167  	goto bad
  4168  
  4169  bad:
  4170  	if p.Mode != 64 {
  4171  		/*
  4172  		 * here, the assembly has failed.
  4173  		 * if its a byte instruction that has
  4174  		 * unaddressable registers, try to
  4175  		 * exchange registers and reissue the
  4176  		 * instruction with the operands renamed.
  4177  		 */
  4178  		pp := *p
  4179  
  4180  		unbytereg(&pp.From, &pp.Ft)
  4181  		unbytereg(&pp.To, &pp.Tt)
  4182  
  4183  		z := int(p.From.Reg)
  4184  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4185  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4186  			// For now, different to keep bit-for-bit compatibility.
  4187  			if p.Mode == 32 {
  4188  				breg := byteswapreg(ctxt, &p.To)
  4189  				if breg != REG_AX {
  4190  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4191  					asmando(ctxt, p, &p.From, reg[breg])
  4192  					subreg(&pp, z, breg)
  4193  					doasm(ctxt, &pp)
  4194  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4195  					asmando(ctxt, p, &p.From, reg[breg])
  4196  				} else {
  4197  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4198  					subreg(&pp, z, REG_AX)
  4199  					doasm(ctxt, &pp)
  4200  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4201  				}
  4202  				return
  4203  			}
  4204  
  4205  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4206  				// We certainly don't want to exchange
  4207  				// with AX if the op is MUL or DIV.
  4208  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4209  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4210  				subreg(&pp, z, REG_BX)
  4211  				doasm(ctxt, &pp)
  4212  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4213  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4214  			} else {
  4215  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4216  				subreg(&pp, z, REG_AX)
  4217  				doasm(ctxt, &pp)
  4218  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4219  			}
  4220  			return
  4221  		}
  4222  
  4223  		z = int(p.To.Reg)
  4224  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4225  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4226  			// For now, different to keep bit-for-bit compatibility.
  4227  			if p.Mode == 32 {
  4228  				breg := byteswapreg(ctxt, &p.From)
  4229  				if breg != REG_AX {
  4230  					ctxt.AsmBuf.Put1(0x87) //xchg rhs,bx
  4231  					asmando(ctxt, p, &p.To, reg[breg])
  4232  					subreg(&pp, z, breg)
  4233  					doasm(ctxt, &pp)
  4234  					ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4235  					asmando(ctxt, p, &p.To, reg[breg])
  4236  				} else {
  4237  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4238  					subreg(&pp, z, REG_AX)
  4239  					doasm(ctxt, &pp)
  4240  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4241  				}
  4242  				return
  4243  			}
  4244  
  4245  			if isax(&p.From) {
  4246  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4247  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4248  				subreg(&pp, z, REG_BX)
  4249  				doasm(ctxt, &pp)
  4250  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4251  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4252  			} else {
  4253  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4254  				subreg(&pp, z, REG_AX)
  4255  				doasm(ctxt, &pp)
  4256  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4257  			}
  4258  			return
  4259  		}
  4260  	}
  4261  
  4262  	ctxt.Diag("invalid instruction: %v", p)
  4263  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4264  	return
  4265  }
  4266  
  4267  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4268  // which is not referenced in a.
  4269  // If a is empty, it returns BX to account for MULB-like instructions
  4270  // that might use DX and AX.
  4271  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4272  	cand := 1
  4273  	canc := cand
  4274  	canb := canc
  4275  	cana := canb
  4276  
  4277  	if a.Type == obj.TYPE_NONE {
  4278  		cand = 0
  4279  		cana = cand
  4280  	}
  4281  
  4282  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4283  		switch a.Reg {
  4284  		case REG_NONE:
  4285  			cand = 0
  4286  			cana = cand
  4287  
  4288  		case REG_AX, REG_AL, REG_AH:
  4289  			cana = 0
  4290  
  4291  		case REG_BX, REG_BL, REG_BH:
  4292  			canb = 0
  4293  
  4294  		case REG_CX, REG_CL, REG_CH:
  4295  			canc = 0
  4296  
  4297  		case REG_DX, REG_DL, REG_DH:
  4298  			cand = 0
  4299  		}
  4300  	}
  4301  
  4302  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4303  		switch a.Index {
  4304  		case REG_AX:
  4305  			cana = 0
  4306  
  4307  		case REG_BX:
  4308  			canb = 0
  4309  
  4310  		case REG_CX:
  4311  			canc = 0
  4312  
  4313  		case REG_DX:
  4314  			cand = 0
  4315  		}
  4316  	}
  4317  
  4318  	if cana != 0 {
  4319  		return REG_AX
  4320  	}
  4321  	if canb != 0 {
  4322  		return REG_BX
  4323  	}
  4324  	if canc != 0 {
  4325  		return REG_CX
  4326  	}
  4327  	if cand != 0 {
  4328  		return REG_DX
  4329  	}
  4330  
  4331  	ctxt.Diag("impossible byte register")
  4332  	log.Fatalf("bad code")
  4333  	return 0
  4334  }
  4335  
  4336  func isbadbyte(a *obj.Addr) bool {
  4337  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4338  }
  4339  
  4340  var naclret = []uint8{
  4341  	0x5e, // POPL SI
  4342  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4343  	0x83,
  4344  	0xe6,
  4345  	0xe0, // ANDL $~31, SI
  4346  	0x4c,
  4347  	0x01,
  4348  	0xfe, // ADDQ R15, SI
  4349  	0xff,
  4350  	0xe6, // JMP SI
  4351  }
  4352  
  4353  var naclret8 = []uint8{
  4354  	0x5d, // POPL BP
  4355  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4356  	0x83,
  4357  	0xe5,
  4358  	0xe0, // ANDL $~31, BP
  4359  	0xff,
  4360  	0xe5, // JMP BP
  4361  }
  4362  
  4363  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4364  
  4365  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4366  
  4367  var naclmovs = []uint8{
  4368  	0x89,
  4369  	0xf6, // MOVL SI, SI
  4370  	0x49,
  4371  	0x8d,
  4372  	0x34,
  4373  	0x37, // LEAQ (R15)(SI*1), SI
  4374  	0x89,
  4375  	0xff, // MOVL DI, DI
  4376  	0x49,
  4377  	0x8d,
  4378  	0x3c,
  4379  	0x3f, // LEAQ (R15)(DI*1), DI
  4380  }
  4381  
  4382  var naclstos = []uint8{
  4383  	0x89,
  4384  	0xff, // MOVL DI, DI
  4385  	0x49,
  4386  	0x8d,
  4387  	0x3c,
  4388  	0x3f, // LEAQ (R15)(DI*1), DI
  4389  }
  4390  
  4391  func nacltrunc(ctxt *obj.Link, reg int) {
  4392  	if reg >= REG_R8 {
  4393  		ctxt.AsmBuf.Put1(0x45)
  4394  	}
  4395  	reg = (reg - REG_AX) & 7
  4396  	ctxt.AsmBuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4397  }
  4398  
  4399  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4400  	ctxt.AsmBuf.Reset()
  4401  	ctxt.Asmode = int(p.Mode)
  4402  
  4403  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4404  		switch p.As {
  4405  		case obj.ARET:
  4406  			ctxt.AsmBuf.Put(naclret8)
  4407  			return
  4408  
  4409  		case obj.ACALL,
  4410  			obj.AJMP:
  4411  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4412  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4413  			}
  4414  
  4415  		case AINT:
  4416  			ctxt.AsmBuf.Put1(0xf4)
  4417  			return
  4418  		}
  4419  	}
  4420  
  4421  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4422  		if p.As == AREP {
  4423  			ctxt.Rep++
  4424  			return
  4425  		}
  4426  
  4427  		if p.As == AREPN {
  4428  			ctxt.Repn++
  4429  			return
  4430  		}
  4431  
  4432  		if p.As == ALOCK {
  4433  			ctxt.Lock++
  4434  			return
  4435  		}
  4436  
  4437  		if p.As != ALEAQ && p.As != ALEAL {
  4438  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4439  				nacltrunc(ctxt, int(p.From.Index))
  4440  			}
  4441  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4442  				nacltrunc(ctxt, int(p.To.Index))
  4443  			}
  4444  		}
  4445  
  4446  		switch p.As {
  4447  		case obj.ARET:
  4448  			ctxt.AsmBuf.Put(naclret)
  4449  			return
  4450  
  4451  		case obj.ACALL,
  4452  			obj.AJMP:
  4453  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4454  				// ANDL $~31, reg
  4455  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4456  				// ADDQ R15, reg
  4457  				ctxt.AsmBuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4458  			}
  4459  
  4460  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4461  				// ANDL $~31, reg
  4462  				ctxt.AsmBuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4463  				// ADDQ R15, reg
  4464  				ctxt.AsmBuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4465  			}
  4466  
  4467  		case AINT:
  4468  			ctxt.AsmBuf.Put1(0xf4)
  4469  			return
  4470  
  4471  		case ASCASB,
  4472  			ASCASW,
  4473  			ASCASL,
  4474  			ASCASQ,
  4475  			ASTOSB,
  4476  			ASTOSW,
  4477  			ASTOSL,
  4478  			ASTOSQ:
  4479  			ctxt.AsmBuf.Put(naclstos)
  4480  
  4481  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4482  			ctxt.AsmBuf.Put(naclmovs)
  4483  		}
  4484  
  4485  		if ctxt.Rep != 0 {
  4486  			ctxt.AsmBuf.Put1(0xf3)
  4487  			ctxt.Rep = 0
  4488  		}
  4489  
  4490  		if ctxt.Repn != 0 {
  4491  			ctxt.AsmBuf.Put1(0xf2)
  4492  			ctxt.Repn = 0
  4493  		}
  4494  
  4495  		if ctxt.Lock != 0 {
  4496  			ctxt.AsmBuf.Put1(0xf0)
  4497  			ctxt.Lock = 0
  4498  		}
  4499  	}
  4500  
  4501  	ctxt.Rexflag = 0
  4502  	ctxt.Vexflag = 0
  4503  	mark := ctxt.AsmBuf.Len()
  4504  	ctxt.Asmode = int(p.Mode)
  4505  	doasm(ctxt, p)
  4506  	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
  4507  		/*
  4508  		 * as befits the whole approach of the architecture,
  4509  		 * the rex prefix must appear before the first opcode byte
  4510  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4511  		 * before the 0f opcode escape!), or it might be ignored.
  4512  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4513  		 */
  4514  		if p.Mode != 64 {
  4515  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4516  		}
  4517  		n := ctxt.AsmBuf.Len()
  4518  		var np int
  4519  		for np = mark; np < n; np++ {
  4520  			c := ctxt.AsmBuf.Peek(np)
  4521  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4522  				break
  4523  			}
  4524  		}
  4525  		ctxt.AsmBuf.Insert(np, byte(0x40|ctxt.Rexflag))
  4526  	}
  4527  
  4528  	n := ctxt.AsmBuf.Len()
  4529  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4530  		r := &ctxt.Cursym.R[i]
  4531  		if int64(r.Off) < p.Pc {
  4532  			break
  4533  		}
  4534  		if ctxt.Rexflag != 0 {
  4535  			r.Off++
  4536  		}
  4537  		if r.Type == obj.R_PCREL {
  4538  			if p.Mode == 64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4539  				// PC-relative addressing is relative to the end of the instruction,
  4540  				// but the relocations applied by the linker are relative to the end
  4541  				// of the relocation. Because immediate instruction
  4542  				// arguments can follow the PC-relative memory reference in the
  4543  				// instruction encoding, the two may not coincide. In this case,
  4544  				// adjust addend so that linker can keep relocating relative to the
  4545  				// end of the relocation.
  4546  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4547  			} else if p.Mode == 32 {
  4548  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4549  				// assumes that the previous instruction loaded the PC of the end
  4550  				// of that instruction into CX, so the adjustment is relative to
  4551  				// that.
  4552  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4553  			}
  4554  		}
  4555  		if r.Type == obj.R_GOTPCREL && p.Mode == 32 {
  4556  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4557  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4558  		}
  4559  
  4560  	}
  4561  
  4562  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4563  		switch p.To.Reg {
  4564  		case REG_SP:
  4565  			ctxt.AsmBuf.Put(naclspfix)
  4566  		case REG_BP:
  4567  			ctxt.AsmBuf.Put(naclbpfix)
  4568  		}
  4569  	}
  4570  }