github.com/4ad/go@v0.0.0-20161219182952-69a12818b605/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"encoding/binary"
    36  	"fmt"
    37  	"log"
    38  	"strings"
    39  )
    40  
    41  // Instruction layout.
    42  
    43  const (
    44  	// Loop alignment constants:
    45  	// want to align loop entry to LoopAlign-byte boundary,
    46  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    47  	// We define a loop entry as the target of a backward jump.
    48  	//
    49  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    50  	// and it aligns all jump targets, not just backward jump targets.
    51  	//
    52  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    53  	// is very slight but negative, so the alignment is disabled by
    54  	// setting MaxLoopPad = 0. The code is here for reference and
    55  	// for future experiments.
    56  	//
    57  	LoopAlign  = 16 // boundary, in bytes, that a loop entry would be aligned to
    58  	MaxLoopPad = 0 // maximum NOP padding in bytes; 0 turns loop alignment off
    59  	FuncAlign  = 16 // NOTE(review): presumably function entry alignment in bytes; not used in this part of the file - confirm
    60  )
    61  
        // Optab is one row of the optab instruction table. As described in the
        // long comment before optab, the assembler finds the row whose as matches
        // the instruction, scans ytab for a row matching the operand classes, and
        // uses the corresponding bytes of op to lay down the encoding.
    62  type Optab struct {
    63  	as     obj.As // instruction this row describes
    64  	ytab   []ytab // accepted operand patterns, scanned in order
    65  	prefix uint8 // one of the P* constants; controls the prefix bytes to emit
    66  	op     [23]uint8 // opcode bytes, shared out among the ytab rows
    67  }
    68  
        // ytab is one accepted operand pattern for an instruction together with
        // the encoding case to use when it matches. The long comment before optab
        // describes the older four-column C layout (from, to, Ztype, count); this
        // struct additionally carries from3.
    69  type ytab struct {
    70  	from    uint8 // Y* class required of the from operand
    71  	from3   uint8 // Y* class of the extra source operand; Ynone in two-operand rows
    72  	to      uint8 // Y* class required of the to operand
    73  	zcase   uint8 // Z* constant selecting how the bytes are laid down
    74  	zoffset uint8 // count of Optab.op bytes belonging to this row; presumably what z advances by when the row is skipped - confirm in doasm
    75  }
    76  
        // Movtab is a table row keyed by an instruction, carrying three uint8
        // operand fields, a code byte and up to four op bytes. The table that uses
        // it is outside this part of the file.
        // NOTE(review): ft, f3t and tt look like the from/from3/to Y* classes by
        // analogy with ytab - confirm against the table and its consumer.
    77  type Movtab struct {
    78  	as   obj.As // instruction this row applies to
    79  	ft   uint8
    80  	f3t  uint8
    81  	tt   uint8
    82  	code uint8
    83  	op   [4]uint8
    84  }
    85  
        // Operand classes ("Ytypes"). The ytab tables name these in their from,
        // from3 and to columns. Per the comment before optab, oclass computes the
        // most specific class of an operand and ycover records which more general
        // classes that operand also satisfies.
        // The numeric values come from iota, so the order of this list must not
        // change without checking every user.
    86  const (
    87  	Yxxx = iota
    88  	Ynone
    89  	Yi0 // $0
    90  	Yi1 // $1
    91  	Yi8 // $x, x fits in int8
    92  	Yu8 // $x, x fits in uint8
    93  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
    94  	Ys32 // signed 32-bit constant (per the comment before optab)
    95  	Yi32
    96  	Yi64
    97  	Yiauto
    98  	Yal
    99  	Ycl
   100  	Yax
   101  	Ycx
   102  	Yrb
   103  	Yrl
   104  	Yrl32 // Yrl on 32-bit system
   105  	Yrf
   106  	Yf0
   107  	Yrx
   108  	Ymb
   109  	Yml
   110  	Ym
   111  	Ybr
   112  	Ycs
   113  	Yss
   114  	Yds
   115  	Yes
   116  	Yfs
   117  	Ygs
   118  	Ygdtr
   119  	Yidtr
   120  	Yldtr
   121  	Ymsw
   122  	Ytask
   123  	Ycr0
   124  	Ycr1
   125  	Ycr2
   126  	Ycr3
   127  	Ycr4
   128  	Ycr5
   129  	Ycr6
   130  	Ycr7
   131  	Ycr8
   132  	Ydr0
   133  	Ydr1
   134  	Ydr2
   135  	Ydr3
   136  	Ydr4
   137  	Ydr5
   138  	Ydr6
   139  	Ydr7
   140  	Ytr0
   141  	Ytr1
   142  	Ytr2
   143  	Ytr3
   144  	Ytr4
   145  	Ytr5
   146  	Ytr6
   147  	Ytr7
   148  	Ymr
   149  	Ymm
   150  	Yxr
   151  	Yxm
   152  	Yyr
   153  	Yym
   154  	Ytls
   155  	Ytextsize
   156  	Yindir
   157  	Ymax // number of classes above; ycover is sized Ymax*Ymax
   158  )
   159  
        // Encoding cases ("Ztypes"). The zcase column of a ytab row holds one of
        // these. Per the comment before optab, the bytes laid down for an
        // instruction are a function of the Ztype and the row's opcode bytes.
        // The numeric values come from iota, so the order of this list must not
        // change without checking every user.
   160  const (
   161  	Zxxx = iota
   162  	Zlit
   163  	Zlitm_r
   164  	Z_rp
   165  	Zbr
   166  	Zcall
   167  	Zcallcon
   168  	Zcallduff
   169  	Zcallind
   170  	Zcallindreg
   171  	Zib_
   172  	Zib_rp
   173  	Zibo_m // per the comment before optab: opcode byte, addressing mode, then one immediate byte
   174  	Zibo_m_xm
   175  	Zil_
   176  	Zil_rp
   177  	Ziq_rp
   178  	Zilo_m // per the comment before optab: like Zibo_m but with a long (32-bit) immediate
   179  	Zjmp
   180  	Zjmpcon
   181  	Zloop
   182  	Zo_iw
   183  	Zm_o
   184  	Zm_r
   185  	Zm2_r
   186  	Zm_r_xm
   187  	Zm_r_i_xm
   188  	Zm_r_xm_nr
   189  	Zr_m_xm_nr
   190  	Zibm_r /* mmx1,mmx2/mem64,imm8 */
   191  	Zibr_m
   192  	Zmb_r
   193  	Zaut_r
   194  	Zo_m
   195  	Zo_m64
   196  	Zpseudo
   197  	Zr_m
   198  	Zr_m_xm
   199  	Zrp_
   200  	Z_ib
   201  	Z_il
   202  	Zm_ibo
   203  	Zm_ilo
   204  	Zib_rr
   205  	Zil_rr
   206  	Zclr
   207  	Zbyte
   208  	Zvex_rm_v_r
   209  	Zvex_r_v_rm
   210  	Zvex_v_rm_r
   211  	Zvex_i_rm_r
   212  	Zvex_i_r_v
   213  	Zvex_i_rm_v_r
   214  	Zmax // one more than the last encoding case
   215  )
   216  
        // Prefix selectors stored in Optab.prefix; per the comment before optab
        // they control which prefix bytes are emitted. Some values are the literal
        // prefix or escape byte, others are marked symbolic and only need to be
        // distinct.
   217  const (
   218  	Px   = 0
   219  	Px1  = 1    // symbolic; exact value doesn't matter
   220  	P32  = 0x32 /* 32-bit only */
   221  	Pe   = 0x66 /* operand escape */
   222  	Pm   = 0x0f /* 2byte opcode escape */
   223  	Pq   = 0xff /* both escapes: 66 0f */
   224  	Pb   = 0xfe /* byte operands */
   225  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   226  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   227  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   228  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   229  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   230  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   231  	Pw   = 0x48 /* Rex.w */
   232  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   233  	Py   = 0x80 /* defaults to 64-bit mode */
   234  	Py1  = 0x81 // symbolic; exact value doesn't matter
   235  	Py3  = 0x83 // symbolic; exact value doesn't matter
   236  	Pvex = 0x84 // symbolic: exact value doesn't matter
   237  
        	// Individual bits of the REX prefix (W, R, X, B from high to low).
   238  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   239  	Rxr = 1 << 2 /* extend modrm reg */
   240  	Rxx = 1 << 1 /* extend sib index */
   241  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   242  )
   243  
   244  const (
   245  	// Encoding for VEX prefix in tables.
   246  	// The P, L, and W fields are chosen to match
   247  	// their eventual locations in the VEX prefix bytes.
        	// As stored here: P occupies bits 0-1, L bit 2, M bits 3-4 and W bit 7
        	// of a single table byte.
   248  
   249  	// P field - 2 bits
   250  	vex66 = 1 << 0
   251  	vexF3 = 2 << 0
   252  	vexF2 = 3 << 0
   253  	// L field - 1 bit
        	// vexLZ, vexLIG and vex128 are all zero; the three names only mirror
        	// the notation used in the manual.
   254  	vexLZ  = 0 << 2
   255  	vexLIG = 0 << 2
   256  	vex128 = 0 << 2
   257  	vex256 = 1 << 2
   258  	// W field - 1 bit
        	// vexWIG and vexW0 are both zero, so only vexW1 sets the bit.
   259  	vexWIG = 0 << 7
   260  	vexW0  = 0 << 7
   261  	vexW1  = 1 << 7
   262  	// M field - 5 bits, but mostly reserved; we can store up to 4
   263  	vex0F   = 1 << 3
   264  	vex0F38 = 2 << 3
   265  	vex0F3A = 3 << 3
   266  
   267  	// Combinations used in the manual.
        	// Each name spells out the L, P, M and W choices in that order; a
        	// missing P part (e.g. VEX_128_0F_WIG) means no P bits are set.
   268  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   269  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   270  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   271  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   272  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   273  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   274  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   275  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   276  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   277  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   278  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   279  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   280  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   281  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   282  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   283  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   284  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   285  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   286  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   287  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   288  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   289  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   290  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   291  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   292  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   293  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   294  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   295  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   296  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   297  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   298  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   299  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   300  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   301  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   302  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   303  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   304  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   305  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   306  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   307  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   308  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   309  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   310  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   311  )
   312  
        // ycover is a Ymax-by-Ymax matrix stored flat. Per the comment before
        // optab, ycover[a*Ymax+b] = 1 means an operand of class a also counts as
        // class b; the table is set up in instinit (outside this part of the file).
   313  var ycover [Ymax * Ymax]uint8
   314  
        // reg is a per-register table sized by MAXREG; it is filled in and read
        // outside this part of the file.
        // NOTE(review): presumably the hardware encoding of each register - confirm.
   315  var reg [MAXREG]int
   316  
        // regrex has one more entry than reg; it is filled in and read outside
        // this part of the file.
        // NOTE(review): presumably the REX bits each register requires - confirm.
   317  var regrex [MAXREG + 1]int
   318  
        // Operand tables for instructions without operands and for
        // pseudo-instructions. Every row is {from, from3, to, zcase, zoffset}.

        // ynone accepts no operands at all. In optab it is used by, among others,
        // AAAA, ACDQ, ACLC and ACMC.
   319  var ynone = []ytab{
   320  	{Ynone, Ynone, Ynone, Zlit, 1},
   321  }
   322  
        // ytext accepts a Ymb source and a Ytextsize destination, with or without
        // a Yi32 third operand. Both rows are Zpseudo.
   323  var ytext = []ytab{
   324  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   325  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   326  }
   327  
        // ynop accepts either nothing or a single operand, on either side, of
        // class Yiauto, Yml, Yrf or Yxr. All rows are Zpseudo.
   328  var ynop = []ytab{
   329  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   330  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   331  	{Ynone, Ynone, Yml, Zpseudo, 0},
   332  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   333  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   334  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   335  	{Yml, Ynone, Ynone, Zpseudo, 0},
   336  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   337  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   338  }
   339  
        // yfuncdata accepts a Yi32 source and a Ym destination (Zpseudo).
   340  var yfuncdata = []ytab{
   341  	{Yi32, Ynone, Ym, Zpseudo, 0},
   342  }
   343  
        // ypcdata accepts Yi32 on both sides (Zpseudo).
   344  var ypcdata = []ytab{
   345  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   346  }
   347  
        // Operand tables for two-operand integer arithmetic and logic. Every row
        // is {from, from3, to, zcase, zoffset}. Rows are scanned in order and each
        // row owns zoffset bytes of the instruction's opcode array, so neither the
        // row order nor the zoffset values can change without also changing every
        // optab entry that uses the table.

        // yxorb is the byte-sized table; in optab it is used by AADCB, AADDB and
        // AANDB.
   348  var yxorb = []ytab{
   349  	{Yi32, Ynone, Yal, Zib_, 1},
   350  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   351  	{Yrb, Ynone, Ymb, Zr_m, 1},
   352  	{Ymb, Ynone, Yrb, Zm_r, 1},
   353  }
   354  
        // yxorl is the word/long/quad table; in optab it is used by AADCL/Q/W and
        // AANDL/Q/W. The Yi8 row comes first so it is tried before the Yi32 rows.
   355  var yxorl = []ytab{
   356  	{Yi8, Ynone, Yml, Zibo_m, 2},
   357  	{Yi32, Ynone, Yax, Zil_, 1},
   358  	{Yi32, Ynone, Yml, Zilo_m, 2},
   359  	{Yrl, Ynone, Yml, Zr_m, 1},
   360  	{Yml, Ynone, Yrl, Zm_r, 1},
   361  }
   362  
        // yaddl is used by AADDL/Q/W in optab and currently has exactly the same
        // rows as yxorl. The comment before optab walks through this table as its
        // worked example.
   363  var yaddl = []ytab{
   364  	{Yi8, Ynone, Yml, Zibo_m, 2},
   365  	{Yi32, Ynone, Yax, Zil_, 1},
   366  	{Yi32, Ynone, Yml, Zilo_m, 2},
   367  	{Yrl, Ynone, Yml, Zr_m, 1},
   368  	{Yml, Ynone, Yrl, Zm_r, 1},
   369  }
   370  
        // Operand tables for single-destination operations: no from operand, only
        // a to operand. Every row is {from, from3, to, zcase, zoffset}.

        // yincb: destination of class Ymb.
   371  var yincb = []ytab{
   372  	{Ynone, Ynone, Ymb, Zo_m, 2},
   373  }
   374  
        // yincw: destination of class Yml.
   375  var yincw = []ytab{
   376  	{Ynone, Ynone, Yml, Zo_m, 2},
   377  }
   378  
        // yincl: like yincw, but with an extra Yrl row (Z_rp, one opcode byte)
        // that is tried first.
   379  var yincl = []ytab{
   380  	{Ynone, Ynone, Yrl, Z_rp, 1},
   381  	{Ynone, Ynone, Yml, Zo_m, 2},
   382  }
   383  
        // yincq: same single row as yincw.
   384  var yincq = []ytab{
   385  	{Ynone, Ynone, Yml, Zo_m, 2},
   386  }
   387  
        // Operand tables for comparisons. They mirror yxorb/yxorl, except that the
        // immediate appears as the to operand rather than the from operand. Every
        // row is {from, from3, to, zcase, zoffset}.

        // ycmpb: byte-sized operands.
   388  var ycmpb = []ytab{
   389  	{Yal, Ynone, Yi32, Z_ib, 1},
   390  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   391  	{Ymb, Ynone, Yrb, Zm_r, 1},
   392  	{Yrb, Ynone, Ymb, Zr_m, 1},
   393  }
   394  
        // ycmpl: word/long/quad operands; the Yi8 row is tried before the Yi32
        // rows.
   395  var ycmpl = []ytab{
   396  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   397  	{Yax, Ynone, Yi32, Z_il, 1},
   398  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   399  	{Yml, Ynone, Yrl, Zm_r, 1},
   400  	{Yrl, Ynone, Yml, Zr_m, 1},
   401  }
   402  
        // Operand tables whose from operand is a count (Yi1, a general immediate,
        // or a count register class) and whose to operand is the value operated
        // on. Every row is {from, from3, to, zcase, zoffset}.

        // yshb: byte-sized destination. Unlike yshl there is no separate Ycl row.
   403  var yshb = []ytab{
   404  	{Yi1, Ynone, Ymb, Zo_m, 2},
   405  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   406  	{Ycx, Ynone, Ymb, Zo_m, 2},
   407  }
   408  
        // yshl: word/long/quad destination; accepts both Ycl and Ycx as the count.
   409  var yshl = []ytab{
   410  	{Yi1, Ynone, Yml, Zo_m, 2},
   411  	{Yi32, Ynone, Yml, Zibo_m, 2},
   412  	{Ycl, Ynone, Yml, Zo_m, 2},
   413  	{Ycx, Ynone, Yml, Zo_m, 2},
   414  }
   415  
        // Operand tables for test-style operations. Every row is
        // {from, from3, to, zcase, zoffset}.

        // ytestb has the same rows as yxorb.
   416  var ytestb = []ytab{
   417  	{Yi32, Ynone, Yal, Zib_, 1},
   418  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   419  	{Yrb, Ynone, Ymb, Zr_m, 1},
   420  	{Ymb, Ynone, Yrb, Zm_r, 1},
   421  }
   422  
        // ytestl has the rows of yxorl minus the leading Yi8 row.
   423  var ytestl = []ytab{
   424  	{Yi32, Ynone, Yax, Zil_, 1},
   425  	{Yi32, Ynone, Yml, Zilo_m, 2},
   426  	{Yrl, Ynone, Yml, Zr_m, 1},
   427  	{Yml, Ynone, Yrl, Zm_r, 1},
   428  }
   429  
        // Operand tables for byte, word and long moves, plus two small tables
        // declared between them. Every row is {from, from3, to, zcase, zoffset}.

        // ymovb: byte moves between register and memory/register, and from an
        // immediate.
   430  var ymovb = []ytab{
   431  	{Yrb, Ynone, Ymb, Zr_m, 1},
   432  	{Ymb, Ynone, Yrb, Zm_r, 1},
   433  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   434  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   435  }
   436  
        // ymbs: a single Ymb operand given as from, with no destination.
   437  var ymbs = []ytab{
   438  	{Ymb, Ynone, Ynone, Zm_o, 2},
   439  }
   440  
        // ybtl: used in optab by the ABT*, ABTC*, ABTR* and ABTS* instructions;
        // the source is either a Yi8 immediate or a Yrl register.
   441  var ybtl = []ytab{
   442  	{Yi8, Ynone, Yml, Zibo_m, 2},
   443  	{Yrl, Ynone, Yml, Zr_m, 1},
   444  }
   445  
        // ymovw: word moves. The Yi0 row (Zclr) precedes the general Yi32 rows so
        // a zero constant is matched first.
   446  var ymovw = []ytab{
   447  	{Yrl, Ynone, Yml, Zr_m, 1},
   448  	{Yml, Ynone, Yrl, Zm_r, 1},
   449  	{Yi0, Ynone, Yrl, Zclr, 1},
   450  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   451  	{Yi32, Ynone, Yml, Zilo_m, 2},
   452  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   453  }
   454  
        // ymovl: the rows of ymovw with four MMX/XMM MOVD rows inserted before the
        // final Yiauto row.
   455  var ymovl = []ytab{
   456  	{Yrl, Ynone, Yml, Zr_m, 1},
   457  	{Yml, Ynone, Yrl, Zm_r, 1},
   458  	{Yi0, Ynone, Yrl, Zclr, 1},
   459  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   460  	{Yi32, Ynone, Yml, Zilo_m, 2},
   461  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   462  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   463  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   464  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   465  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   466  }
   467  
        // Every row below is {from, from3, to, zcase, zoffset}.

        // yret: no operand, or a single Yi32 operand given as from; both rows use
        // Zo_iw.
   468  var yret = []ytab{
   469  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   470  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   471  }
   472  
        // ymovq: quad-word moves. The trailing comment on each row names the
        // opcode (and prefix, where given) that the row corresponds to in the
        // instruction's opcode array, so rows and opcode bytes must stay in step.
   473  var ymovq = []ytab{
   474  	// valid in 32-bit mode
   475  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   476  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   477  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   478  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   479  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   480  
   481  	// valid only in 64-bit mode, usually with 64-bit prefix
   482  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   483  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   484  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   485  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   486  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   487  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   488  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   489  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   490  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   491  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   492  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   493  }
   494  
        // Small tables named after their operand classes as <from>_<to>. Every
        // row is {from, from3, to, zcase, zoffset}.

        // ym_rl: Ym source, Yrl destination.
   495  var ym_rl = []ytab{
   496  	{Ym, Ynone, Yrl, Zm_r, 1},
   497  }
   498  
        // yrl_m: Yrl source, Ym destination; used in optab by ABOUNDL and ABOUNDW.
   499  var yrl_m = []ytab{
   500  	{Yrl, Ynone, Ym, Zr_m, 1},
   501  }
   502  
        // ymb_rl: Ymb source, Yrl destination, using the Zmb_r case.
   503  var ymb_rl = []ytab{
   504  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   505  }
   506  
        // yml_rl: Yml source, Yrl destination; used in optab by the ABSF*, ABSR*
        // and ACMOVL* instructions.
   507  var yml_rl = []ytab{
   508  	{Yml, Ynone, Yrl, Zm_r, 1},
   509  }
   510  
        // yrl_ml: Yrl source, Yml destination; used in optab by AARPL.
   511  var yrl_ml = []ytab{
   512  	{Yrl, Ynone, Yml, Zr_m, 1},
   513  }
   514  
        // yml_mb: despite the name, both rows are byte-sized: Yrb to Ymb and Ymb
        // to Yrb.
   515  var yml_mb = []ytab{
   516  	{Yrb, Ynone, Ymb, Zr_m, 1},
   517  	{Ymb, Ynone, Yrb, Zm_r, 1},
   518  }
   519  
        // yrb_mb: Yrb source, Ymb destination only.
   520  var yrb_mb = []ytab{
   521  	{Yrb, Ynone, Ymb, Zr_m, 1},
   522  }
   523  
        // yxchg: the two Yax rows come first, so a form involving Yax is matched
        // before the general register/memory rows.
   524  var yxchg = []ytab{
   525  	{Yax, Ynone, Yrl, Z_rp, 1},
   526  	{Yrl, Ynone, Yax, Zrp_, 1},
   527  	{Yrl, Ynone, Yml, Zr_m, 1},
   528  	{Yml, Ynone, Yrl, Zm_r, 1},
   529  }
   530  
        // Operand tables for divide/multiply style instructions and the BYTE
        // pseudo-instruction. Every row is {from, from3, to, zcase, zoffset}.

        // ydivl: a single Yml operand given as from.
   531  var ydivl = []ytab{
   532  	{Yml, Ynone, Ynone, Zm_o, 2},
   533  }
   534  
        // ydivb: a single Ymb operand given as from.
   535  var ydivb = []ytab{
   536  	{Ymb, Ynone, Ynone, Zm_o, 2},
   537  }
   538  
        // yimul: the one-operand form first, then immediate-times-register
        // (8-bit before 32-bit), then the general two-operand form.
   539  var yimul = []ytab{
   540  	{Yml, Ynone, Ynone, Zm_o, 2},
   541  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   542  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   543  	{Yml, Ynone, Yrl, Zm_r, 2},
   544  }
   545  
        // yimul3: three-operand form; Yi8 immediate, Yml as from3, Yrl destination.
   546  var yimul3 = []ytab{
   547  	{Yi8, Yml, Yrl, Zibm_r, 2},
   548  }
   549  
        // ybyte: a single Yi64 constant; used in optab by ABYTE.
   550  var ybyte = []ytab{
   551  	{Yi64, Ynone, Ynone, Zbyte, 1},
   552  }
   553  
        // Operand tables for instructions with at most one operand. Every row is
        // {from, from3, to, zcase, zoffset}.

        // yin: either a Yi32 immediate or no operand at all.
   554  var yin = []ytab{
   555  	{Yi32, Ynone, Ynone, Zib_, 1},
   556  	{Ynone, Ynone, Ynone, Zlit, 1},
   557  }
   558  
        // yint: a Yi32 immediate only.
   559  var yint = []ytab{
   560  	{Yi32, Ynone, Ynone, Zib_, 1},
   561  }
   562  
        // ypushl: the operand is given as from; register, memory, 8-bit immediate
        // and 32-bit immediate forms, in that order.
   563  var ypushl = []ytab{
   564  	{Yrl, Ynone, Ynone, Zrp_, 1},
   565  	{Ym, Ynone, Ynone, Zm_o, 2},
   566  	{Yi8, Ynone, Ynone, Zib_, 1},
   567  	{Yi32, Ynone, Ynone, Zil_, 1},
   568  }
   569  
        // ypopl: the operand is given as to; register form before memory form.
   570  var ypopl = []ytab{
   571  	{Ynone, Ynone, Yrl, Z_rp, 1},
   572  	{Ynone, Ynone, Ym, Zo_m, 2},
   573  }
   574  
        // ybswap: a single Yrl destination using two opcode bytes; used in optab
        // by ABSWAPL and ABSWAPQ.
   575  var ybswap = []ytab{
   576  	{Ynone, Ynone, Yrl, Z_rp, 2},
   577  }
   578  
        // yscond: a single Ymb destination.
   579  var yscond = []ytab{
   580  	{Ynone, Ynone, Ymb, Zo_m, 2},
   581  }
   582  
        // Operand tables for control transfer. Every row is
        // {from, from3, to, zcase, zoffset}.
        // NOTE(review): several rows here have zoffset 0. If, as the comment
        // before optab describes, the opcode pointer advances by zoffset for each
        // skipped row, such a row shares its opcode bytes with the next row -
        // confirm in doasm before reordering anything.

        // yjcond: a Ybr target, optionally with a Yi0 or Yi1 from operand.
   583  var yjcond = []ytab{
   584  	{Ynone, Ynone, Ybr, Zbr, 0},
   585  	{Yi0, Ynone, Ybr, Zbr, 0},
   586  	{Yi1, Ynone, Ybr, Zbr, 1},
   587  }
   588  
        // yloop: a Ybr target only.
   589  var yloop = []ytab{
   590  	{Ynone, Ynone, Ybr, Zloop, 1},
   591  }
   592  
        // ycall: used in optab by obj.ACALL. Covers register/memory-indirect,
        // Yindir, direct branch and constant targets.
   593  var ycall = []ytab{
   594  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   595  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   596  	{Ynone, Ynone, Yindir, Zcallind, 2},
   597  	{Ynone, Ynone, Ybr, Zcall, 0},
   598  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   599  }
   600  
        // yduff: a Yi32 target handled by the Zcallduff case.
   601  var yduff = []ytab{
   602  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   603  }
   604  
        // yjmp: indirect through Yml, direct branch, or constant target.
   605  var yjmp = []ytab{
   606  	{Ynone, Ynone, Yml, Zo_m64, 2},
   607  	{Ynone, Ynone, Ybr, Zjmp, 0},
   608  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   609  }
   610  
        // Operand tables built from the Yf0, Yrf and Ym classes. Every row is
        // {from, from3, to, zcase, zoffset}; all rows here own two opcode bytes
        // except the Yax row of ystsw.
        // NOTE(review): Yf0/Yrf are presumably the x87 stack-top register and a
        // general x87 stack register - confirm against the operand classifier.

        // yfmvd: moves in both directions between Yf0 and either Ym or Yrf.
   611  var yfmvd = []ytab{
   612  	{Ym, Ynone, Yf0, Zm_o, 2},
   613  	{Yf0, Ynone, Ym, Zo_m, 2},
   614  	{Yrf, Ynone, Yf0, Zm_o, 2},
   615  	{Yf0, Ynone, Yrf, Zo_m, 2},
   616  }
   617  
        // yfmvdp: only the directions out of Yf0 (to Ym or to Yrf).
   618  var yfmvdp = []ytab{
   619  	{Yf0, Ynone, Ym, Zo_m, 2},
   620  	{Yf0, Ynone, Yrf, Zo_m, 2},
   621  }
   622  
        // yfmvf: both directions between Ym and Yf0; no Yrf rows.
   623  var yfmvf = []ytab{
   624  	{Ym, Ynone, Yf0, Zm_o, 2},
   625  	{Yf0, Ynone, Ym, Zo_m, 2},
   626  }
   627  
        // yfmvx: Ym to Yf0 only.
   628  var yfmvx = []ytab{
   629  	{Ym, Ynone, Yf0, Zm_o, 2},
   630  }
   631  
        // yfmvp: Yf0 to Ym only.
   632  var yfmvp = []ytab{
   633  	{Yf0, Ynone, Ym, Zo_m, 2},
   634  }
   635  
        // yfcmv: Yrf to Yf0 only.
   636  var yfcmv = []ytab{
   637  	{Yrf, Ynone, Yf0, Zm_o, 2},
   638  }
   639  
        // yfadd: Ym or Yrf into Yf0, or Yf0 into Yrf.
   640  var yfadd = []ytab{
   641  	{Ym, Ynone, Yf0, Zm_o, 2},
   642  	{Yrf, Ynone, Yf0, Zm_o, 2},
   643  	{Yf0, Ynone, Yrf, Zo_m, 2},
   644  }
   645  
        // yfaddp: Yf0 into Yrf only.
   646  var yfaddp = []ytab{
   647  	{Yf0, Ynone, Yrf, Zo_m, 2},
   648  }
   649  
        // yfxch: Yf0 and Yrf in either order.
   650  var yfxch = []ytab{
   651  	{Yf0, Ynone, Yrf, Zo_m, 2},
   652  	{Yrf, Ynone, Yf0, Zm_o, 2},
   653  }
   654  
        // ycompp: declared as Yf0, Yrf; the original note says the real operands
        // are f0,f1.
   655  var ycompp = []ytab{
   656  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   657  }
   658  
        // ystsw: destination is either Ym or Yax; the Yax row is a Zlit case with
        // a single opcode byte.
   659  var ystsw = []ytab{
   660  	{Ynone, Ynone, Ym, Zo_m, 2},
   661  	{Ynone, Ynone, Yax, Zlit, 1},
   662  }
   663  
        // ystcw: a single Ym operand, accepted on either side.
   664  var ystcw = []ytab{
   665  	{Ynone, Ynone, Ym, Zo_m, 2},
   666  	{Ym, Ynone, Ynone, Zm_o, 2},
   667  }
   668  
        // ysvrs: same rows as ystcw.
   669  var ysvrs = []ytab{
   670  	{Ynone, Ynone, Ym, Zo_m, 2},
   671  	{Ym, Ynone, Ynone, Zm_o, 2},
   672  }
   673  
        // Operand tables for MMX and XMM instructions. Every row is
        // {from, from3, to, zcase, zoffset}. Going by the row comments elsewhere
        // in this file (ymovl, ymovq, yvex_xy3), Ymr/Ymm are the MMX register and
        // register-or-memory classes and Yxr/Yxm the XMM equivalents.

        // ymm: the MMX form first (one opcode byte), then the XMM form (two).
   674  var ymm = []ytab{
   675  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   676  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   677  }
   678  
        // yxm: Yxm source, Yxr destination; used in optab by AADDPD, AADDPS,
        // AADDSD, AADDSS and the AAND*P* instructions.
   679  var yxm = []ytab{
   680  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   681  }
   682  
        // yxm_q4: same operands as yxm but using the plain Zm_r case.
   683  var yxm_q4 = []ytab{
   684  	{Yxm, Ynone, Yxr, Zm_r, 1},
   685  }
   686  
        // yxcvm1: Yxm source; destination is either Yxr or Ymr.
   687  var yxcvm1 = []ytab{
   688  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   689  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   690  }
   691  
        // yxcvm2: Yxr destination; source is either Yxm or Ymm.
   692  var yxcvm2 = []ytab{
   693  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   694  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   695  }
   696  
        // yxmq below is disabled (commented out) and kept only for reference.
   697  /*
   698  var yxmq = []ytab{
   699  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   700  }
   701  */
   702  
        // yxr: register-only form, Yxr on both sides.
   703  var yxr = []ytab{
   704  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   705  }
   706  
        // yxr_ml: Yxr source, Yml destination.
   707  var yxr_ml = []ytab{
   708  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   709  }
   710  
        // ymr: register-only form, Ymr on both sides.
   711  var ymr = []ytab{
   712  	{Ymr, Ynone, Ymr, Zm_r, 1},
   713  }
   714  
        // ymr_ml: Ymr source, Yml destination.
   715  var ymr_ml = []ytab{
   716  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   717  }
   718  
        // yxcmp: same single row as yxm.
   719  var yxcmp = []ytab{
   720  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   721  }
   722  
        // yxcmpi: three operands; Yxm source, Yxr as from3, and the Yi8 immediate
        // in the to position.
   723  var yxcmpi = []ytab{
   724  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   725  }
   726  
        // yxmov: load (Yxm to Yxr) and store (Yxr to Yxm) forms.
   727  var yxmov = []ytab{
   728  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   729  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   730  }
   731  
        // yxcvfl: Yxm source, Yrl destination, one opcode byte.
   732  var yxcvfl = []ytab{
   733  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   734  }
   735  
        // yxcvlf: Yml source, Yxr destination, one opcode byte.
   736  var yxcvlf = []ytab{
   737  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   738  }
   739  
        // yxcvfq: same operands as yxcvfl but owning two opcode bytes.
   740  var yxcvfq = []ytab{
   741  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   742  }
   743  
        // yxcvqf: same operands as yxcvlf but owning two opcode bytes.
   744  var yxcvqf = []ytab{
   745  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   746  }
   747  
        // yps: MMX and XMM forms, each with a register/memory source row and a
        // Yi8 immediate source row.
   748  var yps = []ytab{
   749  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   750  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   751  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   752  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   753  }
   754  
        // yxrrl: Yxr source, Yrl destination.
   755  var yxrrl = []ytab{
   756  	{Yxr, Ynone, Yrl, Zm_r, 1},
   757  }
   758  
        // ymrxr: Yxr destination from either a Ymr register or a Yxm operand.
   759  var ymrxr = []ytab{
   760  	{Ymr, Ynone, Yxr, Zm_r, 1},
   761  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   762  }
   763  
        // Operand tables for shuffle/extract/insert style and other miscellaneous
        // instructions. Every row is {from, from3, to, zcase, zoffset}. In the
        // three-operand rows the immediate is the from operand and the source
        // register or memory operand is from3.

        // ymshuf: Yi8 immediate, Ymm source, Ymr destination.
   764  var ymshuf = []ytab{
   765  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   766  }
   767  
        // ymshufb: two-operand Yxm to Yxr form using the Zm2_r case.
   768  var ymshufb = []ytab{
   769  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   770  }
   771  
        // yxshuf: Yu8 immediate, Yxm source, Yxr destination.
   772  var yxshuf = []ytab{
   773  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   774  }
   775  
        // yextrw: Yu8 immediate, Yxr source, Yrl destination.
   776  var yextrw = []ytab{
   777  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   778  }
   779  
        // yextr: Yu8 immediate, Yxr source, Ymm destination; uses Zibr_m and owns
        // three opcode bytes.
   780  var yextr = []ytab{
   781  	{Yu8, Yxr, Ymm, Zibr_m, 3},
   782  }
   783  
        // yinsrw: Yu8 immediate, Yml source, Yxr destination.
   784  var yinsrw = []ytab{
   785  	{Yu8, Yml, Yxr, Zibm_r, 2},
   786  }
   787  
        // yinsr: Yu8 immediate, Ymm source, Yxr destination; owns three opcode
        // bytes.
   788  var yinsr = []ytab{
   789  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   790  }
   791  
        // ypsdq: Yi8 immediate applied to a Yxr destination.
   792  var ypsdq = []ytab{
   793  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   794  }
   795  
        // ymskb: Yrl destination from a Yxr (two opcode bytes) or Ymr (one) source.
   796  var ymskb = []ytab{
   797  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   798  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   799  }
   800  
        // ycrc32l: Yml source, Yrl destination, Zlitm_r case.
   801  var ycrc32l = []ytab{
   802  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   803  }
   804  
        // yprefetch: a single Ym operand given as from.
   805  var yprefetch = []ytab{
   806  	{Ym, Ynone, Ynone, Zm_o, 2},
   807  }
   808  
        // yaes: Yxm source, Yxr destination, Zlitm_r case.
   809  var yaes = []ytab{
   810  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   811  }
   812  
        // yaes2: same row as yxshuf (Yu8 immediate, Yxm source, Yxr destination).
   813  var yaes2 = []ytab{
   814  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   815  }
   816  
        // yxbegin: a Ybr target encoded with the Zjmp case.
   817  var yxbegin = []ytab{
   818  	{Ynone, Ynone, Ybr, Zjmp, 1},
   819  }
   820  
        // yxabort: a single Yu8 immediate.
   821  var yxabort = []ytab{
   822  	{Yu8, Ynone, Ynone, Zib_, 1},
   823  }
   824  
        // ylddqu: memory-only (Ym) source, Yxr destination.
   825  var ylddqu = []ytab{
   826  	{Ym, Ynone, Yxr, Zm_r, 1},
   827  }
   828  
   829  // VEX instructions that come in two forms:
   830  //	VTHING xmm2/m128, xmmV, xmm1
   831  //	VTHING ymm2/m256, ymmV, ymm1
   832  // The opcode array in the corresponding Optab entry
   833  // should contain the (VEX prefixes, opcode byte) pair
   834  // for each of the two forms.
   835  // For example, the entries for VPXOR are:
   836  //
   837  //	VPXOR xmm2/m128, xmmV, xmm1
   838  //	VEX.NDS.128.66.0F.WIG EF /r
   839  //
   840  //	VPXOR ymm2/m256, ymmV, ymm1
   841  //	VEX.NDS.256.66.0F.WIG EF /r
   842  //
   843  // The NDS/NDD/DDS part can be dropped, producing this
   844  // Optab entry:
   845  //
   846  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
   847  //
        // Each row is {from, from3, to, zcase, zoffset}; a zoffset of 2 is the
        // (VEX prefix, opcode byte) pair described above.
   848  var yvex_xy3 = []ytab{
   849  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   850  	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
   851  }
   852  
        // yvex_ri3: Yi8 immediate, Ymb as from3, Yrl destination.
   853  var yvex_ri3 = []ytab{
   854  	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
   855  }
   856  
        // yvex_xyi3: immediate plus register/memory source, in xmm and ymm forms.
   857  var yvex_xyi3 = []ytab{
   858  	{Yi8, Yxm, Yxr, Zvex_i_rm_r, 2},
   859  	{Yi8, Yym, Yyr, Zvex_i_rm_r, 2},
   860  }
   861  
        // yvex_yyi4: the zcase is the four-operand Zvex_i_rm_v_r, but the row lists
        // only three operand classes; the immediate is not part of the row (see
        // the TODO).
   862  var yvex_yyi4 = []ytab{ //TODO don't hide 4 op, some version have xmm version
   863  	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   864  }
   865  
        // yvex_xyi4: as yvex_yyi4, but the from operand is Yxm rather than Yym.
   866  var yvex_xyi4 = []ytab{
   867  	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   868  }
   869  
        // yvex_shift: immediate-count rows (three opcode bytes each) followed by
        // rows whose count is a Yxm operand. The last row pairs a Yxm count with
        // ymm registers.
   870  var yvex_shift = []ytab{
   871  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   872  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   873  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   874  	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2},
   875  }
   876  
        // yvex_shift_dq: only the immediate-count rows of yvex_shift.
   877  var yvex_shift_dq = []ytab{
   878  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   879  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   880  }
   881  
        // yvex_r3: general-register three-operand form.
        // NOTE(review): the two rows are identical, so an in-order scan would
        // always stop at the first; confirm how the second pair of opcode bytes
        // is meant to be selected.
   882  var yvex_r3 = []ytab{
   883  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   884  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   885  }
   886  
        // yvex_vmr3: like yvex_r3 with from and from3 swapped (Zvex_v_rm_r).
        // NOTE(review): the two rows are identical here as well - see yvex_r3.
   887  var yvex_vmr3 = []ytab{
   888  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   889  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   890  }
   891  
        // yvex_xy2: two-operand form (from3 is Ynone), in xmm and ymm variants.
   892  var yvex_xy2 = []ytab{
   893  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   894  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   895  }
   896  
        // yvex_xyr2: Yxr or Yyr source into a Yrl destination.
   897  var yvex_xyr2 = []ytab{
   898  	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
   899  	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
   900  }
   901  
        // yvex_vmovdqa: load and store rows for xmm, then load and store rows for
        // ymm; the store rows use Zvex_r_v_rm.
   902  var yvex_vmovdqa = []ytab{
   903  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   904  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   905  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   906  	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
   907  }
   908  
        // yvex_vmovntdq: store-only; Yxr or Yyr source into a Ym destination.
   909  var yvex_vmovntdq = []ytab{
   910  	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
   911  	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
   912  }
   913  
        // yvex_vpbroadcast: the source is Yxm in both rows; only the destination
        // differs (Yxr or Yyr).
   914  var yvex_vpbroadcast = []ytab{
   915  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   916  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   917  }
   918  
        // ymmxmm0f38: not a VEX table. MMX form owning three opcode bytes, then
        // XMM form owning five, both using the Zlitm_r case.
   919  var ymmxmm0f38 = []ytab{
   920  	{Ymm, Ynone, Ymr, Zlitm_r, 3},
   921  	{Yxm, Ynone, Yxr, Zlitm_r, 5},
   922  }
   923  
   924  /*
   925   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   926   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   927   * the entry with the given p->as and then looks through the ytable for that
   928   * instruction (the second field in the optab struct) for a line whose first
   929   * two values match the Ytypes of the p->from and p->to operands.  The function
   930   * oclass in span.c computes the specific Ytype of an operand and then the set
   931   * of more general Ytypes that it satisfies is implied by the ycover table, set
   932   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   933   * from the more general 8-bit constants, but instinit says
   934   *
   935   *        ycover[Yi0*Ymax + Ys32] = 1;
   936   *        ycover[Yi1*Ymax + Ys32] = 1;
   937   *        ycover[Yi8*Ymax + Ys32] = 1;
   938   *
   939   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   940   * if that's what an instruction can handle.
   941   *
   942   * In parallel with the scan through the ytable for the appropriate line, there
   943   * is a z pointer that starts out pointing at the strange magic byte list in
   944   * the Optab struct.  With each step past a non-matching ytable line, z
   945   * advances by the 4th entry in the line.  When a matching line is found, that
   946   * z pointer has the extra data to use in laying down the instruction bytes.
   947   * The actual bytes laid down are a function of the 3rd entry in the line (that
   948   * is, the Ztype) and the z bytes.
   949   *
   950   * For example, let's look at AADDL.  The optab line says:
   951   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   952   *
   953   * and yaddl says
   954   *        uchar   yaddl[] =
   955   *        {
   956   *                Yi8,    Yml,    Zibo_m, 2,
   957   *                Yi32,   Yax,    Zil_,   1,
   958   *                Yi32,   Yml,    Zilo_m, 2,
   959   *                Yrl,    Yml,    Zr_m,   1,
   960   *                Yml,    Yrl,    Zm_r,   1,
   961   *                0
   962   *        };
   963   *
   964   * so there are 5 possible types of ADDL instruction that can be laid down, and
   965   * possible states used to lay them down (Ztype and z pointer, assuming z
   966   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   967   *
   968   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   969   *        Yi32, Yax -> Zil_, z+2 (0x05)
   970   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   971   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   972   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   973   *
   974   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   975   * relatively straightforward as this program goes.
   976   *
   977   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   978   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   979   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   980   * Zilo_m is the same but a long (32-bit) immediate.
   981   */
   982  var optab =
   983  /*	as, ytab, andproto, opcode */
   984  []Optab{
   985  	{obj.AXXX, nil, 0, [23]uint8{}},
   986  	{AAAA, ynone, P32, [23]uint8{0x37}},
   987  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   988  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   989  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   990  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   991  	{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   992  	{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   993  	{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   994  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   995  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   996  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   997  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   998  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   999  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
  1000  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
  1001  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1002  	{AADJSP, nil, 0, [23]uint8{}},
  1003  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
  1004  	{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1005  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
  1006  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
  1007  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
  1008  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
  1009  	{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1010  	{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1011  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
  1012  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
  1013  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
  1014  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
  1015  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
  1016  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
  1017  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
  1018  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
  1019  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
  1020  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
  1021  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
  1022  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
  1023  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
  1024  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
  1025  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
  1026  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
  1027  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
  1028  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
  1029  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
  1030  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
  1031  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
  1032  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
  1033  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
  1034  	{ABYTE, ybyte, Px, [23]uint8{1}},
  1035  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
  1036  	{ACDQ, ynone, Px, [23]uint8{0x99}},
  1037  	{ACLC, ynone, Px, [23]uint8{0xf8}},
  1038  	{ACLD, ynone, Px, [23]uint8{0xfc}},
  1039  	{ACLI, ynone, Px, [23]uint8{0xfa}},
  1040  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
  1041  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1042  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1043  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1044  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1045  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1046  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1047  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1048  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1049  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1050  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1051  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1052  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1053  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1054  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1055  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1056  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1057  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1058  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1059  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1060  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1061  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1062  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1063  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1064  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1065  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1066  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1067  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1068  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1069  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1070  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1071  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1072  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1073  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1074  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1075  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1076  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1077  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1078  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1079  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1080  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1081  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1082  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1083  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1084  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1085  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1086  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1087  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1088  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1089  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1090  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1091  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1092  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1093  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1094  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1095  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1096  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1097  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1098  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1099  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1100  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1101  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1102  	{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
  1103  	{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
  1104  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1105  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1106  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1107  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1108  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1109  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1110  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1111  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1112  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1113  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1114  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1115  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1116  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1117  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1118  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1119  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1120  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1121  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1122  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1123  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1124  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1125  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1126  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1127  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1128  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1129  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1130  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1131  	{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
  1132  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1133  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1134  	{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
  1135  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1136  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1137  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1138  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1139  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1140  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1141  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1142  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1143  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1144  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1145  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1146  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1147  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1148  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1149  	{obj.AGLOBL, nil, 0, [23]uint8{}},
  1150  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1151  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1152  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1153  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1154  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1155  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1156  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1157  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1158  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1159  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1160  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1161  	{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
  1162  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1163  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1164  	{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
  1165  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1166  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1167  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1168  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1169  	{AINT, yint, Px, [23]uint8{0xcd}},
  1170  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1171  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1172  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1173  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1174  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1175  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1176  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1177  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1178  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1179  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1180  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1181  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1182  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1183  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1184  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1185  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1186  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1187  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1188  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1189  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1190  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1191  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1192  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1193  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1194  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1195  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1196  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1197  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1198  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1199  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1200  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1201  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1202  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1203  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1204  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1205  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1206  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1207  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1208  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1209  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1210  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1211  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1212  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1213  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1214  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1215  	{ALONG, ybyte, Px, [23]uint8{4}},
  1216  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1217  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1218  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1219  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1220  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1221  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1222  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1223  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1224  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1225  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1226  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1227  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1228  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1229  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1230  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1231  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1232  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1233  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1234  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1235  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1236  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1237  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1238  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1239  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1240  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1241  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1242  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1243  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1244  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1245  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1246  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1247  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1248  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1249  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1250  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1251  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1252  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1253  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1254  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1255  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1256  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1257  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1258  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1259  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1260  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1261  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1262  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1263  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1264  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1265  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1266  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1267  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1268  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1269  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1270  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1271  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1272  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1273  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1274  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1275  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1276  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1277  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1278  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1279  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1280  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1281  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1282  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1283  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1284  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1285  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1286  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1287  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1288  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1289  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1290  	{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1291  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1292  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1293  	{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1294  	{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1295  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1296  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1297  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1298  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1299  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1300  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1301  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1302  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1303  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1304  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1305  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1306  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1307  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1308  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1309  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1310  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1311  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1312  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1313  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1314  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1315  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1316  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1317  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1318  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1319  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1320  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1321  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1322  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1323  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1324  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1325  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1326  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1327  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1328  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1329  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1330  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1331  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1332  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1333  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1334  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1335  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1336  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1337  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1338  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1339  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1340  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1341  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1342  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1343  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1344  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1345  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1346  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1347  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1348  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1349  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1350  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1351  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1352  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1353  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1354  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1355  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1356  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1357  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1358  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1359  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1360  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1361  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1362  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1363  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1364  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1365  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1366  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1367  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1368  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1369  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1370  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1371  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1372  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1373  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1374  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1375  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1376  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1377  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1378  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1379  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1380  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1381  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1382  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1383  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1384  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1385  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1386  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1387  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1388  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1389  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1390  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1391  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1392  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1393  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1394  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1395  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1396  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1397  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1398  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1399  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1400  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1401  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1402  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1403  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1404  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1405  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1406  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1407  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1408  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1409  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1410  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1411  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1412  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1413  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1414  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1415  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1416  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1417  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1418  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1419  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1420  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1421  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1422  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1423  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1424  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1425  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1426  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1427  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1428  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1429  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1430  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1431  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1432  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1433  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1434  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1435  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1436  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1437  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1438  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1439  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1440  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1441  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1442  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1443  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1444  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1445  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1446  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1447  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1448  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1449  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1450  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1451  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1452  	{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1453  	{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1454  	{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1455  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1456  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1457  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1458  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1459  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1460  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1461  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1462  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1463  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1464  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1465  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1466  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1467  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1468  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1469  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1470  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1471  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1472  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1473  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1474  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1475  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1476  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1477  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1478  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1479  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1480  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1481  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1482  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1483  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1484  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1485  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1486  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1487  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1488  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1489  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1490  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1491  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1492  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1493  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1494  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1495  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1496  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1497  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1498  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1499  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1500  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1501  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1502  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1503  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1504  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1505  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1506  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1507  	{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1508  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1509  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1510  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1511  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1512  	{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1513  	{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1514  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1515  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1516  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1517  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1518  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1519  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1520  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1521  	{AWORD, ybyte, Px, [23]uint8{2}},
  1522  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1523  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1524  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1525  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1526  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1527  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1528  	{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1529  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1530  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1531  	{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1532  	{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1533  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1534  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1535  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1536  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1537  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1538  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1539  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1540  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1541  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1542  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1543  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1544  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1545  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1546  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1547  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1548  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1549  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1550  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1551  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1552  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1553  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1554  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1555  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1556  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1557  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1558  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1559  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1560  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1561  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1562  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1563  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1564  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1565  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1566  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1567  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1568  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1569  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1570  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1571  	{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1572  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1573  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1574  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1575  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1576  	{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1577  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1578  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1579  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1580  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1581  	{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1582  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1583  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1584  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1585  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1586  	{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1587  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1588  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1589  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1590  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1591  	{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1592  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1593  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1594  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1595  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1596  	{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1597  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1598  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1599  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1600  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1601  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1602  	{AFFREE, nil, 0, [23]uint8{}},
  1603  	{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1604  	{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1605  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1606  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1607  	{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1608  	{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1609  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1610  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1611  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1612  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1613  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1614  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1615  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1616  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1617  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1618  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1619  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1620  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1621  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1622  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1623  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1624  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1625  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1626  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1627  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1628  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1629  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1630  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1631  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1632  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1633  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1634  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1635  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1636  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1637  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1638  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1639  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1640  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1641  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1642  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1643  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1644  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1645  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1646  	{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1647  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1648  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1649  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1650  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1651  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1652  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1653  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1654  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1655  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1656  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1657  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1658  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1659  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1660  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1661  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1662  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1663  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1664  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1665  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1666  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1667  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1668  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1669  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1670  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1671  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1672  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1673  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1674  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1675  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1676  	{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1677  	{AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
  1678  	{AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
  1679  	{AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1680  	{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1681  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1682  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1683  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1684  
  1685  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1686  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1687  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1688  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1689  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1690  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1691  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1692  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1693  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1694  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1695  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1696  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1697  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1698  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1699  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1700  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1701  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1702  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1703  
  1704  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1705  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1706  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1707  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1708  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1709  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1710  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1711  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1712  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1713  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1714  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
  1715  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
  1716  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
  1717  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
  1718  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
  1719  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
  1720  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
  1721  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
  1722  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
  1723  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
  1724  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
  1725  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
  1726  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
  1727  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
  1728  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
  1729  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
  1730  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
  1731  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
  1732  
  1733  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1734  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1735  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1736  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1737  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1738  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1739  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1740  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1741  	{obj.ATYPE, nil, 0, [23]uint8{}},
  1742  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1743  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1744  	{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1745  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1746  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1747  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1748  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1749  	{obj.AEND, nil, 0, [23]uint8{}},
  1750  	{0, nil, 0, [23]uint8{}},
  1751  }
  1752  
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// It is populated by instinit; slots for opcodes without an optab entry
// stay nil.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1754  
  1755  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1756  // This happens on systems like Solaris that call .so functions instead of system calls.
  1757  // It does not seem to be necessary for any other systems. This is probably working
  1758  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1759  // what that bug is. And this does fix it.
  1760  func isextern(s *obj.LSym) bool {
  1761  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1762  	return strings.HasPrefix(s.Name, "libc_")
  1763  }
  1764  
  1765  // single-instruction no-ops of various lengths.
  1766  // constructed by hand and disassembled with gdb to verify.
  1767  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1768  var nop = [][16]uint8{
  1769  	{0x90},
  1770  	{0x66, 0x90},
  1771  	{0x0F, 0x1F, 0x00},
  1772  	{0x0F, 0x1F, 0x40, 0x00},
  1773  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1774  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1775  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1776  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1777  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1778  }
  1779  
  1780  // Native Client rejects the repeated 0x66 prefix.
  1781  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1782  func fillnop(p []byte, n int) {
  1783  	var m int
  1784  
  1785  	for n > 0 {
  1786  		m = n
  1787  		if m > len(nop) {
  1788  			m = len(nop)
  1789  		}
  1790  		copy(p[:m], nop[m-1][:m])
  1791  		p = p[m:]
  1792  		n -= m
  1793  	}
  1794  }
  1795  
  1796  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1797  	s.Grow(int64(c) + int64(pad))
  1798  	fillnop(s.P[c:], int(pad))
  1799  	return c + pad
  1800  }
  1801  
  1802  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1803  	if p.Mode != 64 || ctxt.Arch.PtrSize == 4 {
  1804  		return l
  1805  	}
  1806  	return q
  1807  }
  1808  
  1809  func span6(ctxt *obj.Link, s *obj.LSym) {
  1810  	ctxt.Cursym = s
  1811  
  1812  	if s.P != nil {
  1813  		return
  1814  	}
  1815  
  1816  	if ycover[0] == 0 {
  1817  		instinit()
  1818  	}
  1819  
  1820  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1821  		if p.To.Type == obj.TYPE_BRANCH {
  1822  			if p.Pcond == nil {
  1823  				p.Pcond = p
  1824  			}
  1825  		}
  1826  		if p.As == AADJSP {
  1827  			p.To.Type = obj.TYPE_REG
  1828  			p.To.Reg = REG_SP
  1829  			v := int32(-p.From.Offset)
  1830  			p.From.Offset = int64(v)
  1831  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1832  			if v < 0 {
  1833  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1834  				v = -v
  1835  				p.From.Offset = int64(v)
  1836  			}
  1837  
  1838  			if v == 0 {
  1839  				p.As = obj.ANOP
  1840  			}
  1841  		}
  1842  	}
  1843  
  1844  	var q *obj.Prog
  1845  	var count int64 // rough count of number of instructions
  1846  	for p := s.Text; p != nil; p = p.Link {
  1847  		count++
  1848  		p.Back = 2 // use short branches first time through
  1849  		q = p.Pcond
  1850  		if q != nil && (q.Back&2 != 0) {
  1851  			p.Back |= 1 // backward jump
  1852  			q.Back |= 4 // loop head
  1853  		}
  1854  
  1855  		if p.As == AADJSP {
  1856  			p.To.Type = obj.TYPE_REG
  1857  			p.To.Reg = REG_SP
  1858  			v := int32(-p.From.Offset)
  1859  			p.From.Offset = int64(v)
  1860  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1861  			if v < 0 {
  1862  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1863  				v = -v
  1864  				p.From.Offset = int64(v)
  1865  			}
  1866  
  1867  			if v == 0 {
  1868  				p.As = obj.ANOP
  1869  			}
  1870  		}
  1871  	}
  1872  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1873  
  1874  	n := 0
  1875  	var c int32
  1876  	errors := ctxt.Errors
  1877  	var deferreturn *obj.LSym
  1878  	if ctxt.Headtype == obj.Hnacl {
  1879  		deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1880  	}
  1881  	for {
  1882  		loop := int32(0)
  1883  		for i := range s.R {
  1884  			s.R[i] = obj.Reloc{}
  1885  		}
  1886  		s.R = s.R[:0]
  1887  		s.P = s.P[:0]
  1888  		c = 0
  1889  		for p := s.Text; p != nil; p = p.Link {
  1890  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1891  
  1892  				// pad everything to avoid crossing 32-byte boundary
  1893  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1894  					c = naclpad(ctxt, s, c, -c&31)
  1895  				}
  1896  
  1897  				// pad call deferreturn to start at 32-byte boundary
  1898  				// so that subtracting 5 in jmpdefer will jump back
  1899  				// to that boundary and rerun the call.
  1900  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1901  					c = naclpad(ctxt, s, c, -c&31)
  1902  				}
  1903  
  1904  				// pad call to end at 32-byte boundary
  1905  				if p.As == obj.ACALL {
  1906  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1907  				}
  1908  
  1909  				// the linker treats REP and STOSQ as different instructions
  1910  				// but in fact the REP is a prefix on the STOSQ.
  1911  				// make sure REP has room for 2 more bytes, so that
  1912  				// padding will not be inserted before the next instruction.
  1913  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1914  					c = naclpad(ctxt, s, c, -c&31)
  1915  				}
  1916  
  1917  				// same for LOCK.
  1918  				// various instructions follow; the longest is 4 bytes.
  1919  				// give ourselves 8 bytes so as to avoid surprises.
  1920  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1921  					c = naclpad(ctxt, s, c, -c&31)
  1922  				}
  1923  			}
  1924  
  1925  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1926  				// pad with NOPs
  1927  				v := -c & (LoopAlign - 1)
  1928  
  1929  				if v <= MaxLoopPad {
  1930  					s.Grow(int64(c) + int64(v))
  1931  					fillnop(s.P[c:], int(v))
  1932  					c += v
  1933  				}
  1934  			}
  1935  
  1936  			p.Pc = int64(c)
  1937  
  1938  			// process forward jumps to p
  1939  			for q = p.Rel; q != nil; q = q.Forwd {
  1940  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1941  				if q.Back&2 != 0 { // short
  1942  					if v > 127 {
  1943  						loop++
  1944  						q.Back ^= 2
  1945  					}
  1946  
  1947  					if q.As == AJCXZL || q.As == AXBEGIN {
  1948  						s.P[q.Pc+2] = byte(v)
  1949  					} else {
  1950  						s.P[q.Pc+1] = byte(v)
  1951  					}
  1952  				} else {
  1953  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1954  				}
  1955  			}
  1956  
  1957  			p.Rel = nil
  1958  
  1959  			p.Pc = int64(c)
  1960  			asmins(ctxt, p)
  1961  			m := ctxt.AsmBuf.Len()
  1962  			if int(p.Isize) != m {
  1963  				p.Isize = uint8(m)
  1964  				loop++
  1965  			}
  1966  
  1967  			s.Grow(p.Pc + int64(m))
  1968  			copy(s.P[p.Pc:], ctxt.AsmBuf.Bytes())
  1969  			c += int32(m)
  1970  		}
  1971  
  1972  		n++
  1973  		if n > 20 {
  1974  			ctxt.Diag("span must be looping")
  1975  			log.Fatalf("loop")
  1976  		}
  1977  		if loop == 0 {
  1978  			break
  1979  		}
  1980  		if ctxt.Errors > errors {
  1981  			return
  1982  		}
  1983  	}
  1984  
  1985  	if ctxt.Headtype == obj.Hnacl {
  1986  		c = naclpad(ctxt, s, c, -c&31)
  1987  	}
  1988  
  1989  	s.Size = int64(c)
  1990  
  1991  	if false { /* debug['a'] > 1 */
  1992  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1993  		var i int
  1994  		for i = 0; i < len(s.P); i++ {
  1995  			fmt.Printf(" %.2x", s.P[i])
  1996  			if i%16 == 15 {
  1997  				fmt.Printf("\n  %.6x", uint(i+1))
  1998  			}
  1999  		}
  2000  
  2001  		if i%16 != 0 {
  2002  			fmt.Printf("\n")
  2003  		}
  2004  
  2005  		for i := 0; i < len(s.R); i++ {
  2006  			r := &s.R[i]
  2007  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  2008  		}
  2009  	}
  2010  }
  2011  
  2012  func instinit() {
  2013  	for i := 1; optab[i].as != 0; i++ {
  2014  		c := optab[i].as
  2015  		if opindex[c&obj.AMask] != nil {
  2016  			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
  2017  		}
  2018  		opindex[c&obj.AMask] = &optab[i]
  2019  	}
  2020  
  2021  	for i := 0; i < Ymax; i++ {
  2022  		ycover[i*Ymax+i] = 1
  2023  	}
  2024  
  2025  	ycover[Yi0*Ymax+Yi8] = 1
  2026  	ycover[Yi1*Ymax+Yi8] = 1
  2027  	ycover[Yu7*Ymax+Yi8] = 1
  2028  
  2029  	ycover[Yi0*Ymax+Yu7] = 1
  2030  	ycover[Yi1*Ymax+Yu7] = 1
  2031  
  2032  	ycover[Yi0*Ymax+Yu8] = 1
  2033  	ycover[Yi1*Ymax+Yu8] = 1
  2034  	ycover[Yu7*Ymax+Yu8] = 1
  2035  
  2036  	ycover[Yi0*Ymax+Ys32] = 1
  2037  	ycover[Yi1*Ymax+Ys32] = 1
  2038  	ycover[Yu7*Ymax+Ys32] = 1
  2039  	ycover[Yu8*Ymax+Ys32] = 1
  2040  	ycover[Yi8*Ymax+Ys32] = 1
  2041  
  2042  	ycover[Yi0*Ymax+Yi32] = 1
  2043  	ycover[Yi1*Ymax+Yi32] = 1
  2044  	ycover[Yu7*Ymax+Yi32] = 1
  2045  	ycover[Yu8*Ymax+Yi32] = 1
  2046  	ycover[Yi8*Ymax+Yi32] = 1
  2047  	ycover[Ys32*Ymax+Yi32] = 1
  2048  
  2049  	ycover[Yi0*Ymax+Yi64] = 1
  2050  	ycover[Yi1*Ymax+Yi64] = 1
  2051  	ycover[Yu7*Ymax+Yi64] = 1
  2052  	ycover[Yu8*Ymax+Yi64] = 1
  2053  	ycover[Yi8*Ymax+Yi64] = 1
  2054  	ycover[Ys32*Ymax+Yi64] = 1
  2055  	ycover[Yi32*Ymax+Yi64] = 1
  2056  
  2057  	ycover[Yal*Ymax+Yrb] = 1
  2058  	ycover[Ycl*Ymax+Yrb] = 1
  2059  	ycover[Yax*Ymax+Yrb] = 1
  2060  	ycover[Ycx*Ymax+Yrb] = 1
  2061  	ycover[Yrx*Ymax+Yrb] = 1
  2062  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2063  
  2064  	ycover[Ycl*Ymax+Ycx] = 1
  2065  
  2066  	ycover[Yax*Ymax+Yrx] = 1
  2067  	ycover[Ycx*Ymax+Yrx] = 1
  2068  
  2069  	ycover[Yax*Ymax+Yrl] = 1
  2070  	ycover[Ycx*Ymax+Yrl] = 1
  2071  	ycover[Yrx*Ymax+Yrl] = 1
  2072  	ycover[Yrl32*Ymax+Yrl] = 1
  2073  
  2074  	ycover[Yf0*Ymax+Yrf] = 1
  2075  
  2076  	ycover[Yal*Ymax+Ymb] = 1
  2077  	ycover[Ycl*Ymax+Ymb] = 1
  2078  	ycover[Yax*Ymax+Ymb] = 1
  2079  	ycover[Ycx*Ymax+Ymb] = 1
  2080  	ycover[Yrx*Ymax+Ymb] = 1
  2081  	ycover[Yrb*Ymax+Ymb] = 1
  2082  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2083  	ycover[Ym*Ymax+Ymb] = 1
  2084  
  2085  	ycover[Yax*Ymax+Yml] = 1
  2086  	ycover[Ycx*Ymax+Yml] = 1
  2087  	ycover[Yrx*Ymax+Yml] = 1
  2088  	ycover[Yrl*Ymax+Yml] = 1
  2089  	ycover[Yrl32*Ymax+Yml] = 1
  2090  	ycover[Ym*Ymax+Yml] = 1
  2091  
  2092  	ycover[Yax*Ymax+Ymm] = 1
  2093  	ycover[Ycx*Ymax+Ymm] = 1
  2094  	ycover[Yrx*Ymax+Ymm] = 1
  2095  	ycover[Yrl*Ymax+Ymm] = 1
  2096  	ycover[Yrl32*Ymax+Ymm] = 1
  2097  	ycover[Ym*Ymax+Ymm] = 1
  2098  	ycover[Ymr*Ymax+Ymm] = 1
  2099  
  2100  	ycover[Ym*Ymax+Yxm] = 1
  2101  	ycover[Yxr*Ymax+Yxm] = 1
  2102  
  2103  	ycover[Ym*Ymax+Yym] = 1
  2104  	ycover[Yyr*Ymax+Yym] = 1
  2105  
  2106  	for i := 0; i < MAXREG; i++ {
  2107  		reg[i] = -1
  2108  		if i >= REG_AL && i <= REG_R15B {
  2109  			reg[i] = (i - REG_AL) & 7
  2110  			if i >= REG_SPB && i <= REG_DIB {
  2111  				regrex[i] = 0x40
  2112  			}
  2113  			if i >= REG_R8B && i <= REG_R15B {
  2114  				regrex[i] = Rxr | Rxx | Rxb
  2115  			}
  2116  		}
  2117  
  2118  		if i >= REG_AH && i <= REG_BH {
  2119  			reg[i] = 4 + ((i - REG_AH) & 7)
  2120  		}
  2121  		if i >= REG_AX && i <= REG_R15 {
  2122  			reg[i] = (i - REG_AX) & 7
  2123  			if i >= REG_R8 {
  2124  				regrex[i] = Rxr | Rxx | Rxb
  2125  			}
  2126  		}
  2127  
  2128  		if i >= REG_F0 && i <= REG_F0+7 {
  2129  			reg[i] = (i - REG_F0) & 7
  2130  		}
  2131  		if i >= REG_M0 && i <= REG_M0+7 {
  2132  			reg[i] = (i - REG_M0) & 7
  2133  		}
  2134  		if i >= REG_X0 && i <= REG_X0+15 {
  2135  			reg[i] = (i - REG_X0) & 7
  2136  			if i >= REG_X0+8 {
  2137  				regrex[i] = Rxr | Rxx | Rxb
  2138  			}
  2139  		}
  2140  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2141  			reg[i] = (i - REG_Y0) & 7
  2142  			if i >= REG_Y0+8 {
  2143  				regrex[i] = Rxr | Rxx | Rxb
  2144  			}
  2145  		}
  2146  
  2147  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2148  			regrex[i] = Rxr
  2149  		}
  2150  	}
  2151  }
  2152  
// isAndroid records, at package initialization, whether obj.Getgoos()
// returned "android". prefixof consults it when choosing the TLS segment
// prefix.
var isAndroid = (obj.Getgoos() == "android")
  2154  
  2155  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2156  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2157  		return 0
  2158  	}
  2159  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2160  		switch a.Reg {
  2161  		case REG_CS:
  2162  			return 0x2e
  2163  
  2164  		case REG_DS:
  2165  			return 0x3e
  2166  
  2167  		case REG_ES:
  2168  			return 0x26
  2169  
  2170  		case REG_FS:
  2171  			return 0x64
  2172  
  2173  		case REG_GS:
  2174  			return 0x65
  2175  
  2176  		case REG_TLS:
  2177  			// NOTE: Systems listed here should be only systems that
  2178  			// support direct TLS references like 8(TLS) implemented as
  2179  			// direct references from FS or GS. Systems that require
  2180  			// the initial-exec model, where you load the TLS base into
  2181  			// a register and then index from that register, do not reach
  2182  			// this code and should not be listed.
  2183  			if p.Mode == 32 {
  2184  				switch ctxt.Headtype {
  2185  				default:
  2186  					if isAndroid {
  2187  						return 0x65 // GS
  2188  					}
  2189  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2190  
  2191  				case obj.Hdarwin,
  2192  					obj.Hdragonfly,
  2193  					obj.Hfreebsd,
  2194  					obj.Hnetbsd,
  2195  					obj.Hopenbsd:
  2196  					return 0x65 // GS
  2197  				}
  2198  			}
  2199  
  2200  			switch ctxt.Headtype {
  2201  			default:
  2202  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2203  
  2204  			case obj.Hlinux:
  2205  				if isAndroid {
  2206  					return 0x64 // FS
  2207  				}
  2208  
  2209  				if ctxt.Flag_shared {
  2210  					log.Fatalf("unknown TLS base register for linux with -shared")
  2211  				} else {
  2212  					return 0x64 // FS
  2213  				}
  2214  
  2215  			case obj.Hdragonfly,
  2216  				obj.Hfreebsd,
  2217  				obj.Hnetbsd,
  2218  				obj.Hopenbsd,
  2219  				obj.Hsolaris:
  2220  				return 0x64 // FS
  2221  
  2222  			case obj.Hdarwin:
  2223  				return 0x65 // GS
  2224  			}
  2225  		}
  2226  	}
  2227  
  2228  	if p.Mode == 32 {
  2229  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2230  			// When building for inclusion into a shared library, an instruction of the form
  2231  			//     MOVL 0(CX)(TLS*1), AX
  2232  			// becomes
  2233  			//     mov %gs:(%ecx), %eax
  2234  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2235  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2236  			// a shared library the instruction it becomes
  2237  			//     mov 0x0(%ecx), $eax
  2238  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2239  			if a.Offset != 0 {
  2240  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2241  			}
  2242  			return 0x65 // GS
  2243  		}
  2244  		return 0
  2245  	}
  2246  
  2247  	switch a.Index {
  2248  	case REG_CS:
  2249  		return 0x2e
  2250  
  2251  	case REG_DS:
  2252  		return 0x3e
  2253  
  2254  	case REG_ES:
  2255  		return 0x26
  2256  
  2257  	case REG_TLS:
  2258  		if ctxt.Flag_shared {
  2259  			// When building for inclusion into a shared library, an instruction of the form
  2260  			//     MOV 0(CX)(TLS*1), AX
  2261  			// becomes
  2262  			//     mov %fs:(%rcx), %rax
  2263  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2264  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2265  			// a shared library the instruction does not require a prefix.
  2266  			if a.Offset != 0 {
  2267  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2268  			}
  2269  			return 0x64
  2270  		}
  2271  
  2272  	case REG_FS:
  2273  		return 0x64
  2274  
  2275  	case REG_GS:
  2276  		return 0x65
  2277  	}
  2278  
  2279  	return 0
  2280  }
  2281  
  2282  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2283  	switch a.Type {
  2284  	case obj.TYPE_NONE:
  2285  		return Ynone
  2286  
  2287  	case obj.TYPE_BRANCH:
  2288  		return Ybr
  2289  
  2290  	case obj.TYPE_INDIR:
  2291  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2292  			return Yindir
  2293  		}
  2294  		return Yxxx
  2295  
  2296  	case obj.TYPE_MEM:
  2297  		if a.Index == REG_SP {
  2298  			// Can't use SP as the index register
  2299  			return Yxxx
  2300  		}
  2301  		if ctxt.Asmode == 64 {
  2302  			switch a.Name {
  2303  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2304  				// Global variables can't use index registers and their
  2305  				// base register is %rip (%rip is encoded as REG_NONE).
  2306  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2307  					return Yxxx
  2308  				}
  2309  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2310  				// These names must have a base of SP.  The old compiler
  2311  				// uses 0 for the base register. SSA uses REG_SP.
  2312  				if a.Reg != REG_SP && a.Reg != 0 {
  2313  					return Yxxx
  2314  				}
  2315  			case obj.NAME_NONE:
  2316  				// everything is ok
  2317  			default:
  2318  				// unknown name
  2319  				return Yxxx
  2320  			}
  2321  		}
  2322  		return Ym
  2323  
  2324  	case obj.TYPE_ADDR:
  2325  		switch a.Name {
  2326  		case obj.NAME_GOTREF:
  2327  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2328  			return Yxxx
  2329  
  2330  		case obj.NAME_EXTERN,
  2331  			obj.NAME_STATIC:
  2332  			if a.Sym != nil && isextern(a.Sym) || (p.Mode == 32 && !ctxt.Flag_shared) {
  2333  				return Yi32
  2334  			}
  2335  			return Yiauto // use pc-relative addressing
  2336  
  2337  		case obj.NAME_AUTO,
  2338  			obj.NAME_PARAM:
  2339  			return Yiauto
  2340  		}
  2341  
  2342  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2343  		// and got Yi32 in an earlier version of this code.
  2344  		// Keep doing that until we fix yduff etc.
  2345  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2346  			return Yi32
  2347  		}
  2348  
  2349  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2350  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2351  		}
  2352  		fallthrough
  2353  
  2354  		// fall through
  2355  
  2356  	case obj.TYPE_CONST:
  2357  		if a.Sym != nil {
  2358  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2359  		}
  2360  
  2361  		v := a.Offset
  2362  		if p.Mode == 32 {
  2363  			v = int64(int32(v))
  2364  		}
  2365  		if v == 0 {
  2366  			if p.Mark&PRESERVEFLAGS != 0 {
  2367  				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
  2368  				return Yu7
  2369  			}
  2370  			return Yi0
  2371  		}
  2372  		if v == 1 {
  2373  			return Yi1
  2374  		}
  2375  		if v >= 0 && v <= 127 {
  2376  			return Yu7
  2377  		}
  2378  		if v >= 0 && v <= 255 {
  2379  			return Yu8
  2380  		}
  2381  		if v >= -128 && v <= 127 {
  2382  			return Yi8
  2383  		}
  2384  		if p.Mode == 32 {
  2385  			return Yi32
  2386  		}
  2387  		l := int32(v)
  2388  		if int64(l) == v {
  2389  			return Ys32 /* can sign extend */
  2390  		}
  2391  		if v>>32 == 0 {
  2392  			return Yi32 /* unsigned */
  2393  		}
  2394  		return Yi64
  2395  
  2396  	case obj.TYPE_TEXTSIZE:
  2397  		return Ytextsize
  2398  	}
  2399  
  2400  	if a.Type != obj.TYPE_REG {
  2401  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2402  		return Yxxx
  2403  	}
  2404  
  2405  	switch a.Reg {
  2406  	case REG_AL:
  2407  		return Yal
  2408  
  2409  	case REG_AX:
  2410  		return Yax
  2411  
  2412  		/*
  2413  			case REG_SPB:
  2414  		*/
  2415  	case REG_BPB,
  2416  		REG_SIB,
  2417  		REG_DIB,
  2418  		REG_R8B,
  2419  		REG_R9B,
  2420  		REG_R10B,
  2421  		REG_R11B,
  2422  		REG_R12B,
  2423  		REG_R13B,
  2424  		REG_R14B,
  2425  		REG_R15B:
  2426  		if ctxt.Asmode != 64 {
  2427  			return Yxxx
  2428  		}
  2429  		fallthrough
  2430  
  2431  	case REG_DL,
  2432  		REG_BL,
  2433  		REG_AH,
  2434  		REG_CH,
  2435  		REG_DH,
  2436  		REG_BH:
  2437  		return Yrb
  2438  
  2439  	case REG_CL:
  2440  		return Ycl
  2441  
  2442  	case REG_CX:
  2443  		return Ycx
  2444  
  2445  	case REG_DX, REG_BX:
  2446  		return Yrx
  2447  
  2448  	case REG_R8, /* not really Yrl */
  2449  		REG_R9,
  2450  		REG_R10,
  2451  		REG_R11,
  2452  		REG_R12,
  2453  		REG_R13,
  2454  		REG_R14,
  2455  		REG_R15:
  2456  		if ctxt.Asmode != 64 {
  2457  			return Yxxx
  2458  		}
  2459  		fallthrough
  2460  
  2461  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2462  		if p.Mode == 32 {
  2463  			return Yrl32
  2464  		}
  2465  		return Yrl
  2466  
  2467  	case REG_F0 + 0:
  2468  		return Yf0
  2469  
  2470  	case REG_F0 + 1,
  2471  		REG_F0 + 2,
  2472  		REG_F0 + 3,
  2473  		REG_F0 + 4,
  2474  		REG_F0 + 5,
  2475  		REG_F0 + 6,
  2476  		REG_F0 + 7:
  2477  		return Yrf
  2478  
  2479  	case REG_M0 + 0,
  2480  		REG_M0 + 1,
  2481  		REG_M0 + 2,
  2482  		REG_M0 + 3,
  2483  		REG_M0 + 4,
  2484  		REG_M0 + 5,
  2485  		REG_M0 + 6,
  2486  		REG_M0 + 7:
  2487  		return Ymr
  2488  
  2489  	case REG_X0 + 0,
  2490  		REG_X0 + 1,
  2491  		REG_X0 + 2,
  2492  		REG_X0 + 3,
  2493  		REG_X0 + 4,
  2494  		REG_X0 + 5,
  2495  		REG_X0 + 6,
  2496  		REG_X0 + 7,
  2497  		REG_X0 + 8,
  2498  		REG_X0 + 9,
  2499  		REG_X0 + 10,
  2500  		REG_X0 + 11,
  2501  		REG_X0 + 12,
  2502  		REG_X0 + 13,
  2503  		REG_X0 + 14,
  2504  		REG_X0 + 15:
  2505  		return Yxr
  2506  
  2507  	case REG_Y0 + 0,
  2508  		REG_Y0 + 1,
  2509  		REG_Y0 + 2,
  2510  		REG_Y0 + 3,
  2511  		REG_Y0 + 4,
  2512  		REG_Y0 + 5,
  2513  		REG_Y0 + 6,
  2514  		REG_Y0 + 7,
  2515  		REG_Y0 + 8,
  2516  		REG_Y0 + 9,
  2517  		REG_Y0 + 10,
  2518  		REG_Y0 + 11,
  2519  		REG_Y0 + 12,
  2520  		REG_Y0 + 13,
  2521  		REG_Y0 + 14,
  2522  		REG_Y0 + 15:
  2523  		return Yyr
  2524  
  2525  	case REG_CS:
  2526  		return Ycs
  2527  	case REG_SS:
  2528  		return Yss
  2529  	case REG_DS:
  2530  		return Yds
  2531  	case REG_ES:
  2532  		return Yes
  2533  	case REG_FS:
  2534  		return Yfs
  2535  	case REG_GS:
  2536  		return Ygs
  2537  	case REG_TLS:
  2538  		return Ytls
  2539  
  2540  	case REG_GDTR:
  2541  		return Ygdtr
  2542  	case REG_IDTR:
  2543  		return Yidtr
  2544  	case REG_LDTR:
  2545  		return Yldtr
  2546  	case REG_MSW:
  2547  		return Ymsw
  2548  	case REG_TASK:
  2549  		return Ytask
  2550  
  2551  	case REG_CR + 0:
  2552  		return Ycr0
  2553  	case REG_CR + 1:
  2554  		return Ycr1
  2555  	case REG_CR + 2:
  2556  		return Ycr2
  2557  	case REG_CR + 3:
  2558  		return Ycr3
  2559  	case REG_CR + 4:
  2560  		return Ycr4
  2561  	case REG_CR + 5:
  2562  		return Ycr5
  2563  	case REG_CR + 6:
  2564  		return Ycr6
  2565  	case REG_CR + 7:
  2566  		return Ycr7
  2567  	case REG_CR + 8:
  2568  		return Ycr8
  2569  
  2570  	case REG_DR + 0:
  2571  		return Ydr0
  2572  	case REG_DR + 1:
  2573  		return Ydr1
  2574  	case REG_DR + 2:
  2575  		return Ydr2
  2576  	case REG_DR + 3:
  2577  		return Ydr3
  2578  	case REG_DR + 4:
  2579  		return Ydr4
  2580  	case REG_DR + 5:
  2581  		return Ydr5
  2582  	case REG_DR + 6:
  2583  		return Ydr6
  2584  	case REG_DR + 7:
  2585  		return Ydr7
  2586  
  2587  	case REG_TR + 0:
  2588  		return Ytr0
  2589  	case REG_TR + 1:
  2590  		return Ytr1
  2591  	case REG_TR + 2:
  2592  		return Ytr2
  2593  	case REG_TR + 3:
  2594  		return Ytr3
  2595  	case REG_TR + 4:
  2596  		return Ytr4
  2597  	case REG_TR + 5:
  2598  		return Ytr5
  2599  	case REG_TR + 6:
  2600  		return Ytr6
  2601  	case REG_TR + 7:
  2602  		return Ytr7
  2603  	}
  2604  
  2605  	return Yxxx
  2606  }
  2607  
  2608  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2609  	var i int
  2610  
  2611  	switch index {
  2612  	default:
  2613  		goto bad
  2614  
  2615  	case REG_NONE:
  2616  		i = 4 << 3
  2617  		goto bas
  2618  
  2619  	case REG_R8,
  2620  		REG_R9,
  2621  		REG_R10,
  2622  		REG_R11,
  2623  		REG_R12,
  2624  		REG_R13,
  2625  		REG_R14,
  2626  		REG_R15:
  2627  		if ctxt.Asmode != 64 {
  2628  			goto bad
  2629  		}
  2630  		fallthrough
  2631  
  2632  	case REG_AX,
  2633  		REG_CX,
  2634  		REG_DX,
  2635  		REG_BX,
  2636  		REG_BP,
  2637  		REG_SI,
  2638  		REG_DI:
  2639  		i = reg[index] << 3
  2640  	}
  2641  
  2642  	switch scale {
  2643  	default:
  2644  		goto bad
  2645  
  2646  	case 1:
  2647  		break
  2648  
  2649  	case 2:
  2650  		i |= 1 << 6
  2651  
  2652  	case 4:
  2653  		i |= 2 << 6
  2654  
  2655  	case 8:
  2656  		i |= 3 << 6
  2657  	}
  2658  
  2659  bas:
  2660  	switch base {
  2661  	default:
  2662  		goto bad
  2663  
  2664  	case REG_NONE: /* must be mod=00 */
  2665  		i |= 5
  2666  
  2667  	case REG_R8,
  2668  		REG_R9,
  2669  		REG_R10,
  2670  		REG_R11,
  2671  		REG_R12,
  2672  		REG_R13,
  2673  		REG_R14,
  2674  		REG_R15:
  2675  		if ctxt.Asmode != 64 {
  2676  			goto bad
  2677  		}
  2678  		fallthrough
  2679  
  2680  	case REG_AX,
  2681  		REG_CX,
  2682  		REG_DX,
  2683  		REG_BX,
  2684  		REG_SP,
  2685  		REG_BP,
  2686  		REG_SI,
  2687  		REG_DI:
  2688  		i |= reg[base]
  2689  	}
  2690  
  2691  	ctxt.AsmBuf.Put1(byte(i))
  2692  	return
  2693  
  2694  bad:
  2695  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2696  	ctxt.AsmBuf.Put1(0)
  2697  	return
  2698  }
  2699  
  2700  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2701  	var rel obj.Reloc
  2702  
  2703  	v := vaddr(ctxt, p, a, &rel)
  2704  	if rel.Siz != 0 {
  2705  		if rel.Siz != 4 {
  2706  			ctxt.Diag("bad reloc")
  2707  		}
  2708  		r := obj.Addrel(ctxt.Cursym)
  2709  		*r = rel
  2710  		r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  2711  	}
  2712  
  2713  	ctxt.AsmBuf.PutInt32(int32(v))
  2714  }
  2715  
  2716  /*
  2717  static void
  2718  relput8(Prog *p, Addr *a)
  2719  {
  2720  	vlong v;
  2721  	Reloc rel, *r;
  2722  
  2723  	v = vaddr(ctxt, p, a, &rel);
  2724  	if(rel.siz != 0) {
  2725  		r = addrel(ctxt->cursym);
  2726  		*r = rel;
  2727  		r->siz = 8;
  2728  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2729  	}
  2730  	put8(ctxt, v);
  2731  }
  2732  */
  2733  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2734  	if r != nil {
  2735  		*r = obj.Reloc{}
  2736  	}
  2737  
  2738  	switch a.Name {
  2739  	case obj.NAME_STATIC,
  2740  		obj.NAME_GOTREF,
  2741  		obj.NAME_EXTERN:
  2742  		s := a.Sym
  2743  		if r == nil {
  2744  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2745  			log.Fatalf("reloc")
  2746  		}
  2747  
  2748  		if a.Name == obj.NAME_GOTREF {
  2749  			r.Siz = 4
  2750  			r.Type = obj.R_GOTPCREL
  2751  		} else if isextern(s) || (p.Mode != 64 && !ctxt.Flag_shared) {
  2752  			r.Siz = 4
  2753  			r.Type = obj.R_ADDR
  2754  		} else {
  2755  			r.Siz = 4
  2756  			r.Type = obj.R_PCREL
  2757  		}
  2758  
  2759  		r.Off = -1 // caller must fill in
  2760  		r.Sym = s
  2761  		r.Add = a.Offset
  2762  
  2763  		return 0
  2764  	}
  2765  
  2766  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2767  		if r == nil {
  2768  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2769  			log.Fatalf("reloc")
  2770  		}
  2771  
  2772  		if !ctxt.Flag_shared || isAndroid {
  2773  			r.Type = obj.R_TLS_LE
  2774  			r.Siz = 4
  2775  			r.Off = -1 // caller must fill in
  2776  			r.Add = a.Offset
  2777  		}
  2778  		return 0
  2779  	}
  2780  
  2781  	return a.Offset
  2782  }
  2783  
  2784  func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  2785  	var base int
  2786  	var rel obj.Reloc
  2787  
  2788  	rex &= 0x40 | Rxr
  2789  	switch {
  2790  	case int64(int32(a.Offset)) == a.Offset:
  2791  		// Offset fits in sign-extended 32 bits.
  2792  	case int64(uint32(a.Offset)) == a.Offset && ctxt.Rexflag&Rxw == 0:
  2793  		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
  2794  		// This is allowed for assembly that wants to use 32-bit hex
  2795  		// constants, e.g. LEAL 0x99999999(AX), AX.
  2796  	default:
  2797  		ctxt.Diag("offset too large in %s", p)
  2798  	}
  2799  	v := int32(a.Offset)
  2800  	rel.Siz = 0
  2801  
  2802  	switch a.Type {
  2803  	case obj.TYPE_ADDR:
  2804  		if a.Name == obj.NAME_NONE {
  2805  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  2806  		}
  2807  		if a.Index == REG_TLS {
  2808  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  2809  		}
  2810  		goto bad
  2811  
  2812  	case obj.TYPE_REG:
  2813  		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
  2814  			goto bad
  2815  		}
  2816  		if v != 0 {
  2817  			goto bad
  2818  		}
  2819  		ctxt.AsmBuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  2820  		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  2821  		return
  2822  	}
  2823  
  2824  	if a.Type != obj.TYPE_MEM {
  2825  		goto bad
  2826  	}
  2827  
  2828  	if a.Index != REG_NONE && a.Index != REG_TLS {
  2829  		base := int(a.Reg)
  2830  		switch a.Name {
  2831  		case obj.NAME_EXTERN,
  2832  			obj.NAME_GOTREF,
  2833  			obj.NAME_STATIC:
  2834  			if !isextern(a.Sym) && p.Mode == 64 {
  2835  				goto bad
  2836  			}
  2837  			if p.Mode == 32 && ctxt.Flag_shared {
  2838  				base = REG_CX
  2839  			} else {
  2840  				base = REG_NONE
  2841  			}
  2842  			v = int32(vaddr(ctxt, p, a, &rel))
  2843  
  2844  		case obj.NAME_AUTO,
  2845  			obj.NAME_PARAM:
  2846  			base = REG_SP
  2847  		}
  2848  
  2849  		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  2850  		if base == REG_NONE {
  2851  			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
  2852  			asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2853  			goto putrelv
  2854  		}
  2855  
  2856  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  2857  			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
  2858  			asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2859  			return
  2860  		}
  2861  
  2862  		if v >= -128 && v < 128 && rel.Siz == 0 {
  2863  			ctxt.AsmBuf.Put1(byte(1<<6 | 4<<0 | r<<3))
  2864  			asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2865  			ctxt.AsmBuf.Put1(byte(v))
  2866  			return
  2867  		}
  2868  
  2869  		ctxt.AsmBuf.Put1(byte(2<<6 | 4<<0 | r<<3))
  2870  		asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2871  		goto putrelv
  2872  	}
  2873  
  2874  	base = int(a.Reg)
  2875  	switch a.Name {
  2876  	case obj.NAME_STATIC,
  2877  		obj.NAME_GOTREF,
  2878  		obj.NAME_EXTERN:
  2879  		if a.Sym == nil {
  2880  			ctxt.Diag("bad addr: %v", p)
  2881  		}
  2882  		if p.Mode == 32 && ctxt.Flag_shared {
  2883  			base = REG_CX
  2884  		} else {
  2885  			base = REG_NONE
  2886  		}
  2887  		v = int32(vaddr(ctxt, p, a, &rel))
  2888  
  2889  	case obj.NAME_AUTO,
  2890  		obj.NAME_PARAM:
  2891  		base = REG_SP
  2892  	}
  2893  
  2894  	if base == REG_TLS {
  2895  		v = int32(vaddr(ctxt, p, a, &rel))
  2896  	}
  2897  
  2898  	ctxt.Rexflag |= regrex[base]&Rxb | rex
  2899  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  2900  		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
  2901  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  2902  				ctxt.Diag("%v has offset against gotref", p)
  2903  			}
  2904  			ctxt.AsmBuf.Put1(byte(0<<6 | 5<<0 | r<<3))
  2905  			goto putrelv
  2906  		}
  2907  
  2908  		// temporary
  2909  		ctxt.AsmBuf.Put2(
  2910  			byte(0<<6|4<<0|r<<3), // sib present
  2911  			0<<6|4<<3|5<<0,       // DS:d32
  2912  		)
  2913  		goto putrelv
  2914  	}
  2915  
  2916  	if base == REG_SP || base == REG_R12 {
  2917  		if v == 0 {
  2918  			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  2919  			asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2920  			return
  2921  		}
  2922  
  2923  		if v >= -128 && v < 128 {
  2924  			ctxt.AsmBuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  2925  			asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2926  			ctxt.AsmBuf.Put1(byte(v))
  2927  			return
  2928  		}
  2929  
  2930  		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  2931  		asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2932  		goto putrelv
  2933  	}
  2934  
  2935  	if REG_AX <= base && base <= REG_R15 {
  2936  		if a.Index == REG_TLS && !ctxt.Flag_shared {
  2937  			rel = obj.Reloc{}
  2938  			rel.Type = obj.R_TLS_LE
  2939  			rel.Siz = 4
  2940  			rel.Sym = nil
  2941  			rel.Add = int64(v)
  2942  			v = 0
  2943  		}
  2944  
  2945  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  2946  			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  2947  			return
  2948  		}
  2949  
  2950  		if v >= -128 && v < 128 && rel.Siz == 0 {
  2951  			ctxt.AsmBuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
  2952  			return
  2953  		}
  2954  
  2955  		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  2956  		goto putrelv
  2957  	}
  2958  
  2959  	goto bad
  2960  
  2961  putrelv:
  2962  	if rel.Siz != 0 {
  2963  		if rel.Siz != 4 {
  2964  			ctxt.Diag("bad rel")
  2965  			goto bad
  2966  		}
  2967  
  2968  		r := obj.Addrel(ctxt.Cursym)
  2969  		*r = rel
  2970  		r.Off = int32(ctxt.Curp.Pc + int64(ctxt.AsmBuf.Len()))
  2971  	}
  2972  
  2973  	ctxt.AsmBuf.PutInt32(v)
  2974  	return
  2975  
  2976  bad:
  2977  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  2978  	return
  2979  }
  2980  
  2981  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2982  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2983  }
  2984  
  2985  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2986  	asmandsz(ctxt, p, a, o, 0, 0)
  2987  }
  2988  
  2989  func bytereg(a *obj.Addr, t *uint8) {
  2990  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2991  		a.Reg += REG_AL - REG_AX
  2992  		*t = 0
  2993  	}
  2994  }
  2995  
  2996  func unbytereg(a *obj.Addr, t *uint8) {
  2997  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  2998  		a.Reg += REG_AX - REG_AL
  2999  		*t = 0
  3000  	}
  3001  }
  3002  
  3003  const (
  3004  	E = 0xff
  3005  )
  3006  
// ymovtab lists special-case encodings, grouped below by kind: pushes and
// pops of segment registers, moves to and from segment, control, debug, test
// and descriptor-table registers, double shifts, and the TLS base load.
//
// NOTE(review): the Movtab definition is not visible here. Judging by the
// values, each entry holds, in order: the instruction, the operand classes
// for the from, from3 and to operands, a small integer that presumably
// selects how the consumer of this table encodes the entry, and up to four
// bytes of encoding data (with E presumably ending the literal opcode bytes).
// Confirm against the Movtab type and the code that walks this table.
//
// The final all-zero entry appears to act as a terminator.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3146  
  3147  func isax(a *obj.Addr) bool {
  3148  	switch a.Reg {
  3149  	case REG_AX, REG_AL, REG_AH:
  3150  		return true
  3151  	}
  3152  
  3153  	if a.Index == REG_AX {
  3154  		return true
  3155  	}
  3156  	return false
  3157  }
  3158  
  3159  func subreg(p *obj.Prog, from int, to int) {
  3160  	if false { /* debug['Q'] */
  3161  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  3162  	}
  3163  
  3164  	if int(p.From.Reg) == from {
  3165  		p.From.Reg = int16(to)
  3166  		p.Ft = 0
  3167  	}
  3168  
  3169  	if int(p.To.Reg) == from {
  3170  		p.To.Reg = int16(to)
  3171  		p.Tt = 0
  3172  	}
  3173  
  3174  	if int(p.From.Index) == from {
  3175  		p.From.Index = int16(to)
  3176  		p.Ft = 0
  3177  	}
  3178  
  3179  	if int(p.To.Index) == from {
  3180  		p.To.Index = int16(to)
  3181  		p.Tt = 0
  3182  	}
  3183  
  3184  	if false { /* debug['Q'] */
  3185  		fmt.Printf("%v\n", p)
  3186  	}
  3187  }
  3188  
  3189  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3190  	switch op {
  3191  	case Pm, Pe, Pf2, Pf3:
  3192  		if osize != 1 {
  3193  			if op != Pm {
  3194  				ctxt.AsmBuf.Put1(byte(op))
  3195  			}
  3196  			ctxt.AsmBuf.Put1(Pm)
  3197  			z++
  3198  			op = int(o.op[z])
  3199  			break
  3200  		}
  3201  		fallthrough
  3202  
  3203  	default:
  3204  		if ctxt.AsmBuf.Len() == 0 || ctxt.AsmBuf.Last() != Pm {
  3205  			ctxt.AsmBuf.Put1(Pm)
  3206  		}
  3207  	}
  3208  
  3209  	ctxt.AsmBuf.Put1(byte(op))
  3210  	return z
  3211  }
  3212  
  3213  var bpduff1 = []byte{
  3214  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3215  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3216  }
  3217  
  3218  var bpduff2 = []byte{
  3219  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3220  }
  3221  
  3222  // Emit VEX prefix and opcode byte.
  3223  // The three addresses are the r/m, vvvv, and reg fields.
  3224  // The reg and rm arguments appear in the same order as the
  3225  // arguments to asmand, which typically follows the call to asmvex.
  3226  // The final two arguments are the VEX prefix (see encoding above)
  3227  // and the opcode byte.
  3228  // For details about vex prefix see:
  3229  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3230  func asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3231  	ctxt.Vexflag = 1
  3232  	rexR := 0
  3233  	if r != nil {
  3234  		rexR = regrex[r.Reg] & Rxr
  3235  	}
  3236  	rexB := 0
  3237  	rexX := 0
  3238  	if rm != nil {
  3239  		rexB = regrex[rm.Reg] & Rxb
  3240  		rexX = regrex[rm.Index] & Rxx
  3241  	}
  3242  	vexM := (vex >> 3) & 0xF
  3243  	vexWLP := vex & 0x87
  3244  	vexV := byte(0)
  3245  	if v != nil {
  3246  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3247  	}
  3248  	vexV ^= 0xF
  3249  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3250  		// Can use 2-byte encoding.
  3251  		ctxt.AsmBuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3252  	} else {
  3253  		// Must use 3-byte encoding.
  3254  		ctxt.AsmBuf.Put3(0xc4,
  3255  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3256  			vexV<<3|vexWLP,
  3257  		)
  3258  	}
  3259  	ctxt.AsmBuf.Put1(opcode)
  3260  }
  3261  
  3262  func doasm(ctxt *obj.Link, p *obj.Prog) {
  3263  	ctxt.Curp = p // TODO
  3264  
  3265  	o := opindex[p.As&obj.AMask]
  3266  
  3267  	if o == nil {
  3268  		ctxt.Diag("asmins: missing op %v", p)
  3269  		return
  3270  	}
  3271  
  3272  	pre := prefixof(ctxt, p, &p.From)
  3273  	if pre != 0 {
  3274  		ctxt.AsmBuf.Put1(byte(pre))
  3275  	}
  3276  	pre = prefixof(ctxt, p, &p.To)
  3277  	if pre != 0 {
  3278  		ctxt.AsmBuf.Put1(byte(pre))
  3279  	}
  3280  
  3281  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3282  	// which encodes as SHRQ $32(DX*0), AX.
  3283  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3284  	// Change encoding generated by assemblers and compilers and remove.
  3285  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3286  		p.From3 = new(obj.Addr)
  3287  		p.From3.Type = obj.TYPE_REG
  3288  		p.From3.Reg = p.From.Index
  3289  		p.From.Index = 0
  3290  	}
  3291  
  3292  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3293  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3294  	switch p.As {
  3295  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3296  		if p.From3Type() == obj.TYPE_NONE {
  3297  			p.From3 = new(obj.Addr)
  3298  			*p.From3 = p.From
  3299  			p.From = obj.Addr{}
  3300  			p.From.Type = obj.TYPE_CONST
  3301  			p.From.Offset = p.To.Offset
  3302  			p.To.Offset = 0
  3303  		}
  3304  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3305  		if p.From3Type() == obj.TYPE_NONE {
  3306  			p.From3 = new(obj.Addr)
  3307  			*p.From3 = p.To
  3308  			p.To = obj.Addr{}
  3309  			p.To.Type = obj.TYPE_CONST
  3310  			p.To.Offset = p.From3.Offset
  3311  			p.From3.Offset = 0
  3312  		}
  3313  	}
  3314  
  3315  	if p.Ft == 0 {
  3316  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3317  	}
  3318  	if p.Tt == 0 {
  3319  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3320  	}
  3321  
  3322  	ft := int(p.Ft) * Ymax
  3323  	f3t := Ynone * Ymax
  3324  	if p.From3 != nil {
  3325  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3326  	}
  3327  	tt := int(p.Tt) * Ymax
  3328  
  3329  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3330  	z := 0
  3331  	var a *obj.Addr
  3332  	var l int
  3333  	var op int
  3334  	var q *obj.Prog
  3335  	var r *obj.Reloc
  3336  	var rel obj.Reloc
  3337  	var v int64
  3338  	for i := range o.ytab {
  3339  		yt := &o.ytab[i]
  3340  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3341  			switch o.prefix {
  3342  			case Px1: /* first option valid only in 32-bit mode */
  3343  				if ctxt.Mode == 64 && z == 0 {
  3344  					z += int(yt.zoffset) + xo
  3345  					continue
  3346  				}
  3347  			case Pq: /* 16 bit escape and opcode escape */
  3348  				ctxt.AsmBuf.Put2(Pe, Pm)
  3349  
  3350  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3351  				ctxt.Rexflag |= Pw
  3352  				ctxt.AsmBuf.Put2(Pe, Pm)
  3353  
  3354  			case Pq4: /*  66 0F 38 */
  3355  				ctxt.AsmBuf.Put3(0x66, 0x0F, 0x38)
  3356  
  3357  			case Pf2, /* xmm opcode escape */
  3358  				Pf3:
  3359  				ctxt.AsmBuf.Put2(o.prefix, Pm)
  3360  
  3361  			case Pef3:
  3362  				ctxt.AsmBuf.Put3(Pe, Pf3, Pm)
  3363  
  3364  			case Pfw: /* xmm opcode escape + REX.W */
  3365  				ctxt.Rexflag |= Pw
  3366  				ctxt.AsmBuf.Put2(Pf3, Pm)
  3367  
  3368  			case Pm: /* opcode escape */
  3369  				ctxt.AsmBuf.Put1(Pm)
  3370  
  3371  			case Pe: /* 16 bit escape */
  3372  				ctxt.AsmBuf.Put1(Pe)
  3373  
  3374  			case Pw: /* 64-bit escape */
  3375  				if p.Mode != 64 {
  3376  					ctxt.Diag("asmins: illegal 64: %v", p)
  3377  				}
  3378  				ctxt.Rexflag |= Pw
  3379  
  3380  			case Pw8: /* 64-bit escape if z >= 8 */
  3381  				if z >= 8 {
  3382  					if p.Mode != 64 {
  3383  						ctxt.Diag("asmins: illegal 64: %v", p)
  3384  					}
  3385  					ctxt.Rexflag |= Pw
  3386  				}
  3387  
  3388  			case Pb: /* botch */
  3389  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3390  					goto bad
  3391  				}
  3392  				// NOTE(rsc): This is probably safe to do always,
  3393  				// but when enabled it chooses different encodings
  3394  				// than the old cmd/internal/obj/i386 code did,
  3395  				// which breaks our "same bits out" checks.
  3396  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3397  				// in the original obj/i386, and it would encode
  3398  				// (using a valid, shorter form) as 3c 00 if we enabled
  3399  				// the call to bytereg here.
  3400  				if p.Mode == 64 {
  3401  					bytereg(&p.From, &p.Ft)
  3402  					bytereg(&p.To, &p.Tt)
  3403  				}
  3404  
  3405  			case P32: /* 32 bit but illegal if 64-bit mode */
  3406  				if p.Mode == 64 {
  3407  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3408  				}
  3409  
  3410  			case Py: /* 64-bit only, no prefix */
  3411  				if p.Mode != 64 {
  3412  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3413  				}
  3414  
  3415  			case Py1: /* 64-bit only if z < 1, no prefix */
  3416  				if z < 1 && p.Mode != 64 {
  3417  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3418  				}
  3419  
  3420  			case Py3: /* 64-bit only if z < 3, no prefix */
  3421  				if z < 3 && p.Mode != 64 {
  3422  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3423  				}
  3424  			}
  3425  
  3426  			if z >= len(o.op) {
  3427  				log.Fatalf("asmins bad table %v", p)
  3428  			}
  3429  			op = int(o.op[z])
  3430  			if op == 0x0f {
  3431  				ctxt.AsmBuf.Put1(byte(op))
  3432  				z++
  3433  				op = int(o.op[z])
  3434  			}
  3435  
  3436  			switch yt.zcase {
  3437  			default:
  3438  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3439  				return
  3440  
  3441  			case Zpseudo:
  3442  				break
  3443  
  3444  			case Zlit:
  3445  				for ; ; z++ {
  3446  					op = int(o.op[z])
  3447  					if op == 0 {
  3448  						break
  3449  					}
  3450  					ctxt.AsmBuf.Put1(byte(op))
  3451  				}
  3452  
  3453  			case Zlitm_r:
  3454  				for ; ; z++ {
  3455  					op = int(o.op[z])
  3456  					if op == 0 {
  3457  						break
  3458  					}
  3459  					ctxt.AsmBuf.Put1(byte(op))
  3460  				}
  3461  				asmand(ctxt, p, &p.From, &p.To)
  3462  
  3463  			case Zmb_r:
  3464  				bytereg(&p.From, &p.Ft)
  3465  				fallthrough
  3466  
  3467  			case Zm_r:
  3468  				ctxt.AsmBuf.Put1(byte(op))
  3469  				asmand(ctxt, p, &p.From, &p.To)
  3470  
  3471  			case Zm2_r:
  3472  				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
  3473  				asmand(ctxt, p, &p.From, &p.To)
  3474  
  3475  			case Zm_r_xm:
  3476  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3477  				asmand(ctxt, p, &p.From, &p.To)
  3478  
  3479  			case Zm_r_xm_nr:
  3480  				ctxt.Rexflag = 0
  3481  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3482  				asmand(ctxt, p, &p.From, &p.To)
  3483  
  3484  			case Zm_r_i_xm:
  3485  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3486  				asmand(ctxt, p, &p.From, p.From3)
  3487  				ctxt.AsmBuf.Put1(byte(p.To.Offset))
  3488  
  3489  			case Zibm_r, Zibr_m:
  3490  				for {
  3491  					tmp1 := z
  3492  					z++
  3493  					op = int(o.op[tmp1])
  3494  					if op == 0 {
  3495  						break
  3496  					}
  3497  					ctxt.AsmBuf.Put1(byte(op))
  3498  				}
  3499  				if yt.zcase == Zibr_m {
  3500  					asmand(ctxt, p, &p.To, p.From3)
  3501  				} else {
  3502  					asmand(ctxt, p, p.From3, &p.To)
  3503  				}
  3504  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3505  
  3506  			case Zaut_r:
  3507  				ctxt.AsmBuf.Put1(0x8d) // leal
  3508  				if p.From.Type != obj.TYPE_ADDR {
  3509  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3510  				}
  3511  				p.From.Type = obj.TYPE_MEM
  3512  				asmand(ctxt, p, &p.From, &p.To)
  3513  				p.From.Type = obj.TYPE_ADDR
  3514  
  3515  			case Zm_o:
  3516  				ctxt.AsmBuf.Put1(byte(op))
  3517  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3518  
  3519  			case Zr_m:
  3520  				ctxt.AsmBuf.Put1(byte(op))
  3521  				asmand(ctxt, p, &p.To, &p.From)
  3522  
  3523  			case Zvex_rm_v_r:
  3524  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3525  				asmand(ctxt, p, &p.From, &p.To)
  3526  
  3527  			case Zvex_i_r_v:
  3528  				asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
  3529  				regnum := byte(0x7)
  3530  				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
  3531  					regnum &= byte(p.From3.Reg - REG_X0)
  3532  				} else {
  3533  					regnum &= byte(p.From3.Reg - REG_Y0)
  3534  				}
  3535  				ctxt.AsmBuf.Put1(byte(o.op[z+2]) | regnum)
  3536  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3537  
  3538  			case Zvex_i_rm_v_r:
  3539  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3540  				asmand(ctxt, p, &p.From, &p.To)
  3541  				ctxt.AsmBuf.Put1(byte(p.From3.Offset))
  3542  
  3543  			case Zvex_i_rm_r:
  3544  				asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
  3545  				asmand(ctxt, p, p.From3, &p.To)
  3546  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3547  
  3548  			case Zvex_v_rm_r:
  3549  				asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3550  				asmand(ctxt, p, p.From3, &p.To)
  3551  
  3552  			case Zvex_r_v_rm:
  3553  				asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3554  				asmand(ctxt, p, &p.To, &p.From)
  3555  
  3556  			case Zr_m_xm:
  3557  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3558  				asmand(ctxt, p, &p.To, &p.From)
  3559  
  3560  			case Zr_m_xm_nr:
  3561  				ctxt.Rexflag = 0
  3562  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3563  				asmand(ctxt, p, &p.To, &p.From)
  3564  
  3565  			case Zo_m:
  3566  				ctxt.AsmBuf.Put1(byte(op))
  3567  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3568  
  3569  			case Zcallindreg:
  3570  				r = obj.Addrel(ctxt.Cursym)
  3571  				r.Off = int32(p.Pc)
  3572  				r.Type = obj.R_CALLIND
  3573  				r.Siz = 0
  3574  				fallthrough
  3575  
  3576  			case Zo_m64:
  3577  				ctxt.AsmBuf.Put1(byte(op))
  3578  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3579  
  3580  			case Zm_ibo:
  3581  				ctxt.AsmBuf.Put1(byte(op))
  3582  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3583  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3584  
  3585  			case Zibo_m:
  3586  				ctxt.AsmBuf.Put1(byte(op))
  3587  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3588  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3589  
  3590  			case Zibo_m_xm:
  3591  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3592  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3593  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3594  
  3595  			case Z_ib, Zib_:
  3596  				if yt.zcase == Zib_ {
  3597  					a = &p.From
  3598  				} else {
  3599  					a = &p.To
  3600  				}
  3601  				ctxt.AsmBuf.Put1(byte(op))
  3602  				if p.As == AXABORT {
  3603  					ctxt.AsmBuf.Put1(o.op[z+1])
  3604  				}
  3605  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3606  
  3607  			case Zib_rp:
  3608  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3609  				ctxt.AsmBuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3610  
  3611  			case Zil_rp:
  3612  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3613  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3614  				if o.prefix == Pe {
  3615  					v = vaddr(ctxt, p, &p.From, nil)
  3616  					ctxt.AsmBuf.PutInt16(int16(v))
  3617  				} else {
  3618  					relput4(ctxt, p, &p.From)
  3619  				}
  3620  
  3621  			case Zo_iw:
  3622  				ctxt.AsmBuf.Put1(byte(op))
  3623  				if p.From.Type != obj.TYPE_NONE {
  3624  					v = vaddr(ctxt, p, &p.From, nil)
  3625  					ctxt.AsmBuf.PutInt16(int16(v))
  3626  				}
  3627  
  3628  			case Ziq_rp:
  3629  				v = vaddr(ctxt, p, &p.From, &rel)
  3630  				l = int(v >> 32)
  3631  				if l == 0 && rel.Siz != 8 {
  3632  					//p->mark |= 0100;
  3633  					//print("zero: %llux %v\n", v, p);
  3634  					ctxt.Rexflag &^= (0x40 | Rxw)
  3635  
  3636  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3637  					ctxt.AsmBuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3638  					if rel.Type != 0 {
  3639  						r = obj.Addrel(ctxt.Cursym)
  3640  						*r = rel
  3641  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3642  					}
  3643  
  3644  					ctxt.AsmBuf.PutInt32(int32(v))
  3645  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3646  
  3647  					//p->mark |= 0100;
  3648  					//print("sign: %llux %v\n", v, p);
  3649  					ctxt.AsmBuf.Put1(0xc7)
  3650  					asmando(ctxt, p, &p.To, 0)
  3651  
  3652  					ctxt.AsmBuf.PutInt32(int32(v)) // need all 8
  3653  				} else {
  3654  					//print("all: %llux %v\n", v, p);
  3655  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3656  					ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3657  					if rel.Type != 0 {
  3658  						r = obj.Addrel(ctxt.Cursym)
  3659  						*r = rel
  3660  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3661  					}
  3662  
  3663  					ctxt.AsmBuf.PutInt64(v)
  3664  				}
  3665  
  3666  			case Zib_rr:
  3667  				ctxt.AsmBuf.Put1(byte(op))
  3668  				asmand(ctxt, p, &p.To, &p.To)
  3669  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3670  
  3671  			case Z_il, Zil_:
  3672  				if yt.zcase == Zil_ {
  3673  					a = &p.From
  3674  				} else {
  3675  					a = &p.To
  3676  				}
  3677  				ctxt.AsmBuf.Put1(byte(op))
  3678  				if o.prefix == Pe {
  3679  					v = vaddr(ctxt, p, a, nil)
  3680  					ctxt.AsmBuf.PutInt16(int16(v))
  3681  				} else {
  3682  					relput4(ctxt, p, a)
  3683  				}
  3684  
  3685  			case Zm_ilo, Zilo_m:
  3686  				ctxt.AsmBuf.Put1(byte(op))
  3687  				if yt.zcase == Zilo_m {
  3688  					a = &p.From
  3689  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3690  				} else {
  3691  					a = &p.To
  3692  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3693  				}
  3694  
  3695  				if o.prefix == Pe {
  3696  					v = vaddr(ctxt, p, a, nil)
  3697  					ctxt.AsmBuf.PutInt16(int16(v))
  3698  				} else {
  3699  					relput4(ctxt, p, a)
  3700  				}
  3701  
  3702  			case Zil_rr:
  3703  				ctxt.AsmBuf.Put1(byte(op))
  3704  				asmand(ctxt, p, &p.To, &p.To)
  3705  				if o.prefix == Pe {
  3706  					v = vaddr(ctxt, p, &p.From, nil)
  3707  					ctxt.AsmBuf.PutInt16(int16(v))
  3708  				} else {
  3709  					relput4(ctxt, p, &p.From)
  3710  				}
  3711  
  3712  			case Z_rp:
  3713  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3714  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3715  
  3716  			case Zrp_:
  3717  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3718  				ctxt.AsmBuf.Put1(byte(op + reg[p.From.Reg]))
  3719  
  3720  			case Zclr:
  3721  				ctxt.Rexflag &^= Pw
  3722  				ctxt.AsmBuf.Put1(byte(op))
  3723  				asmand(ctxt, p, &p.To, &p.To)
  3724  
  3725  			case Zcallcon, Zjmpcon:
  3726  				if yt.zcase == Zcallcon {
  3727  					ctxt.AsmBuf.Put1(byte(op))
  3728  				} else {
  3729  					ctxt.AsmBuf.Put1(o.op[z+1])
  3730  				}
  3731  				r = obj.Addrel(ctxt.Cursym)
  3732  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3733  				r.Type = obj.R_PCREL
  3734  				r.Siz = 4
  3735  				r.Add = p.To.Offset
  3736  				ctxt.AsmBuf.PutInt32(0)
  3737  
  3738  			case Zcallind:
  3739  				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
  3740  				r = obj.Addrel(ctxt.Cursym)
  3741  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3742  				r.Type = obj.R_ADDR
  3743  				r.Siz = 4
  3744  				r.Add = p.To.Offset
  3745  				r.Sym = p.To.Sym
  3746  				ctxt.AsmBuf.PutInt32(0)
  3747  
  3748  			case Zcall, Zcallduff:
  3749  				if p.To.Sym == nil {
  3750  					ctxt.Diag("call without target")
  3751  					log.Fatalf("bad code")
  3752  				}
  3753  
  3754  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3755  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3756  				}
  3757  
  3758  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
  3759  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3760  					// (the call jumps into the middle of the function).
  3761  					// This makes it possible to see call sites for duffcopy/duffzero in
  3762  					// BP-based profiling tools like Linux perf (which is the
  3763  					// whole point of obj.Framepointer_enabled).
  3764  					// MOVQ BP, -16(SP)
  3765  					// LEAQ -16(SP), BP
  3766  					ctxt.AsmBuf.Put(bpduff1)
  3767  				}
  3768  				ctxt.AsmBuf.Put1(byte(op))
  3769  				r = obj.Addrel(ctxt.Cursym)
  3770  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3771  				r.Sym = p.To.Sym
  3772  				r.Add = p.To.Offset
  3773  				r.Type = obj.R_CALL
  3774  				r.Siz = 4
  3775  				ctxt.AsmBuf.PutInt32(0)
  3776  
  3777  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
  3778  					// Pop BP pushed above.
  3779  					// MOVQ 0(BP), BP
  3780  					ctxt.AsmBuf.Put(bpduff2)
  3781  				}
  3782  
  3783  			// TODO: jump across functions needs reloc
  3784  			case Zbr, Zjmp, Zloop:
  3785  				if p.As == AXBEGIN {
  3786  					ctxt.AsmBuf.Put1(byte(op))
  3787  				}
  3788  				if p.To.Sym != nil {
  3789  					if yt.zcase != Zjmp {
  3790  						ctxt.Diag("branch to ATEXT")
  3791  						log.Fatalf("bad code")
  3792  					}
  3793  
  3794  					ctxt.AsmBuf.Put1(o.op[z+1])
  3795  					r = obj.Addrel(ctxt.Cursym)
  3796  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3797  					r.Sym = p.To.Sym
  3798  					r.Type = obj.R_PCREL
  3799  					r.Siz = 4
  3800  					ctxt.AsmBuf.PutInt32(0)
  3801  					break
  3802  				}
  3803  
  3804  				// Assumes q is in this function.
  3805  				// TODO: Check in input, preserve in brchain.
  3806  
  3807  				// Fill in backward jump now.
  3808  				q = p.Pcond
  3809  
  3810  				if q == nil {
  3811  					ctxt.Diag("jmp/branch/loop without target")
  3812  					log.Fatalf("bad code")
  3813  				}
  3814  
  3815  				if p.Back&1 != 0 {
  3816  					v = q.Pc - (p.Pc + 2)
  3817  					if v >= -128 && p.As != AXBEGIN {
  3818  						if p.As == AJCXZL {
  3819  							ctxt.AsmBuf.Put1(0x67)
  3820  						}
  3821  						ctxt.AsmBuf.Put2(byte(op), byte(v))
  3822  					} else if yt.zcase == Zloop {
  3823  						ctxt.Diag("loop too far: %v", p)
  3824  					} else {
  3825  						v -= 5 - 2
  3826  						if p.As == AXBEGIN {
  3827  							v--
  3828  						}
  3829  						if yt.zcase == Zbr {
  3830  							ctxt.AsmBuf.Put1(0x0f)
  3831  							v--
  3832  						}
  3833  
  3834  						ctxt.AsmBuf.Put1(o.op[z+1])
  3835  						ctxt.AsmBuf.PutInt32(int32(v))
  3836  					}
  3837  
  3838  					break
  3839  				}
  3840  
  3841  				// Annotate target; will fill in later.
  3842  				p.Forwd = q.Rel
  3843  
  3844  				q.Rel = p
  3845  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3846  					if p.As == AJCXZL {
  3847  						ctxt.AsmBuf.Put1(0x67)
  3848  					}
  3849  					ctxt.AsmBuf.Put2(byte(op), 0)
  3850  				} else if yt.zcase == Zloop {
  3851  					ctxt.Diag("loop too far: %v", p)
  3852  				} else {
  3853  					if yt.zcase == Zbr {
  3854  						ctxt.AsmBuf.Put1(0x0f)
  3855  					}
  3856  					ctxt.AsmBuf.Put1(o.op[z+1])
  3857  					ctxt.AsmBuf.PutInt32(0)
  3858  				}
  3859  
  3860  				break
  3861  
  3862  			/*
  3863  				v = q->pc - p->pc - 2;
  3864  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3865  					*ctxt->andptr++ = op;
  3866  					*ctxt->andptr++ = v;
  3867  				} else {
  3868  					v -= 5-2;
  3869  					if(yt.zcase == Zbr) {
  3870  						*ctxt->andptr++ = 0x0f;
  3871  						v--;
  3872  					}
  3873  					*ctxt->andptr++ = o->op[z+1];
  3874  					*ctxt->andptr++ = v;
  3875  					*ctxt->andptr++ = v>>8;
  3876  					*ctxt->andptr++ = v>>16;
  3877  					*ctxt->andptr++ = v>>24;
  3878  				}
  3879  			*/
  3880  
  3881  			case Zbyte:
  3882  				v = vaddr(ctxt, p, &p.From, &rel)
  3883  				if rel.Siz != 0 {
  3884  					rel.Siz = uint8(op)
  3885  					r = obj.Addrel(ctxt.Cursym)
  3886  					*r = rel
  3887  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3888  				}
  3889  
  3890  				ctxt.AsmBuf.Put1(byte(v))
  3891  				if op > 1 {
  3892  					ctxt.AsmBuf.Put1(byte(v >> 8))
  3893  					if op > 2 {
  3894  						ctxt.AsmBuf.PutInt16(int16(v >> 16))
  3895  						if op > 4 {
  3896  							ctxt.AsmBuf.PutInt32(int32(v >> 32))
  3897  						}
  3898  					}
  3899  				}
  3900  			}
  3901  
  3902  			return
  3903  		}
  3904  		z += int(yt.zoffset) + xo
  3905  	}
  3906  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3907  		var pp obj.Prog
  3908  		var t []byte
  3909  		if p.As == mo[0].as {
  3910  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3911  				t = mo[0].op[:]
  3912  				switch mo[0].code {
  3913  				default:
  3914  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3915  
  3916  				case 0: /* lit */
  3917  					for z = 0; t[z] != E; z++ {
  3918  						ctxt.AsmBuf.Put1(t[z])
  3919  					}
  3920  
  3921  				case 1: /* r,m */
  3922  					ctxt.AsmBuf.Put1(t[0])
  3923  					asmando(ctxt, p, &p.To, int(t[1]))
  3924  
  3925  				case 2: /* m,r */
  3926  					ctxt.AsmBuf.Put1(t[0])
  3927  					asmando(ctxt, p, &p.From, int(t[1]))
  3928  
  3929  				case 3: /* r,m - 2op */
  3930  					ctxt.AsmBuf.Put2(t[0], t[1])
  3931  					asmando(ctxt, p, &p.To, int(t[2]))
  3932  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3933  
  3934  				case 4: /* m,r - 2op */
  3935  					ctxt.AsmBuf.Put2(t[0], t[1])
  3936  					asmando(ctxt, p, &p.From, int(t[2]))
  3937  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3938  
  3939  				case 5: /* load full pointer, trash heap */
  3940  					if t[0] != 0 {
  3941  						ctxt.AsmBuf.Put1(t[0])
  3942  					}
  3943  					switch p.To.Index {
  3944  					default:
  3945  						goto bad
  3946  
  3947  					case REG_DS:
  3948  						ctxt.AsmBuf.Put1(0xc5)
  3949  
  3950  					case REG_SS:
  3951  						ctxt.AsmBuf.Put2(0x0f, 0xb2)
  3952  
  3953  					case REG_ES:
  3954  						ctxt.AsmBuf.Put1(0xc4)
  3955  
  3956  					case REG_FS:
  3957  						ctxt.AsmBuf.Put2(0x0f, 0xb4)
  3958  
  3959  					case REG_GS:
  3960  						ctxt.AsmBuf.Put2(0x0f, 0xb5)
  3961  					}
  3962  
  3963  					asmand(ctxt, p, &p.From, &p.To)
  3964  
  3965  				case 6: /* double shift */
  3966  					if t[0] == Pw {
  3967  						if p.Mode != 64 {
  3968  							ctxt.Diag("asmins: illegal 64: %v", p)
  3969  						}
  3970  						ctxt.Rexflag |= Pw
  3971  						t = t[1:]
  3972  					} else if t[0] == Pe {
  3973  						ctxt.AsmBuf.Put1(Pe)
  3974  						t = t[1:]
  3975  					}
  3976  
  3977  					switch p.From.Type {
  3978  					default:
  3979  						goto bad
  3980  
  3981  					case obj.TYPE_CONST:
  3982  						ctxt.AsmBuf.Put2(0x0f, t[0])
  3983  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3984  						ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3985  
  3986  					case obj.TYPE_REG:
  3987  						switch p.From.Reg {
  3988  						default:
  3989  							goto bad
  3990  
  3991  						case REG_CL, REG_CX:
  3992  							ctxt.AsmBuf.Put2(0x0f, t[1])
  3993  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3994  						}
  3995  					}
  3996  
  3997  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3998  				// where you load the TLS base register into a register and then index off that
  3999  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4000  				// are handled in prefixof above and should not be listed here.
  4001  				case 7: /* mov tls, r */
  4002  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  4003  						ctxt.Diag("invalid load of TLS: %v", p)
  4004  					}
  4005  
  4006  					if p.Mode == 32 {
  4007  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4008  						// where you load the TLS base register into a register and then index off that
  4009  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4010  						// are handled in prefixof above and should not be listed here.
  4011  						switch ctxt.Headtype {
  4012  						default:
  4013  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  4014  
  4015  						case obj.Hlinux,
  4016  							obj.Hnacl:
  4017  							if ctxt.Flag_shared {
  4018  								// Note that this is not generating the same insns as the other cases.
  4019  								//     MOV TLS, R_to
  4020  								// becomes
  4021  								//     call __x86.get_pc_thunk.cx
  4022  								//     movl (gotpc + g@gotntpoff)(%ecx),$R_To
  4023  								// which is encoded as
  4024  								//     call __x86.get_pc_thunk.cx
  4025  								//     movq 0(%ecx), R_to
  4026  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4027  								// is g, which we can't check here, but will when we assemble the second
  4028  								// instruction.
  4029  								ctxt.AsmBuf.Put1(0xe8)
  4030  								r = obj.Addrel(ctxt.Cursym)
  4031  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4032  								r.Type = obj.R_CALL
  4033  								r.Siz = 4
  4034  								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk.cx", 0)
  4035  								ctxt.AsmBuf.PutInt32(0)
  4036  
  4037  								ctxt.AsmBuf.Put2(0x8B, byte(2<<6|reg[REG_CX]|(reg[p.To.Reg]<<3)))
  4038  								r = obj.Addrel(ctxt.Cursym)
  4039  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4040  								r.Type = obj.R_TLS_IE
  4041  								r.Siz = 4
  4042  								r.Add = 2
  4043  								ctxt.AsmBuf.PutInt32(0)
  4044  							} else {
  4045  								// ELF TLS base is 0(GS).
  4046  								pp.From = p.From
  4047  
  4048  								pp.From.Type = obj.TYPE_MEM
  4049  								pp.From.Reg = REG_GS
  4050  								pp.From.Offset = 0
  4051  								pp.From.Index = REG_NONE
  4052  								pp.From.Scale = 0
  4053  								ctxt.AsmBuf.Put2(0x65, // GS
  4054  									0x8B)
  4055  								asmand(ctxt, p, &pp.From, &p.To)
  4056  							}
  4057  						case obj.Hplan9:
  4058  							if ctxt.Plan9privates == nil {
  4059  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4060  							}
  4061  							pp.From = obj.Addr{}
  4062  							pp.From.Type = obj.TYPE_MEM
  4063  							pp.From.Name = obj.NAME_EXTERN
  4064  							pp.From.Sym = ctxt.Plan9privates
  4065  							pp.From.Offset = 0
  4066  							pp.From.Index = REG_NONE
  4067  							ctxt.AsmBuf.Put1(0x8B)
  4068  							asmand(ctxt, p, &pp.From, &p.To)
  4069  
  4070  						case obj.Hwindows:
  4071  							// Windows TLS base is always 0x14(FS).
  4072  							pp.From = p.From
  4073  
  4074  							pp.From.Type = obj.TYPE_MEM
  4075  							pp.From.Reg = REG_FS
  4076  							pp.From.Offset = 0x14
  4077  							pp.From.Index = REG_NONE
  4078  							pp.From.Scale = 0
  4079  							ctxt.AsmBuf.Put2(0x64, // FS
  4080  								0x8B)
  4081  							asmand(ctxt, p, &pp.From, &p.To)
  4082  						}
  4083  						break
  4084  					}
  4085  
  4086  					switch ctxt.Headtype {
  4087  					default:
  4088  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  4089  
  4090  					case obj.Hlinux:
  4091  						if !ctxt.Flag_shared {
  4092  							log.Fatalf("unknown TLS base location for linux without -shared")
  4093  						}
  4094  						// Note that this is not generating the same insn as the other cases.
  4095  						//     MOV TLS, R_to
  4096  						// becomes
  4097  						//     movq g@gottpoff(%rip), R_to
  4098  						// which is encoded as
  4099  						//     movq 0(%rip), R_to
  4100  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4101  						// is g, which we can't check here, but will when we assemble the second
  4102  						// instruction.
  4103  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4104  
  4105  						ctxt.AsmBuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4106  						r = obj.Addrel(ctxt.Cursym)
  4107  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4108  						r.Type = obj.R_TLS_IE
  4109  						r.Siz = 4
  4110  						r.Add = -4
  4111  						ctxt.AsmBuf.PutInt32(0)
  4112  
  4113  					case obj.Hplan9:
  4114  						if ctxt.Plan9privates == nil {
  4115  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4116  						}
  4117  						pp.From = obj.Addr{}
  4118  						pp.From.Type = obj.TYPE_MEM
  4119  						pp.From.Name = obj.NAME_EXTERN
  4120  						pp.From.Sym = ctxt.Plan9privates
  4121  						pp.From.Offset = 0
  4122  						pp.From.Index = REG_NONE
  4123  						ctxt.Rexflag |= Pw
  4124  						ctxt.AsmBuf.Put1(0x8B)
  4125  						asmand(ctxt, p, &pp.From, &p.To)
  4126  
  4127  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4128  						// TLS base is 0(FS).
  4129  						pp.From = p.From
  4130  
  4131  						pp.From.Type = obj.TYPE_MEM
  4132  						pp.From.Name = obj.NAME_NONE
  4133  						pp.From.Reg = REG_NONE
  4134  						pp.From.Offset = 0
  4135  						pp.From.Index = REG_NONE
  4136  						pp.From.Scale = 0
  4137  						ctxt.Rexflag |= Pw
  4138  						ctxt.AsmBuf.Put2(0x64, // FS
  4139  							0x8B)
  4140  						asmand(ctxt, p, &pp.From, &p.To)
  4141  
  4142  					case obj.Hwindows:
  4143  						// Windows TLS base is always 0x28(GS).
  4144  						pp.From = p.From
  4145  
  4146  						pp.From.Type = obj.TYPE_MEM
  4147  						pp.From.Name = obj.NAME_NONE
  4148  						pp.From.Reg = REG_GS
  4149  						pp.From.Offset = 0x28
  4150  						pp.From.Index = REG_NONE
  4151  						pp.From.Scale = 0
  4152  						ctxt.Rexflag |= Pw
  4153  						ctxt.AsmBuf.Put2(0x65, // GS
  4154  							0x8B)
  4155  						asmand(ctxt, p, &pp.From, &p.To)
  4156  					}
  4157  				}
  4158  				return
  4159  			}
  4160  		}
  4161  	}
  4162  	goto bad
  4163  
  4164  bad:
  4165  	if p.Mode != 64 {
  4166  		/*
  4167  		 * here, the assembly has failed.
  4168  		 * if its a byte instruction that has
  4169  		 * unaddressable registers, try to
  4170  		 * exchange registers and reissue the
  4171  		 * instruction with the operands renamed.
  4172  		 */
  4173  		pp := *p
  4174  
  4175  		unbytereg(&pp.From, &pp.Ft)
  4176  		unbytereg(&pp.To, &pp.Tt)
  4177  
  4178  		z := int(p.From.Reg)
  4179  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4180  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4181  			// For now, different to keep bit-for-bit compatibility.
  4182  			if p.Mode == 32 {
  4183  				breg := byteswapreg(ctxt, &p.To)
  4184  				if breg != REG_AX {
  4185  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4186  					asmando(ctxt, p, &p.From, reg[breg])
  4187  					subreg(&pp, z, breg)
  4188  					doasm(ctxt, &pp)
  4189  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4190  					asmando(ctxt, p, &p.From, reg[breg])
  4191  				} else {
  4192  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4193  					subreg(&pp, z, REG_AX)
  4194  					doasm(ctxt, &pp)
  4195  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4196  				}
  4197  				return
  4198  			}
  4199  
  4200  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4201  				// We certainly don't want to exchange
  4202  				// with AX if the op is MUL or DIV.
  4203  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4204  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4205  				subreg(&pp, z, REG_BX)
  4206  				doasm(ctxt, &pp)
  4207  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4208  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4209  			} else {
  4210  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4211  				subreg(&pp, z, REG_AX)
  4212  				doasm(ctxt, &pp)
  4213  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4214  			}
  4215  			return
  4216  		}
  4217  
  4218  		z = int(p.To.Reg)
  4219  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4220  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4221  			// For now, different to keep bit-for-bit compatibility.
  4222  			if p.Mode == 32 {
  4223  				breg := byteswapreg(ctxt, &p.From)
  4224  				if breg != REG_AX {
  4225  					ctxt.AsmBuf.Put1(0x87) //xchg rhs,bx
  4226  					asmando(ctxt, p, &p.To, reg[breg])
  4227  					subreg(&pp, z, breg)
  4228  					doasm(ctxt, &pp)
  4229  					ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4230  					asmando(ctxt, p, &p.To, reg[breg])
  4231  				} else {
  4232  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4233  					subreg(&pp, z, REG_AX)
  4234  					doasm(ctxt, &pp)
  4235  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4236  				}
  4237  				return
  4238  			}
  4239  
  4240  			if isax(&p.From) {
  4241  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4242  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4243  				subreg(&pp, z, REG_BX)
  4244  				doasm(ctxt, &pp)
  4245  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4246  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4247  			} else {
  4248  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4249  				subreg(&pp, z, REG_AX)
  4250  				doasm(ctxt, &pp)
  4251  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4252  			}
  4253  			return
  4254  		}
  4255  	}
  4256  
  4257  	ctxt.Diag("invalid instruction: %v", p)
  4258  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4259  	return
  4260  }
  4261  
  4262  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4263  // which is not referenced in a.
  4264  // If a is empty, it returns BX to account for MULB-like instructions
  4265  // that might use DX and AX.
  4266  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4267  	cand := 1
  4268  	canc := cand
  4269  	canb := canc
  4270  	cana := canb
  4271  
  4272  	if a.Type == obj.TYPE_NONE {
  4273  		cand = 0
  4274  		cana = cand
  4275  	}
  4276  
  4277  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4278  		switch a.Reg {
  4279  		case REG_NONE:
  4280  			cand = 0
  4281  			cana = cand
  4282  
  4283  		case REG_AX, REG_AL, REG_AH:
  4284  			cana = 0
  4285  
  4286  		case REG_BX, REG_BL, REG_BH:
  4287  			canb = 0
  4288  
  4289  		case REG_CX, REG_CL, REG_CH:
  4290  			canc = 0
  4291  
  4292  		case REG_DX, REG_DL, REG_DH:
  4293  			cand = 0
  4294  		}
  4295  	}
  4296  
  4297  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4298  		switch a.Index {
  4299  		case REG_AX:
  4300  			cana = 0
  4301  
  4302  		case REG_BX:
  4303  			canb = 0
  4304  
  4305  		case REG_CX:
  4306  			canc = 0
  4307  
  4308  		case REG_DX:
  4309  			cand = 0
  4310  		}
  4311  	}
  4312  
  4313  	if cana != 0 {
  4314  		return REG_AX
  4315  	}
  4316  	if canb != 0 {
  4317  		return REG_BX
  4318  	}
  4319  	if canc != 0 {
  4320  		return REG_CX
  4321  	}
  4322  	if cand != 0 {
  4323  		return REG_DX
  4324  	}
  4325  
  4326  	ctxt.Diag("impossible byte register")
  4327  	log.Fatalf("bad code")
  4328  	return 0
  4329  }
  4330  
  4331  func isbadbyte(a *obj.Addr) bool {
  4332  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4333  }
  4334  
  4335  var naclret = []uint8{
  4336  	0x5e, // POPL SI
  4337  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4338  	0x83,
  4339  	0xe6,
  4340  	0xe0, // ANDL $~31, SI
  4341  	0x4c,
  4342  	0x01,
  4343  	0xfe, // ADDQ R15, SI
  4344  	0xff,
  4345  	0xe6, // JMP SI
  4346  }
  4347  
  4348  var naclret8 = []uint8{
  4349  	0x5d, // POPL BP
  4350  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4351  	0x83,
  4352  	0xe5,
  4353  	0xe0, // ANDL $~31, BP
  4354  	0xff,
  4355  	0xe5, // JMP BP
  4356  }
  4357  
  4358  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4359  
  4360  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4361  
  4362  var naclmovs = []uint8{
  4363  	0x89,
  4364  	0xf6, // MOVL SI, SI
  4365  	0x49,
  4366  	0x8d,
  4367  	0x34,
  4368  	0x37, // LEAQ (R15)(SI*1), SI
  4369  	0x89,
  4370  	0xff, // MOVL DI, DI
  4371  	0x49,
  4372  	0x8d,
  4373  	0x3c,
  4374  	0x3f, // LEAQ (R15)(DI*1), DI
  4375  }
  4376  
  4377  var naclstos = []uint8{
  4378  	0x89,
  4379  	0xff, // MOVL DI, DI
  4380  	0x49,
  4381  	0x8d,
  4382  	0x3c,
  4383  	0x3f, // LEAQ (R15)(DI*1), DI
  4384  }
  4385  
  4386  func nacltrunc(ctxt *obj.Link, reg int) {
  4387  	if reg >= REG_R8 {
  4388  		ctxt.AsmBuf.Put1(0x45)
  4389  	}
  4390  	reg = (reg - REG_AX) & 7
  4391  	ctxt.AsmBuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4392  }
  4393  
  4394  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4395  	ctxt.AsmBuf.Reset()
  4396  	ctxt.Asmode = int(p.Mode)
  4397  
  4398  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4399  		switch p.As {
  4400  		case obj.ARET:
  4401  			ctxt.AsmBuf.Put(naclret8)
  4402  			return
  4403  
  4404  		case obj.ACALL,
  4405  			obj.AJMP:
  4406  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4407  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4408  			}
  4409  
  4410  		case AINT:
  4411  			ctxt.AsmBuf.Put1(0xf4)
  4412  			return
  4413  		}
  4414  	}
  4415  
  4416  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4417  		if p.As == AREP {
  4418  			ctxt.Rep++
  4419  			return
  4420  		}
  4421  
  4422  		if p.As == AREPN {
  4423  			ctxt.Repn++
  4424  			return
  4425  		}
  4426  
  4427  		if p.As == ALOCK {
  4428  			ctxt.Lock++
  4429  			return
  4430  		}
  4431  
  4432  		if p.As != ALEAQ && p.As != ALEAL {
  4433  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4434  				nacltrunc(ctxt, int(p.From.Index))
  4435  			}
  4436  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4437  				nacltrunc(ctxt, int(p.To.Index))
  4438  			}
  4439  		}
  4440  
  4441  		switch p.As {
  4442  		case obj.ARET:
  4443  			ctxt.AsmBuf.Put(naclret)
  4444  			return
  4445  
  4446  		case obj.ACALL,
  4447  			obj.AJMP:
  4448  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4449  				// ANDL $~31, reg
  4450  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4451  				// ADDQ R15, reg
  4452  				ctxt.AsmBuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4453  			}
  4454  
  4455  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4456  				// ANDL $~31, reg
  4457  				ctxt.AsmBuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4458  				// ADDQ R15, reg
  4459  				ctxt.AsmBuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4460  			}
  4461  
  4462  		case AINT:
  4463  			ctxt.AsmBuf.Put1(0xf4)
  4464  			return
  4465  
  4466  		case ASCASB,
  4467  			ASCASW,
  4468  			ASCASL,
  4469  			ASCASQ,
  4470  			ASTOSB,
  4471  			ASTOSW,
  4472  			ASTOSL,
  4473  			ASTOSQ:
  4474  			ctxt.AsmBuf.Put(naclstos)
  4475  
  4476  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4477  			ctxt.AsmBuf.Put(naclmovs)
  4478  		}
  4479  
  4480  		if ctxt.Rep != 0 {
  4481  			ctxt.AsmBuf.Put1(0xf3)
  4482  			ctxt.Rep = 0
  4483  		}
  4484  
  4485  		if ctxt.Repn != 0 {
  4486  			ctxt.AsmBuf.Put1(0xf2)
  4487  			ctxt.Repn = 0
  4488  		}
  4489  
  4490  		if ctxt.Lock != 0 {
  4491  			ctxt.AsmBuf.Put1(0xf0)
  4492  			ctxt.Lock = 0
  4493  		}
  4494  	}
  4495  
  4496  	ctxt.Rexflag = 0
  4497  	ctxt.Vexflag = 0
  4498  	mark := ctxt.AsmBuf.Len()
  4499  	ctxt.Asmode = int(p.Mode)
  4500  	doasm(ctxt, p)
  4501  	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
  4502  		/*
  4503  		 * as befits the whole approach of the architecture,
  4504  		 * the rex prefix must appear before the first opcode byte
  4505  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4506  		 * before the 0f opcode escape!), or it might be ignored.
  4507  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4508  		 */
  4509  		if p.Mode != 64 {
  4510  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4511  		}
  4512  		n := ctxt.AsmBuf.Len()
  4513  		var np int
  4514  		for np = mark; np < n; np++ {
  4515  			c := ctxt.AsmBuf.Peek(np)
  4516  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4517  				break
  4518  			}
  4519  		}
  4520  		ctxt.AsmBuf.Insert(np, byte(0x40|ctxt.Rexflag))
  4521  	}
  4522  
  4523  	n := ctxt.AsmBuf.Len()
  4524  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4525  		r := &ctxt.Cursym.R[i]
  4526  		if int64(r.Off) < p.Pc {
  4527  			break
  4528  		}
  4529  		if ctxt.Rexflag != 0 {
  4530  			r.Off++
  4531  		}
  4532  		if r.Type == obj.R_PCREL {
  4533  			if p.Mode == 64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4534  				// PC-relative addressing is relative to the end of the instruction,
  4535  				// but the relocations applied by the linker are relative to the end
  4536  				// of the relocation. Because immediate instruction
  4537  				// arguments can follow the PC-relative memory reference in the
  4538  				// instruction encoding, the two may not coincide. In this case,
  4539  				// adjust addend so that linker can keep relocating relative to the
  4540  				// end of the relocation.
  4541  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4542  			} else if p.Mode == 32 {
  4543  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4544  				// assumes that the previous instruction loaded the PC of the end
  4545  				// of that instruction into CX, so the adjustment is relative to
  4546  				// that.
  4547  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4548  			}
  4549  		}
  4550  		if r.Type == obj.R_GOTPCREL && p.Mode == 32 {
  4551  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4552  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4553  		}
  4554  
  4555  	}
  4556  
  4557  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4558  		switch p.To.Reg {
  4559  		case REG_SP:
  4560  			ctxt.AsmBuf.Put(naclspfix)
  4561  		case REG_BP:
  4562  			ctxt.AsmBuf.Put(naclbpfix)
  4563  		}
  4564  	}
  4565  }