github.com/rakyll/go@v0.0.0-20170216000551-64c02460d703/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"encoding/binary"
    36  	"fmt"
    37  	"log"
    38  	"strings"
    39  )
    40  
    41  // Instruction layout.
    42  
    43  const (
    44  	// Loop alignment constants:
    45  	// want to align loop entry to LoopAlign-byte boundary,
    46  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    47  	// We define a loop entry as the target of a backward jump.
    48  	//
    49  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    50  	// and it aligns all jump targets, not just backward jump targets.
    51  	//
    52  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    53  	// is very slight but negative, so the alignment is disabled by
    54  	// setting MaxLoopPad = 0. The code is here for reference and
    55  	// for future experiments.
    56  	//
    57  	LoopAlign  = 16
    58  	MaxLoopPad = 0
    59  	funcAlign  = 16
    60  )
    61  
// Optab is one entry of the optab instruction table. It names an
// instruction, lists the operand forms the instruction accepts, and carries
// the prefix selector and the byte list from which its encodings are built.
type Optab struct {
	as     obj.As    // instruction this entry describes
	ytab   []ytab    // accepted operand forms, searched for a row matching the operands
	prefix uint8     // one of the P* constants; controls the prefix bytes to emit
	op     [23]uint8 // byte list shared by the ytab rows; each row owns its next zoffset bytes
}
    68  
    69  type ytab struct {
    70  	from    uint8
    71  	from3   uint8
    72  	to      uint8
    73  	zcase   uint8
    74  	zoffset uint8
    75  }
    76  
    77  type Movtab struct {
    78  	as   obj.As
    79  	ft   uint8
    80  	f3t  uint8
    81  	tt   uint8
    82  	code uint8
    83  	op   [4]uint8
    84  }
    85  
// Operand classes ("Ytypes"). Each operand of an instruction is classified
// as one of these values, and the ycover table records which more general
// classes a given class also satisfies. ytab rows store these values in
// their from, from3 and to fields.
//
// The values come from iota, so the order of the list is significant.
// Ymax must stay last: it is the dimension used to size ycover.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	// Ys32 is a signed 32-bit constant.
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	// Vector operands: Ymr, Yxr and Yyr are MMX, XMM and YMM registers;
	// Ymm, Yxm and Yym additionally accept a memory operand.
	Ymr
	Ymm
	Yxr
	Yxm
	Yyr
	Yym
	Ytls
	Ytextsize
	Yindir
	Ymax
)
   159  
// Encoding cases ("Ztypes"), stored in the zcase field of a ytab row.
// Once a row matches, its Z case together with the row's bytes from
// Optab.op determines the instruction bytes that are laid down.
//
// The values come from iota, so the order of the list is significant.
// Zmax marks the end of the list.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	// Zibo_m: an opcode byte, then the encoded addressing mode of the
	// memory/register operand, then a single immediate byte.
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	// Zilo_m: like Zibo_m, but with a long (32-bit) immediate.
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	Zvex_rm_v_r
	Zvex_r_v_rm
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zmax
)
   216  
   217  const (
   218  	Px   = 0
   219  	Px1  = 1    // symbolic; exact value doesn't matter
   220  	P32  = 0x32 /* 32-bit only */
   221  	Pe   = 0x66 /* operand escape */
   222  	Pm   = 0x0f /* 2byte opcode escape */
   223  	Pq   = 0xff /* both escapes: 66 0f */
   224  	Pb   = 0xfe /* byte operands */
   225  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   226  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   227  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   228  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   229  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   230  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   231  	Pw   = 0x48 /* Rex.w */
   232  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   233  	Py   = 0x80 /* defaults to 64-bit mode */
   234  	Py1  = 0x81 // symbolic; exact value doesn't matter
   235  	Py3  = 0x83 // symbolic; exact value doesn't matter
   236  	Pvex = 0x84 // symbolic: exact value doesn't matter
   237  
   238  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   239  	Rxr = 1 << 2 /* extend modrm reg */
   240  	Rxx = 1 << 1 /* extend sib index */
   241  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   242  )
   243  
   244  const (
   245  	// Encoding for VEX prefix in tables.
   246  	// The P, L, and W fields are chosen to match
   247  	// their eventual locations in the VEX prefix bytes.
   248  
   249  	// P field - 2 bits
   250  	vex66 = 1 << 0
   251  	vexF3 = 2 << 0
   252  	vexF2 = 3 << 0
   253  	// L field - 1 bit
   254  	vexLZ  = 0 << 2
   255  	vexLIG = 0 << 2
   256  	vex128 = 0 << 2
   257  	vex256 = 1 << 2
   258  	// W field - 1 bit
   259  	vexWIG = 0 << 7
   260  	vexW0  = 0 << 7
   261  	vexW1  = 1 << 7
   262  	// M field - 5 bits, but mostly reserved; we can store up to 4
   263  	vex0F   = 1 << 3
   264  	vex0F38 = 2 << 3
   265  	vex0F3A = 3 << 3
   266  
   267  	// Combinations used in the manual.
   268  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   269  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   270  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   271  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   272  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   273  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   274  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   275  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   276  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   277  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   278  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   279  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   280  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   281  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   282  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   283  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   284  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   285  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   286  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   287  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   288  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   289  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   290  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   291  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   292  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   293  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   294  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   295  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   296  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   297  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   298  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   299  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   300  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   301  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   302  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   303  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   304  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   305  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   306  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   307  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   308  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   309  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   310  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   311  )
   312  
   313  var ycover [Ymax * Ymax]uint8
   314  
   315  var reg [MAXREG]int
   316  
   317  var regrex [MAXREG + 1]int
   318  
   319  var ynone = []ytab{
   320  	{Ynone, Ynone, Ynone, Zlit, 1},
   321  }
   322  
   323  var ytext = []ytab{
   324  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   325  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   326  }
   327  
   328  var ynop = []ytab{
   329  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   330  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   331  	{Ynone, Ynone, Yml, Zpseudo, 0},
   332  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   333  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   334  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   335  	{Yml, Ynone, Ynone, Zpseudo, 0},
   336  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   337  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   338  }
   339  
   340  var yfuncdata = []ytab{
   341  	{Yi32, Ynone, Ym, Zpseudo, 0},
   342  }
   343  
   344  var ypcdata = []ytab{
   345  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   346  }
   347  
   348  var yxorb = []ytab{
   349  	{Yi32, Ynone, Yal, Zib_, 1},
   350  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   351  	{Yrb, Ynone, Ymb, Zr_m, 1},
   352  	{Ymb, Ynone, Yrb, Zm_r, 1},
   353  }
   354  
   355  var yaddl = []ytab{
   356  	{Yi8, Ynone, Yml, Zibo_m, 2},
   357  	{Yi32, Ynone, Yax, Zil_, 1},
   358  	{Yi32, Ynone, Yml, Zilo_m, 2},
   359  	{Yrl, Ynone, Yml, Zr_m, 1},
   360  	{Yml, Ynone, Yrl, Zm_r, 1},
   361  }
   362  
   363  var yincl = []ytab{
   364  	{Ynone, Ynone, Yrl, Z_rp, 1},
   365  	{Ynone, Ynone, Yml, Zo_m, 2},
   366  }
   367  
   368  var yincq = []ytab{
   369  	{Ynone, Ynone, Yml, Zo_m, 2},
   370  }
   371  
   372  var ycmpb = []ytab{
   373  	{Yal, Ynone, Yi32, Z_ib, 1},
   374  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   375  	{Ymb, Ynone, Yrb, Zm_r, 1},
   376  	{Yrb, Ynone, Ymb, Zr_m, 1},
   377  }
   378  
   379  var ycmpl = []ytab{
   380  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   381  	{Yax, Ynone, Yi32, Z_il, 1},
   382  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   383  	{Yml, Ynone, Yrl, Zm_r, 1},
   384  	{Yrl, Ynone, Yml, Zr_m, 1},
   385  }
   386  
   387  var yshb = []ytab{
   388  	{Yi1, Ynone, Ymb, Zo_m, 2},
   389  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   390  	{Ycx, Ynone, Ymb, Zo_m, 2},
   391  }
   392  
   393  var yshl = []ytab{
   394  	{Yi1, Ynone, Yml, Zo_m, 2},
   395  	{Yi32, Ynone, Yml, Zibo_m, 2},
   396  	{Ycl, Ynone, Yml, Zo_m, 2},
   397  	{Ycx, Ynone, Yml, Zo_m, 2},
   398  }
   399  
   400  var ytestl = []ytab{
   401  	{Yi32, Ynone, Yax, Zil_, 1},
   402  	{Yi32, Ynone, Yml, Zilo_m, 2},
   403  	{Yrl, Ynone, Yml, Zr_m, 1},
   404  	{Yml, Ynone, Yrl, Zm_r, 1},
   405  }
   406  
   407  var ymovb = []ytab{
   408  	{Yrb, Ynone, Ymb, Zr_m, 1},
   409  	{Ymb, Ynone, Yrb, Zm_r, 1},
   410  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   411  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   412  }
   413  
   414  var ybtl = []ytab{
   415  	{Yi8, Ynone, Yml, Zibo_m, 2},
   416  	{Yrl, Ynone, Yml, Zr_m, 1},
   417  }
   418  
   419  var ymovw = []ytab{
   420  	{Yrl, Ynone, Yml, Zr_m, 1},
   421  	{Yml, Ynone, Yrl, Zm_r, 1},
   422  	{Yi0, Ynone, Yrl, Zclr, 1},
   423  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   424  	{Yi32, Ynone, Yml, Zilo_m, 2},
   425  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   426  }
   427  
   428  var ymovl = []ytab{
   429  	{Yrl, Ynone, Yml, Zr_m, 1},
   430  	{Yml, Ynone, Yrl, Zm_r, 1},
   431  	{Yi0, Ynone, Yrl, Zclr, 1},
   432  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   433  	{Yi32, Ynone, Yml, Zilo_m, 2},
   434  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   435  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   436  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   437  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   438  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   439  }
   440  
   441  var yret = []ytab{
   442  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   443  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   444  }
   445  
   446  var ymovq = []ytab{
   447  	// valid in 32-bit mode
   448  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   449  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   450  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   451  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   452  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   453  
   454  	// valid only in 64-bit mode, usually with 64-bit prefix
   455  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   456  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   457  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   458  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   459  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   460  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   461  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   462  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   463  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   464  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   465  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   466  }
   467  
   468  var ym_rl = []ytab{
   469  	{Ym, Ynone, Yrl, Zm_r, 1},
   470  }
   471  
   472  var yrl_m = []ytab{
   473  	{Yrl, Ynone, Ym, Zr_m, 1},
   474  }
   475  
   476  var ymb_rl = []ytab{
   477  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   478  }
   479  
   480  var yml_rl = []ytab{
   481  	{Yml, Ynone, Yrl, Zm_r, 1},
   482  }
   483  
   484  var yrl_ml = []ytab{
   485  	{Yrl, Ynone, Yml, Zr_m, 1},
   486  }
   487  
   488  var yml_mb = []ytab{
   489  	{Yrb, Ynone, Ymb, Zr_m, 1},
   490  	{Ymb, Ynone, Yrb, Zm_r, 1},
   491  }
   492  
   493  var yrb_mb = []ytab{
   494  	{Yrb, Ynone, Ymb, Zr_m, 1},
   495  }
   496  
   497  var yxchg = []ytab{
   498  	{Yax, Ynone, Yrl, Z_rp, 1},
   499  	{Yrl, Ynone, Yax, Zrp_, 1},
   500  	{Yrl, Ynone, Yml, Zr_m, 1},
   501  	{Yml, Ynone, Yrl, Zm_r, 1},
   502  }
   503  
   504  var ydivl = []ytab{
   505  	{Yml, Ynone, Ynone, Zm_o, 2},
   506  }
   507  
   508  var ydivb = []ytab{
   509  	{Ymb, Ynone, Ynone, Zm_o, 2},
   510  }
   511  
   512  var yimul = []ytab{
   513  	{Yml, Ynone, Ynone, Zm_o, 2},
   514  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   515  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   516  	{Yml, Ynone, Yrl, Zm_r, 2},
   517  }
   518  
   519  var yimul3 = []ytab{
   520  	{Yi8, Yml, Yrl, Zibm_r, 2},
   521  }
   522  
   523  var ybyte = []ytab{
   524  	{Yi64, Ynone, Ynone, Zbyte, 1},
   525  }
   526  
   527  var yin = []ytab{
   528  	{Yi32, Ynone, Ynone, Zib_, 1},
   529  	{Ynone, Ynone, Ynone, Zlit, 1},
   530  }
   531  
   532  var yint = []ytab{
   533  	{Yi32, Ynone, Ynone, Zib_, 1},
   534  }
   535  
   536  var ypushl = []ytab{
   537  	{Yrl, Ynone, Ynone, Zrp_, 1},
   538  	{Ym, Ynone, Ynone, Zm_o, 2},
   539  	{Yi8, Ynone, Ynone, Zib_, 1},
   540  	{Yi32, Ynone, Ynone, Zil_, 1},
   541  }
   542  
   543  var ypopl = []ytab{
   544  	{Ynone, Ynone, Yrl, Z_rp, 1},
   545  	{Ynone, Ynone, Ym, Zo_m, 2},
   546  }
   547  
   548  var ybswap = []ytab{
   549  	{Ynone, Ynone, Yrl, Z_rp, 2},
   550  }
   551  
   552  var yscond = []ytab{
   553  	{Ynone, Ynone, Ymb, Zo_m, 2},
   554  }
   555  
   556  var yjcond = []ytab{
   557  	{Ynone, Ynone, Ybr, Zbr, 0},
   558  	{Yi0, Ynone, Ybr, Zbr, 0},
   559  	{Yi1, Ynone, Ybr, Zbr, 1},
   560  }
   561  
   562  var yloop = []ytab{
   563  	{Ynone, Ynone, Ybr, Zloop, 1},
   564  }
   565  
   566  var ycall = []ytab{
   567  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   568  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   569  	{Ynone, Ynone, Yindir, Zcallind, 2},
   570  	{Ynone, Ynone, Ybr, Zcall, 0},
   571  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   572  }
   573  
   574  var yduff = []ytab{
   575  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   576  }
   577  
   578  var yjmp = []ytab{
   579  	{Ynone, Ynone, Yml, Zo_m64, 2},
   580  	{Ynone, Ynone, Ybr, Zjmp, 0},
   581  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   582  }
   583  
   584  var yfmvd = []ytab{
   585  	{Ym, Ynone, Yf0, Zm_o, 2},
   586  	{Yf0, Ynone, Ym, Zo_m, 2},
   587  	{Yrf, Ynone, Yf0, Zm_o, 2},
   588  	{Yf0, Ynone, Yrf, Zo_m, 2},
   589  }
   590  
   591  var yfmvdp = []ytab{
   592  	{Yf0, Ynone, Ym, Zo_m, 2},
   593  	{Yf0, Ynone, Yrf, Zo_m, 2},
   594  }
   595  
   596  var yfmvf = []ytab{
   597  	{Ym, Ynone, Yf0, Zm_o, 2},
   598  	{Yf0, Ynone, Ym, Zo_m, 2},
   599  }
   600  
   601  var yfmvx = []ytab{
   602  	{Ym, Ynone, Yf0, Zm_o, 2},
   603  }
   604  
   605  var yfmvp = []ytab{
   606  	{Yf0, Ynone, Ym, Zo_m, 2},
   607  }
   608  
   609  var yfcmv = []ytab{
   610  	{Yrf, Ynone, Yf0, Zm_o, 2},
   611  }
   612  
   613  var yfadd = []ytab{
   614  	{Ym, Ynone, Yf0, Zm_o, 2},
   615  	{Yrf, Ynone, Yf0, Zm_o, 2},
   616  	{Yf0, Ynone, Yrf, Zo_m, 2},
   617  }
   618  
   619  var yfxch = []ytab{
   620  	{Yf0, Ynone, Yrf, Zo_m, 2},
   621  	{Yrf, Ynone, Yf0, Zm_o, 2},
   622  }
   623  
   624  var ycompp = []ytab{
   625  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   626  }
   627  
   628  var ystsw = []ytab{
   629  	{Ynone, Ynone, Ym, Zo_m, 2},
   630  	{Ynone, Ynone, Yax, Zlit, 1},
   631  }
   632  
   633  var ysvrs = []ytab{
   634  	{Ynone, Ynone, Ym, Zo_m, 2},
   635  	{Ym, Ynone, Ynone, Zm_o, 2},
   636  }
   637  
   638  var ymm = []ytab{
   639  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   640  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   641  }
   642  
   643  var yxm = []ytab{
   644  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   645  }
   646  
   647  var yxm_q4 = []ytab{
   648  	{Yxm, Ynone, Yxr, Zm_r, 1},
   649  }
   650  
   651  var yxcvm1 = []ytab{
   652  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   653  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   654  }
   655  
   656  var yxcvm2 = []ytab{
   657  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   658  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   659  }
   660  
   661  var yxr = []ytab{
   662  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   663  }
   664  
   665  var yxr_ml = []ytab{
   666  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   667  }
   668  
   669  var ymr = []ytab{
   670  	{Ymr, Ynone, Ymr, Zm_r, 1},
   671  }
   672  
   673  var ymr_ml = []ytab{
   674  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   675  }
   676  
   677  var yxcmpi = []ytab{
   678  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   679  }
   680  
   681  var yxmov = []ytab{
   682  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   683  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   684  }
   685  
   686  var yxcvfl = []ytab{
   687  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   688  }
   689  
   690  var yxcvlf = []ytab{
   691  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   692  }
   693  
   694  var yxcvfq = []ytab{
   695  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   696  }
   697  
   698  var yxcvqf = []ytab{
   699  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   700  }
   701  
   702  var yps = []ytab{
   703  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   704  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   705  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   706  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   707  }
   708  
   709  var yxrrl = []ytab{
   710  	{Yxr, Ynone, Yrl, Zm_r, 1},
   711  }
   712  
   713  var ymrxr = []ytab{
   714  	{Ymr, Ynone, Yxr, Zm_r, 1},
   715  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   716  }
   717  
   718  var ymshuf = []ytab{
   719  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   720  }
   721  
   722  var ymshufb = []ytab{
   723  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   724  }
   725  
   726  var yxshuf = []ytab{
   727  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   728  }
   729  
   730  var yextrw = []ytab{
   731  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   732  }
   733  
   734  var yextr = []ytab{
   735  	{Yu8, Yxr, Ymm, Zibr_m, 3},
   736  }
   737  
   738  var yinsrw = []ytab{
   739  	{Yu8, Yml, Yxr, Zibm_r, 2},
   740  }
   741  
   742  var yinsr = []ytab{
   743  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   744  }
   745  
   746  var ypsdq = []ytab{
   747  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   748  }
   749  
   750  var ymskb = []ytab{
   751  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   752  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   753  }
   754  
   755  var ycrc32l = []ytab{
   756  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   757  }
   758  
   759  var yprefetch = []ytab{
   760  	{Ym, Ynone, Ynone, Zm_o, 2},
   761  }
   762  
   763  var yaes = []ytab{
   764  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   765  }
   766  
   767  var yxbegin = []ytab{
   768  	{Ynone, Ynone, Ybr, Zjmp, 1},
   769  }
   770  
   771  var yxabort = []ytab{
   772  	{Yu8, Ynone, Ynone, Zib_, 1},
   773  }
   774  
   775  var ylddqu = []ytab{
   776  	{Ym, Ynone, Yxr, Zm_r, 1},
   777  }
   778  
   779  // VEX instructions that come in two forms:
   780  //	VTHING xmm2/m128, xmmV, xmm1
   781  //	VTHING ymm2/m256, ymmV, ymm1
   782  // The opcode array in the corresponding Optab entry
   783  // should contain the (VEX prefixes, opcode byte) pair
   784  // for each of the two forms.
   785  // For example, the entries for VPXOR are:
   786  //
   787  //	VPXOR xmm2/m128, xmmV, xmm1
   788  //	VEX.NDS.128.66.0F.WIG EF /r
   789  //
   790  //	VPXOR ymm2/m256, ymmV, ymm1
   791  //	VEX.NDS.256.66.0F.WIG EF /r
   792  //
   793  // The NDS/NDD/DDS part can be dropped, producing this
   794  // Optab entry:
   795  //
   796  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
   797  //
   798  var yvex_xy3 = []ytab{
   799  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   800  	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
   801  }
   802  
   803  var yvex_ri3 = []ytab{
   804  	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
   805  }
   806  
   807  var yvex_xyi3 = []ytab{
   808  	{Yu8, Yxm, Yxr, Zvex_i_rm_r, 2},
   809  	{Yu8, Yym, Yyr, Zvex_i_rm_r, 2},
   810  	{Yi8, Yxm, Yxr, Zvex_i_rm_r, 2},
   811  	{Yi8, Yym, Yyr, Zvex_i_rm_r, 2},
   812  }
   813  
   814  var yvex_yyi4 = []ytab{ //TODO don't hide 4 op, some version have xmm version
   815  	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   816  }
   817  
   818  var yvex_xyi4 = []ytab{
   819  	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   820  }
   821  
   822  var yvex_shift = []ytab{
   823  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   824  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   825  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   826  	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2},
   827  }
   828  
   829  var yvex_shift_dq = []ytab{
   830  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   831  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   832  }
   833  
   834  var yvex_r3 = []ytab{
   835  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   836  }
   837  
   838  var yvex_vmr3 = []ytab{
   839  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   840  }
   841  
   842  var yvex_xy2 = []ytab{
   843  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   844  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   845  }
   846  
   847  var yvex_xyr2 = []ytab{
   848  	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
   849  	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
   850  }
   851  
   852  var yvex_vmovdqa = []ytab{
   853  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   854  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   855  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   856  	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
   857  }
   858  
   859  var yvex_vmovntdq = []ytab{
   860  	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
   861  	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
   862  }
   863  
   864  var yvex_vpbroadcast = []ytab{
   865  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   866  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   867  }
   868  
   869  var yvex_vpbroadcast_sd = []ytab{
   870  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   871  }
   872  
   873  var ymmxmm0f38 = []ytab{
   874  	{Ymm, Ynone, Ymr, Zlitm_r, 3},
   875  	{Yxm, Ynone, Yxr, Zlitm_r, 5},
   876  }
   877  
   878  /*
   879   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   880   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   881   * the entry with the given p->as and then looks through the ytable for that
   882   * instruction (the second field in the optab struct) for a line whose first
   883   * two values match the Ytypes of the p->from and p->to operands.  The function
   884   * oclass in span.c computes the specific Ytype of an operand and then the set
   885   * of more general Ytypes that it satisfies is implied by the ycover table, set
   886   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   887   * from the more general 8-bit constants, but instinit says
   888   *
   889   *        ycover[Yi0*Ymax + Ys32] = 1;
   890   *        ycover[Yi1*Ymax + Ys32] = 1;
   891   *        ycover[Yi8*Ymax + Ys32] = 1;
   892   *
   893   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   894   * if that's what an instruction can handle.
   895   *
 * In parallel with the scan through the ytable for the appropriate line, there
 * is a z pointer that starts out pointing at the strange magic byte list in
 * the Optab struct (its op field).  With each step past a non-matching ytable
 * line, z advances by that line's byte count: the zoffset field of the Go ytab
 * struct, which is the 4th entry in the C layout shown below.  When a matching
 * line is found, that z pointer has the extra data to use in laying down the
 * instruction bytes.  The actual bytes laid down are a function of the line's
 * Ztype (the zcase field; the 3rd entry in the C layout) and the z bytes.
   903   *
   904   * For example, let's look at AADDL.  The optab line says:
   905   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   906   *
   907   * and yaddl says
   908   *        uchar   yaddl[] =
   909   *        {
   910   *                Yi8,    Yml,    Zibo_m, 2,
   911   *                Yi32,   Yax,    Zil_,   1,
   912   *                Yi32,   Yml,    Zilo_m, 2,
   913   *                Yrl,    Yml,    Zr_m,   1,
   914   *                Yml,    Yrl,    Zm_r,   1,
   915   *                0
   916   *        };
   917   *
   918   * so there are 5 possible types of ADDL instruction that can be laid down, and
   919   * possible states used to lay them down (Ztype and z pointer, assuming z
   920   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   921   *
   922   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   923   *        Yi32, Yax -> Zil_, z+2 (0x05)
   924   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   925   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   926   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   927   *
   928   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   929   * relatively straightforward as this program goes.
   930   *
   931   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   932   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   933   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   934   * Zilo_m is the same but a long (32-bit) immediate.
   935   */
   936  var optab =
   937  /*	as, ytab, andproto, opcode */
   938  []Optab{
   939  	{obj.AXXX, nil, 0, [23]uint8{}},
   940  	{AAAA, ynone, P32, [23]uint8{0x37}},
   941  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   942  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   943  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   944  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   945  	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   946  	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   947  	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   948  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   949  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   950  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   951  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   952  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   953  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   954  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   955  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   956  	{AADJSP, nil, 0, [23]uint8{}},
   957  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   958  	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   959  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   960  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   961  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   962  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
   963  	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   964  	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   965  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   966  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   967  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   968  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   969  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   970  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   971  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   972  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   973  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   974  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   975  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   976  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   977  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   978  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   979  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   980  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   981  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   982  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   983  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   984  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   985  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   986  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   987  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   988  	{ABYTE, ybyte, Px, [23]uint8{1}},
   989  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   990  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   991  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   992  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   993  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   994  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   995  	{ACMC, ynone, Px, [23]uint8{0xf5}},
   996  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
   997  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
   998  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
   999  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1000  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1001  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1002  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1003  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1004  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1005  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1006  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1007  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1008  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1009  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1010  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1011  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1012  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1013  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1014  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1015  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1016  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1017  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1018  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1019  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1020  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1021  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1022  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1023  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1024  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1025  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1026  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1027  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1028  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1029  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1030  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1031  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1032  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1033  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1034  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1035  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1036  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1037  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1038  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1039  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1040  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1041  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1042  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1043  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1044  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1045  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1046  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1047  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1048  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1049  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1050  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1051  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1052  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1053  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1054  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1055  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1056  	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
  1057  	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
  1058  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1059  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1060  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1061  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1062  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1063  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1064  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1065  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1066  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1067  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1068  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1069  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1070  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1071  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1072  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1073  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1074  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1075  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1076  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1077  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1078  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1079  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1080  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1081  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1082  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1083  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1084  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1085  	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
  1086  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1087  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1088  	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
  1089  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1090  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1091  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1092  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1093  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1094  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1095  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1096  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1097  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1098  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1099  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1100  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1101  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1102  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1103  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1104  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1105  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1106  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1107  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1108  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1109  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1110  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1111  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1113  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1114  	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
  1115  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1116  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1117  	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
  1118  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1119  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1120  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1121  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1122  	{AINT, yint, Px, [23]uint8{0xcd}},
  1123  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1124  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1125  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1126  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1127  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1128  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1129  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1130  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1131  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1132  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1133  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1134  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1135  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1136  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1137  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1138  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1139  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1140  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1141  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1142  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1143  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1144  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1145  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1146  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1147  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1148  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1149  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1150  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1151  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1152  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1153  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1154  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1155  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1156  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1157  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1158  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1159  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1160  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1161  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1162  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1163  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1164  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1165  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1166  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1167  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1168  	{ALONG, ybyte, Px, [23]uint8{4}},
  1169  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1170  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1171  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1172  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1173  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1174  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1175  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1176  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1177  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1178  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1179  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1180  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1181  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1182  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1183  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1184  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1185  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1186  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1187  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1188  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1189  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1190  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1191  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1192  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1193  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1194  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1195  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1196  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1197  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1198  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1199  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1200  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1201  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1202  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1203  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1204  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1205  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1206  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1207  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1208  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1209  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1210  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1211  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1212  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1213  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1214  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1215  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1216  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1217  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1218  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1219  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1220  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1221  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1222  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1223  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1224  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1225  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1226  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1227  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1228  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1229  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1230  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1231  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1232  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1233  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1234  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1235  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1236  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1237  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1238  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1239  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1240  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1241  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1242  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1243  	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1244  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1245  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1246  	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1247  	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1248  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1249  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1250  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1251  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1252  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1253  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1254  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1255  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1256  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1257  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1258  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1259  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1260  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1261  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1262  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1263  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1264  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1265  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1266  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1267  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1268  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1269  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1270  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1271  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1272  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1273  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1274  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1275  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1276  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1277  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1278  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1279  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1280  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1281  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1282  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1283  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1284  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1285  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1286  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1287  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1288  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1289  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1290  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1291  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1292  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1293  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1294  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1295  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1296  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1297  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1298  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1299  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1300  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1301  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1302  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1303  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1304  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1305  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1306  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1307  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1308  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1309  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1310  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1311  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1312  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1313  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1314  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1315  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1316  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1317  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1318  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1319  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1320  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1321  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1322  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1323  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1324  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1325  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1326  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1327  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1328  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1329  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1330  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1331  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1332  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1333  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1334  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1335  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1336  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1337  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1338  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1339  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1340  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1341  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1342  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1343  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1344  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1345  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1346  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1347  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1348  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1349  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1350  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1351  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1352  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1353  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1354  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1355  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1356  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1357  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1358  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1359  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1360  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1361  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1362  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1363  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1364  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1365  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1366  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1367  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1368  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1369  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1370  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1371  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1372  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1373  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1374  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1375  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1376  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1377  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1378  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1379  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1380  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1381  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1382  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1383  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1384  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1385  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1386  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1387  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1388  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1389  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1390  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1391  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1392  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1393  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1394  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1395  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1396  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1397  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1398  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1399  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1400  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1401  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1402  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1403  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1404  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1405  	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1406  	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1407  	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1408  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1409  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1410  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1411  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1412  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1413  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1414  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1415  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1416  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1417  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1418  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1419  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1420  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1421  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1422  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1423  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1424  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1425  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1426  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1427  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1428  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1429  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1430  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1431  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1432  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1433  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1434  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1435  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1436  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1437  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1438  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1439  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1440  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1441  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1442  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1443  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1444  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1445  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1446  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1447  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1448  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1449  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1450  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1451  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1452  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1453  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1454  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1455  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1456  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1457  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1458  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1459  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1460  	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1461  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1462  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1463  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1464  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1465  	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
  1466  	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
  1467  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1468  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1469  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1470  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1471  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1472  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1473  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1474  	{AWORD, ybyte, Px, [23]uint8{2}},
  1475  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1476  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1477  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1478  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1479  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1480  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1481  	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1482  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1483  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1484  	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1485  	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1486  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1487  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1488  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1489  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1490  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1491  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1492  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1493  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1494  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1495  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1496  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1497  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1498  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1499  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1500  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1501  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1502  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1503  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1504  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1505  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1506  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1507  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1508  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1509  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1510  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1511  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1512  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1513  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1514  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1515  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1516  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1517  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1518  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1519  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1520  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1521  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1522  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1523  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1524  	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
  1525  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1526  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1527  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1528  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1529  	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
  1530  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1531  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1532  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1533  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1534  	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
  1535  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1536  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1537  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1538  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1539  	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
  1540  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1541  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1542  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1543  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1544  	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
  1545  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1546  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1547  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1548  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1549  	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
  1550  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1551  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1552  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1553  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1554  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1555  	{AFFREE, nil, 0, [23]uint8{}},
  1556  	{AFLDCW, ysvrs, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1557  	{AFLDENV, ysvrs, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1558  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1559  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1560  	{AFSTCW, ysvrs, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1561  	{AFSTENV, ysvrs, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1562  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1563  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1564  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1565  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1566  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1567  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1568  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1569  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1570  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1571  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1572  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1573  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1574  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1575  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1576  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1577  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1578  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1579  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1580  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1581  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1582  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1583  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1584  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1585  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1586  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1587  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1588  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1589  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1590  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1591  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1592  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1593  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1594  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1595  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1596  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1597  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1598  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1599  	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
  1600  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1601  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1602  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1603  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1604  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1605  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1606  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1607  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1608  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1609  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1610  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1611  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1612  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1613  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1614  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1615  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1616  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1617  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1618  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1619  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1620  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1621  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1622  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1623  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1624  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1625  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1626  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1627  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1628  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1629  	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1630  	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
  1631  	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
  1632  	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1633  	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1634  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1635  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1636  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1637  	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
  1638  	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
  1639  	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
  1640  
  1641  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1642  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1643  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1644  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1645  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1646  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1647  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1648  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1649  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1650  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1651  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1652  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1653  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1654  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1655  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1656  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1657  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1658  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1659  
  1660  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1661  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1662  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1663  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1664  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1665  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1666  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1667  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1668  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1669  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1670  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
  1671  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70, VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
  1672  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
  1673  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
  1674  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
  1675  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
  1676  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
  1677  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
  1678  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
  1679  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
  1680  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
  1681  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
  1682  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
  1683  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
  1684  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
  1685  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
  1686  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
  1687  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
  1688  	{AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_256_66_0F38_W0, 0x19}},
  1689  	{AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x18, VEX_256_66_0F38_W0, 0x18}},
  1690  	{AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x12, VEX_256_F2_0F_WIG, 0x12}},
  1691  	{AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x16, VEX_256_F3_0F_WIG, 0x16}},
  1692  	{AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x12, VEX_256_F3_0F_WIG, 0x12}},
  1693  
  1694  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1695  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1696  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1697  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1698  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1699  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1700  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1701  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1702  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1703  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1704  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1705  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1706  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1707  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1708  	{obj.AEND, nil, 0, [23]uint8{}},
  1709  	{0, nil, 0, [23]uint8{}},
  1710  }
  1711  
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// It is filled in by instinit, which rejects duplicate opcodes.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1713  
  1714  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1715  // This happens on systems like Solaris that call .so functions instead of system calls.
  1716  // It does not seem to be necessary for any other systems. This is probably working
  1717  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1718  // what that bug is. And this does fix it.
  1719  func isextern(s *obj.LSym) bool {
  1720  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1721  	return strings.HasPrefix(s.Name, "libc_")
  1722  }
  1723  
  1724  // single-instruction no-ops of various lengths.
  1725  // constructed by hand and disassembled with gdb to verify.
  1726  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1727  var nop = [][16]uint8{
  1728  	{0x90},
  1729  	{0x66, 0x90},
  1730  	{0x0F, 0x1F, 0x00},
  1731  	{0x0F, 0x1F, 0x40, 0x00},
  1732  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1733  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1734  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1735  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1736  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1737  }
  1738  
  1739  // Native Client rejects the repeated 0x66 prefix.
  1740  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1741  func fillnop(p []byte, n int) {
  1742  	var m int
  1743  
  1744  	for n > 0 {
  1745  		m = n
  1746  		if m > len(nop) {
  1747  			m = len(nop)
  1748  		}
  1749  		copy(p[:m], nop[m-1][:m])
  1750  		p = p[m:]
  1751  		n -= m
  1752  	}
  1753  }
  1754  
  1755  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1756  	s.Grow(int64(c) + int64(pad))
  1757  	fillnop(s.P[c:], int(pad))
  1758  	return c + pad
  1759  }
  1760  
  1761  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1762  	if p.Mode != 64 || ctxt.Arch.PtrSize == 4 {
  1763  		return l
  1764  	}
  1765  	return q
  1766  }
  1767  
  1768  func span6(ctxt *obj.Link, s *obj.LSym) {
  1769  	ctxt.Cursym = s
  1770  
  1771  	if s.P != nil {
  1772  		return
  1773  	}
  1774  
  1775  	if ycover[0] == 0 {
  1776  		instinit()
  1777  	}
  1778  
  1779  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1780  		if p.To.Type == obj.TYPE_BRANCH {
  1781  			if p.Pcond == nil {
  1782  				p.Pcond = p
  1783  			}
  1784  		}
  1785  		if p.As == AADJSP {
  1786  			p.To.Type = obj.TYPE_REG
  1787  			p.To.Reg = REG_SP
  1788  			v := int32(-p.From.Offset)
  1789  			p.From.Offset = int64(v)
  1790  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1791  			if v < 0 {
  1792  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1793  				v = -v
  1794  				p.From.Offset = int64(v)
  1795  			}
  1796  
  1797  			if v == 0 {
  1798  				p.As = obj.ANOP
  1799  			}
  1800  		}
  1801  	}
  1802  
  1803  	var q *obj.Prog
  1804  	var count int64 // rough count of number of instructions
  1805  	for p := s.Text; p != nil; p = p.Link {
  1806  		count++
  1807  		p.Back = 2 // use short branches first time through
  1808  		q = p.Pcond
  1809  		if q != nil && (q.Back&2 != 0) {
  1810  			p.Back |= 1 // backward jump
  1811  			q.Back |= 4 // loop head
  1812  		}
  1813  
  1814  		if p.As == AADJSP {
  1815  			p.To.Type = obj.TYPE_REG
  1816  			p.To.Reg = REG_SP
  1817  			v := int32(-p.From.Offset)
  1818  			p.From.Offset = int64(v)
  1819  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1820  			if v < 0 {
  1821  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1822  				v = -v
  1823  				p.From.Offset = int64(v)
  1824  			}
  1825  
  1826  			if v == 0 {
  1827  				p.As = obj.ANOP
  1828  			}
  1829  		}
  1830  	}
  1831  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1832  
  1833  	n := 0
  1834  	var c int32
  1835  	errors := ctxt.Errors
  1836  	var deferreturn *obj.LSym
  1837  	if ctxt.Headtype == obj.Hnacl {
  1838  		deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1839  	}
  1840  	for {
  1841  		loop := int32(0)
  1842  		for i := range s.R {
  1843  			s.R[i] = obj.Reloc{}
  1844  		}
  1845  		s.R = s.R[:0]
  1846  		s.P = s.P[:0]
  1847  		c = 0
  1848  		for p := s.Text; p != nil; p = p.Link {
  1849  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1850  
  1851  				// pad everything to avoid crossing 32-byte boundary
  1852  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1853  					c = naclpad(ctxt, s, c, -c&31)
  1854  				}
  1855  
  1856  				// pad call deferreturn to start at 32-byte boundary
  1857  				// so that subtracting 5 in jmpdefer will jump back
  1858  				// to that boundary and rerun the call.
  1859  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1860  					c = naclpad(ctxt, s, c, -c&31)
  1861  				}
  1862  
  1863  				// pad call to end at 32-byte boundary
  1864  				if p.As == obj.ACALL {
  1865  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1866  				}
  1867  
  1868  				// the linker treats REP and STOSQ as different instructions
  1869  				// but in fact the REP is a prefix on the STOSQ.
  1870  				// make sure REP has room for 2 more bytes, so that
  1871  				// padding will not be inserted before the next instruction.
  1872  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1873  					c = naclpad(ctxt, s, c, -c&31)
  1874  				}
  1875  
  1876  				// same for LOCK.
  1877  				// various instructions follow; the longest is 4 bytes.
  1878  				// give ourselves 8 bytes so as to avoid surprises.
  1879  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1880  					c = naclpad(ctxt, s, c, -c&31)
  1881  				}
  1882  			}
  1883  
  1884  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1885  				// pad with NOPs
  1886  				v := -c & (LoopAlign - 1)
  1887  
  1888  				if v <= MaxLoopPad {
  1889  					s.Grow(int64(c) + int64(v))
  1890  					fillnop(s.P[c:], int(v))
  1891  					c += v
  1892  				}
  1893  			}
  1894  
  1895  			p.Pc = int64(c)
  1896  
  1897  			// process forward jumps to p
  1898  			for q = p.Rel; q != nil; q = q.Forwd {
  1899  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1900  				if q.Back&2 != 0 { // short
  1901  					if v > 127 {
  1902  						loop++
  1903  						q.Back ^= 2
  1904  					}
  1905  
  1906  					if q.As == AJCXZL || q.As == AXBEGIN {
  1907  						s.P[q.Pc+2] = byte(v)
  1908  					} else {
  1909  						s.P[q.Pc+1] = byte(v)
  1910  					}
  1911  				} else {
  1912  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1913  				}
  1914  			}
  1915  
  1916  			p.Rel = nil
  1917  
  1918  			p.Pc = int64(c)
  1919  			asmins(ctxt, p)
  1920  			m := ctxt.AsmBuf.Len()
  1921  			if int(p.Isize) != m {
  1922  				p.Isize = uint8(m)
  1923  				loop++
  1924  			}
  1925  
  1926  			s.Grow(p.Pc + int64(m))
  1927  			copy(s.P[p.Pc:], ctxt.AsmBuf.Bytes())
  1928  			c += int32(m)
  1929  		}
  1930  
  1931  		n++
  1932  		if n > 20 {
  1933  			ctxt.Diag("span must be looping")
  1934  			log.Fatalf("loop")
  1935  		}
  1936  		if loop == 0 {
  1937  			break
  1938  		}
  1939  		if ctxt.Errors > errors {
  1940  			return
  1941  		}
  1942  	}
  1943  
  1944  	if ctxt.Headtype == obj.Hnacl {
  1945  		c = naclpad(ctxt, s, c, -c&31)
  1946  	}
  1947  
  1948  	s.Size = int64(c)
  1949  
  1950  	if false { /* debug['a'] > 1 */
  1951  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1952  		var i int
  1953  		for i = 0; i < len(s.P); i++ {
  1954  			fmt.Printf(" %.2x", s.P[i])
  1955  			if i%16 == 15 {
  1956  				fmt.Printf("\n  %.6x", uint(i+1))
  1957  			}
  1958  		}
  1959  
  1960  		if i%16 != 0 {
  1961  			fmt.Printf("\n")
  1962  		}
  1963  
  1964  		for i := 0; i < len(s.R); i++ {
  1965  			r := &s.R[i]
  1966  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1967  		}
  1968  	}
  1969  }
  1970  
  1971  func instinit() {
  1972  	for i := 1; optab[i].as != 0; i++ {
  1973  		c := optab[i].as
  1974  		if opindex[c&obj.AMask] != nil {
  1975  			log.Fatalf("phase error in optab: %d (%v)", i, c)
  1976  		}
  1977  		opindex[c&obj.AMask] = &optab[i]
  1978  	}
  1979  
  1980  	for i := 0; i < Ymax; i++ {
  1981  		ycover[i*Ymax+i] = 1
  1982  	}
  1983  
  1984  	ycover[Yi0*Ymax+Yi8] = 1
  1985  	ycover[Yi1*Ymax+Yi8] = 1
  1986  	ycover[Yu7*Ymax+Yi8] = 1
  1987  
  1988  	ycover[Yi0*Ymax+Yu7] = 1
  1989  	ycover[Yi1*Ymax+Yu7] = 1
  1990  
  1991  	ycover[Yi0*Ymax+Yu8] = 1
  1992  	ycover[Yi1*Ymax+Yu8] = 1
  1993  	ycover[Yu7*Ymax+Yu8] = 1
  1994  
  1995  	ycover[Yi0*Ymax+Ys32] = 1
  1996  	ycover[Yi1*Ymax+Ys32] = 1
  1997  	ycover[Yu7*Ymax+Ys32] = 1
  1998  	ycover[Yu8*Ymax+Ys32] = 1
  1999  	ycover[Yi8*Ymax+Ys32] = 1
  2000  
  2001  	ycover[Yi0*Ymax+Yi32] = 1
  2002  	ycover[Yi1*Ymax+Yi32] = 1
  2003  	ycover[Yu7*Ymax+Yi32] = 1
  2004  	ycover[Yu8*Ymax+Yi32] = 1
  2005  	ycover[Yi8*Ymax+Yi32] = 1
  2006  	ycover[Ys32*Ymax+Yi32] = 1
  2007  
  2008  	ycover[Yi0*Ymax+Yi64] = 1
  2009  	ycover[Yi1*Ymax+Yi64] = 1
  2010  	ycover[Yu7*Ymax+Yi64] = 1
  2011  	ycover[Yu8*Ymax+Yi64] = 1
  2012  	ycover[Yi8*Ymax+Yi64] = 1
  2013  	ycover[Ys32*Ymax+Yi64] = 1
  2014  	ycover[Yi32*Ymax+Yi64] = 1
  2015  
  2016  	ycover[Yal*Ymax+Yrb] = 1
  2017  	ycover[Ycl*Ymax+Yrb] = 1
  2018  	ycover[Yax*Ymax+Yrb] = 1
  2019  	ycover[Ycx*Ymax+Yrb] = 1
  2020  	ycover[Yrx*Ymax+Yrb] = 1
  2021  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2022  
  2023  	ycover[Ycl*Ymax+Ycx] = 1
  2024  
  2025  	ycover[Yax*Ymax+Yrx] = 1
  2026  	ycover[Ycx*Ymax+Yrx] = 1
  2027  
  2028  	ycover[Yax*Ymax+Yrl] = 1
  2029  	ycover[Ycx*Ymax+Yrl] = 1
  2030  	ycover[Yrx*Ymax+Yrl] = 1
  2031  	ycover[Yrl32*Ymax+Yrl] = 1
  2032  
  2033  	ycover[Yf0*Ymax+Yrf] = 1
  2034  
  2035  	ycover[Yal*Ymax+Ymb] = 1
  2036  	ycover[Ycl*Ymax+Ymb] = 1
  2037  	ycover[Yax*Ymax+Ymb] = 1
  2038  	ycover[Ycx*Ymax+Ymb] = 1
  2039  	ycover[Yrx*Ymax+Ymb] = 1
  2040  	ycover[Yrb*Ymax+Ymb] = 1
  2041  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2042  	ycover[Ym*Ymax+Ymb] = 1
  2043  
  2044  	ycover[Yax*Ymax+Yml] = 1
  2045  	ycover[Ycx*Ymax+Yml] = 1
  2046  	ycover[Yrx*Ymax+Yml] = 1
  2047  	ycover[Yrl*Ymax+Yml] = 1
  2048  	ycover[Yrl32*Ymax+Yml] = 1
  2049  	ycover[Ym*Ymax+Yml] = 1
  2050  
  2051  	ycover[Yax*Ymax+Ymm] = 1
  2052  	ycover[Ycx*Ymax+Ymm] = 1
  2053  	ycover[Yrx*Ymax+Ymm] = 1
  2054  	ycover[Yrl*Ymax+Ymm] = 1
  2055  	ycover[Yrl32*Ymax+Ymm] = 1
  2056  	ycover[Ym*Ymax+Ymm] = 1
  2057  	ycover[Ymr*Ymax+Ymm] = 1
  2058  
  2059  	ycover[Ym*Ymax+Yxm] = 1
  2060  	ycover[Yxr*Ymax+Yxm] = 1
  2061  
  2062  	ycover[Ym*Ymax+Yym] = 1
  2063  	ycover[Yyr*Ymax+Yym] = 1
  2064  
  2065  	for i := 0; i < MAXREG; i++ {
  2066  		reg[i] = -1
  2067  		if i >= REG_AL && i <= REG_R15B {
  2068  			reg[i] = (i - REG_AL) & 7
  2069  			if i >= REG_SPB && i <= REG_DIB {
  2070  				regrex[i] = 0x40
  2071  			}
  2072  			if i >= REG_R8B && i <= REG_R15B {
  2073  				regrex[i] = Rxr | Rxx | Rxb
  2074  			}
  2075  		}
  2076  
  2077  		if i >= REG_AH && i <= REG_BH {
  2078  			reg[i] = 4 + ((i - REG_AH) & 7)
  2079  		}
  2080  		if i >= REG_AX && i <= REG_R15 {
  2081  			reg[i] = (i - REG_AX) & 7
  2082  			if i >= REG_R8 {
  2083  				regrex[i] = Rxr | Rxx | Rxb
  2084  			}
  2085  		}
  2086  
  2087  		if i >= REG_F0 && i <= REG_F0+7 {
  2088  			reg[i] = (i - REG_F0) & 7
  2089  		}
  2090  		if i >= REG_M0 && i <= REG_M0+7 {
  2091  			reg[i] = (i - REG_M0) & 7
  2092  		}
  2093  		if i >= REG_X0 && i <= REG_X0+15 {
  2094  			reg[i] = (i - REG_X0) & 7
  2095  			if i >= REG_X0+8 {
  2096  				regrex[i] = Rxr | Rxx | Rxb
  2097  			}
  2098  		}
  2099  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2100  			reg[i] = (i - REG_Y0) & 7
  2101  			if i >= REG_Y0+8 {
  2102  				regrex[i] = Rxr | Rxx | Rxb
  2103  			}
  2104  		}
  2105  
  2106  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2107  			regrex[i] = Rxr
  2108  		}
  2109  	}
  2110  }
  2111  
  2112  var isAndroid = (obj.GOOS == "android")
  2113  
  2114  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2115  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2116  		return 0
  2117  	}
  2118  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2119  		switch a.Reg {
  2120  		case REG_CS:
  2121  			return 0x2e
  2122  
  2123  		case REG_DS:
  2124  			return 0x3e
  2125  
  2126  		case REG_ES:
  2127  			return 0x26
  2128  
  2129  		case REG_FS:
  2130  			return 0x64
  2131  
  2132  		case REG_GS:
  2133  			return 0x65
  2134  
  2135  		case REG_TLS:
  2136  			// NOTE: Systems listed here should be only systems that
  2137  			// support direct TLS references like 8(TLS) implemented as
  2138  			// direct references from FS or GS. Systems that require
  2139  			// the initial-exec model, where you load the TLS base into
  2140  			// a register and then index from that register, do not reach
  2141  			// this code and should not be listed.
  2142  			if p.Mode == 32 {
  2143  				switch ctxt.Headtype {
  2144  				default:
  2145  					if isAndroid {
  2146  						return 0x65 // GS
  2147  					}
  2148  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2149  
  2150  				case obj.Hdarwin,
  2151  					obj.Hdragonfly,
  2152  					obj.Hfreebsd,
  2153  					obj.Hnetbsd,
  2154  					obj.Hopenbsd:
  2155  					return 0x65 // GS
  2156  				}
  2157  			}
  2158  
  2159  			switch ctxt.Headtype {
  2160  			default:
  2161  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2162  
  2163  			case obj.Hlinux:
  2164  				if isAndroid {
  2165  					return 0x64 // FS
  2166  				}
  2167  
  2168  				if ctxt.Flag_shared {
  2169  					log.Fatalf("unknown TLS base register for linux with -shared")
  2170  				} else {
  2171  					return 0x64 // FS
  2172  				}
  2173  
  2174  			case obj.Hdragonfly,
  2175  				obj.Hfreebsd,
  2176  				obj.Hnetbsd,
  2177  				obj.Hopenbsd,
  2178  				obj.Hsolaris:
  2179  				return 0x64 // FS
  2180  
  2181  			case obj.Hdarwin:
  2182  				return 0x65 // GS
  2183  			}
  2184  		}
  2185  	}
  2186  
  2187  	if p.Mode == 32 {
  2188  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2189  			// When building for inclusion into a shared library, an instruction of the form
  2190  			//     MOVL 0(CX)(TLS*1), AX
  2191  			// becomes
  2192  			//     mov %gs:(%ecx), %eax
  2193  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2194  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2195  			// a shared library the instruction it becomes
  2196  			//     mov 0x0(%ecx), $eax
  2197  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2198  			if a.Offset != 0 {
  2199  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2200  			}
  2201  			return 0x65 // GS
  2202  		}
  2203  		return 0
  2204  	}
  2205  
  2206  	switch a.Index {
  2207  	case REG_CS:
  2208  		return 0x2e
  2209  
  2210  	case REG_DS:
  2211  		return 0x3e
  2212  
  2213  	case REG_ES:
  2214  		return 0x26
  2215  
  2216  	case REG_TLS:
  2217  		if ctxt.Flag_shared {
  2218  			// When building for inclusion into a shared library, an instruction of the form
  2219  			//     MOV 0(CX)(TLS*1), AX
  2220  			// becomes
  2221  			//     mov %fs:(%rcx), %rax
  2222  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2223  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2224  			// a shared library the instruction does not require a prefix.
  2225  			if a.Offset != 0 {
  2226  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2227  			}
  2228  			return 0x64
  2229  		}
  2230  
  2231  	case REG_FS:
  2232  		return 0x64
  2233  
  2234  	case REG_GS:
  2235  		return 0x65
  2236  	}
  2237  
  2238  	return 0
  2239  }
  2240  
  2241  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2242  	switch a.Type {
  2243  	case obj.TYPE_NONE:
  2244  		return Ynone
  2245  
  2246  	case obj.TYPE_BRANCH:
  2247  		return Ybr
  2248  
  2249  	case obj.TYPE_INDIR:
  2250  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2251  			return Yindir
  2252  		}
  2253  		return Yxxx
  2254  
  2255  	case obj.TYPE_MEM:
  2256  		if a.Index == REG_SP {
  2257  			// Can't use SP as the index register
  2258  			return Yxxx
  2259  		}
  2260  		if ctxt.Asmode == 64 {
  2261  			switch a.Name {
  2262  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2263  				// Global variables can't use index registers and their
  2264  				// base register is %rip (%rip is encoded as REG_NONE).
  2265  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2266  					return Yxxx
  2267  				}
  2268  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2269  				// These names must have a base of SP.  The old compiler
  2270  				// uses 0 for the base register. SSA uses REG_SP.
  2271  				if a.Reg != REG_SP && a.Reg != 0 {
  2272  					return Yxxx
  2273  				}
  2274  			case obj.NAME_NONE:
  2275  				// everything is ok
  2276  			default:
  2277  				// unknown name
  2278  				return Yxxx
  2279  			}
  2280  		}
  2281  		return Ym
  2282  
  2283  	case obj.TYPE_ADDR:
  2284  		switch a.Name {
  2285  		case obj.NAME_GOTREF:
  2286  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2287  			return Yxxx
  2288  
  2289  		case obj.NAME_EXTERN,
  2290  			obj.NAME_STATIC:
  2291  			if a.Sym != nil && isextern(a.Sym) || (p.Mode == 32 && !ctxt.Flag_shared) {
  2292  				return Yi32
  2293  			}
  2294  			return Yiauto // use pc-relative addressing
  2295  
  2296  		case obj.NAME_AUTO,
  2297  			obj.NAME_PARAM:
  2298  			return Yiauto
  2299  		}
  2300  
  2301  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2302  		// and got Yi32 in an earlier version of this code.
  2303  		// Keep doing that until we fix yduff etc.
  2304  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2305  			return Yi32
  2306  		}
  2307  
  2308  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2309  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2310  		}
  2311  		fallthrough
  2312  
  2313  		// fall through
  2314  
  2315  	case obj.TYPE_CONST:
  2316  		if a.Sym != nil {
  2317  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2318  		}
  2319  
  2320  		v := a.Offset
  2321  		if p.Mode == 32 {
  2322  			v = int64(int32(v))
  2323  		}
  2324  		if v == 0 {
  2325  			if p.Mark&PRESERVEFLAGS != 0 {
  2326  				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
  2327  				return Yu7
  2328  			}
  2329  			return Yi0
  2330  		}
  2331  		if v == 1 {
  2332  			return Yi1
  2333  		}
  2334  		if v >= 0 && v <= 127 {
  2335  			return Yu7
  2336  		}
  2337  		if v >= 0 && v <= 255 {
  2338  			return Yu8
  2339  		}
  2340  		if v >= -128 && v <= 127 {
  2341  			return Yi8
  2342  		}
  2343  		if p.Mode == 32 {
  2344  			return Yi32
  2345  		}
  2346  		l := int32(v)
  2347  		if int64(l) == v {
  2348  			return Ys32 /* can sign extend */
  2349  		}
  2350  		if v>>32 == 0 {
  2351  			return Yi32 /* unsigned */
  2352  		}
  2353  		return Yi64
  2354  
  2355  	case obj.TYPE_TEXTSIZE:
  2356  		return Ytextsize
  2357  	}
  2358  
  2359  	if a.Type != obj.TYPE_REG {
  2360  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2361  		return Yxxx
  2362  	}
  2363  
  2364  	switch a.Reg {
  2365  	case REG_AL:
  2366  		return Yal
  2367  
  2368  	case REG_AX:
  2369  		return Yax
  2370  
  2371  		/*
  2372  			case REG_SPB:
  2373  		*/
  2374  	case REG_BPB,
  2375  		REG_SIB,
  2376  		REG_DIB,
  2377  		REG_R8B,
  2378  		REG_R9B,
  2379  		REG_R10B,
  2380  		REG_R11B,
  2381  		REG_R12B,
  2382  		REG_R13B,
  2383  		REG_R14B,
  2384  		REG_R15B:
  2385  		if ctxt.Asmode != 64 {
  2386  			return Yxxx
  2387  		}
  2388  		fallthrough
  2389  
  2390  	case REG_DL,
  2391  		REG_BL,
  2392  		REG_AH,
  2393  		REG_CH,
  2394  		REG_DH,
  2395  		REG_BH:
  2396  		return Yrb
  2397  
  2398  	case REG_CL:
  2399  		return Ycl
  2400  
  2401  	case REG_CX:
  2402  		return Ycx
  2403  
  2404  	case REG_DX, REG_BX:
  2405  		return Yrx
  2406  
  2407  	case REG_R8, /* not really Yrl */
  2408  		REG_R9,
  2409  		REG_R10,
  2410  		REG_R11,
  2411  		REG_R12,
  2412  		REG_R13,
  2413  		REG_R14,
  2414  		REG_R15:
  2415  		if ctxt.Asmode != 64 {
  2416  			return Yxxx
  2417  		}
  2418  		fallthrough
  2419  
  2420  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2421  		if p.Mode == 32 {
  2422  			return Yrl32
  2423  		}
  2424  		return Yrl
  2425  
  2426  	case REG_F0 + 0:
  2427  		return Yf0
  2428  
  2429  	case REG_F0 + 1,
  2430  		REG_F0 + 2,
  2431  		REG_F0 + 3,
  2432  		REG_F0 + 4,
  2433  		REG_F0 + 5,
  2434  		REG_F0 + 6,
  2435  		REG_F0 + 7:
  2436  		return Yrf
  2437  
  2438  	case REG_M0 + 0,
  2439  		REG_M0 + 1,
  2440  		REG_M0 + 2,
  2441  		REG_M0 + 3,
  2442  		REG_M0 + 4,
  2443  		REG_M0 + 5,
  2444  		REG_M0 + 6,
  2445  		REG_M0 + 7:
  2446  		return Ymr
  2447  
  2448  	case REG_X0 + 0,
  2449  		REG_X0 + 1,
  2450  		REG_X0 + 2,
  2451  		REG_X0 + 3,
  2452  		REG_X0 + 4,
  2453  		REG_X0 + 5,
  2454  		REG_X0 + 6,
  2455  		REG_X0 + 7,
  2456  		REG_X0 + 8,
  2457  		REG_X0 + 9,
  2458  		REG_X0 + 10,
  2459  		REG_X0 + 11,
  2460  		REG_X0 + 12,
  2461  		REG_X0 + 13,
  2462  		REG_X0 + 14,
  2463  		REG_X0 + 15:
  2464  		return Yxr
  2465  
  2466  	case REG_Y0 + 0,
  2467  		REG_Y0 + 1,
  2468  		REG_Y0 + 2,
  2469  		REG_Y0 + 3,
  2470  		REG_Y0 + 4,
  2471  		REG_Y0 + 5,
  2472  		REG_Y0 + 6,
  2473  		REG_Y0 + 7,
  2474  		REG_Y0 + 8,
  2475  		REG_Y0 + 9,
  2476  		REG_Y0 + 10,
  2477  		REG_Y0 + 11,
  2478  		REG_Y0 + 12,
  2479  		REG_Y0 + 13,
  2480  		REG_Y0 + 14,
  2481  		REG_Y0 + 15:
  2482  		return Yyr
  2483  
  2484  	case REG_CS:
  2485  		return Ycs
  2486  	case REG_SS:
  2487  		return Yss
  2488  	case REG_DS:
  2489  		return Yds
  2490  	case REG_ES:
  2491  		return Yes
  2492  	case REG_FS:
  2493  		return Yfs
  2494  	case REG_GS:
  2495  		return Ygs
  2496  	case REG_TLS:
  2497  		return Ytls
  2498  
  2499  	case REG_GDTR:
  2500  		return Ygdtr
  2501  	case REG_IDTR:
  2502  		return Yidtr
  2503  	case REG_LDTR:
  2504  		return Yldtr
  2505  	case REG_MSW:
  2506  		return Ymsw
  2507  	case REG_TASK:
  2508  		return Ytask
  2509  
  2510  	case REG_CR + 0:
  2511  		return Ycr0
  2512  	case REG_CR + 1:
  2513  		return Ycr1
  2514  	case REG_CR + 2:
  2515  		return Ycr2
  2516  	case REG_CR + 3:
  2517  		return Ycr3
  2518  	case REG_CR + 4:
  2519  		return Ycr4
  2520  	case REG_CR + 5:
  2521  		return Ycr5
  2522  	case REG_CR + 6:
  2523  		return Ycr6
  2524  	case REG_CR + 7:
  2525  		return Ycr7
  2526  	case REG_CR + 8:
  2527  		return Ycr8
  2528  
  2529  	case REG_DR + 0:
  2530  		return Ydr0
  2531  	case REG_DR + 1:
  2532  		return Ydr1
  2533  	case REG_DR + 2:
  2534  		return Ydr2
  2535  	case REG_DR + 3:
  2536  		return Ydr3
  2537  	case REG_DR + 4:
  2538  		return Ydr4
  2539  	case REG_DR + 5:
  2540  		return Ydr5
  2541  	case REG_DR + 6:
  2542  		return Ydr6
  2543  	case REG_DR + 7:
  2544  		return Ydr7
  2545  
  2546  	case REG_TR + 0:
  2547  		return Ytr0
  2548  	case REG_TR + 1:
  2549  		return Ytr1
  2550  	case REG_TR + 2:
  2551  		return Ytr2
  2552  	case REG_TR + 3:
  2553  		return Ytr3
  2554  	case REG_TR + 4:
  2555  		return Ytr4
  2556  	case REG_TR + 5:
  2557  		return Ytr5
  2558  	case REG_TR + 6:
  2559  		return Ytr6
  2560  	case REG_TR + 7:
  2561  		return Ytr7
  2562  	}
  2563  
  2564  	return Yxxx
  2565  }
  2566  
  2567  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2568  	var i int
  2569  
  2570  	switch index {
  2571  	default:
  2572  		goto bad
  2573  
  2574  	case REG_NONE:
  2575  		i = 4 << 3
  2576  		goto bas
  2577  
  2578  	case REG_R8,
  2579  		REG_R9,
  2580  		REG_R10,
  2581  		REG_R11,
  2582  		REG_R12,
  2583  		REG_R13,
  2584  		REG_R14,
  2585  		REG_R15:
  2586  		if ctxt.Asmode != 64 {
  2587  			goto bad
  2588  		}
  2589  		fallthrough
  2590  
  2591  	case REG_AX,
  2592  		REG_CX,
  2593  		REG_DX,
  2594  		REG_BX,
  2595  		REG_BP,
  2596  		REG_SI,
  2597  		REG_DI:
  2598  		i = reg[index] << 3
  2599  	}
  2600  
  2601  	switch scale {
  2602  	default:
  2603  		goto bad
  2604  
  2605  	case 1:
  2606  		break
  2607  
  2608  	case 2:
  2609  		i |= 1 << 6
  2610  
  2611  	case 4:
  2612  		i |= 2 << 6
  2613  
  2614  	case 8:
  2615  		i |= 3 << 6
  2616  	}
  2617  
  2618  bas:
  2619  	switch base {
  2620  	default:
  2621  		goto bad
  2622  
  2623  	case REG_NONE: /* must be mod=00 */
  2624  		i |= 5
  2625  
  2626  	case REG_R8,
  2627  		REG_R9,
  2628  		REG_R10,
  2629  		REG_R11,
  2630  		REG_R12,
  2631  		REG_R13,
  2632  		REG_R14,
  2633  		REG_R15:
  2634  		if ctxt.Asmode != 64 {
  2635  			goto bad
  2636  		}
  2637  		fallthrough
  2638  
  2639  	case REG_AX,
  2640  		REG_CX,
  2641  		REG_DX,
  2642  		REG_BX,
  2643  		REG_SP,
  2644  		REG_BP,
  2645  		REG_SI,
  2646  		REG_DI:
  2647  		i |= reg[base]
  2648  	}
  2649  
  2650  	ctxt.AsmBuf.Put1(byte(i))
  2651  	return
  2652  
  2653  bad:
  2654  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2655  	ctxt.AsmBuf.Put1(0)
  2656  	return
  2657  }
  2658  
  2659  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2660  	var rel obj.Reloc
  2661  
  2662  	v := vaddr(ctxt, p, a, &rel)
  2663  	if rel.Siz != 0 {
  2664  		if rel.Siz != 4 {
  2665  			ctxt.Diag("bad reloc")
  2666  		}
  2667  		r := obj.Addrel(ctxt.Cursym)
  2668  		*r = rel
  2669  		r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  2670  	}
  2671  
  2672  	ctxt.AsmBuf.PutInt32(int32(v))
  2673  }
  2674  
  2675  /*
  2676  static void
  2677  relput8(Prog *p, Addr *a)
  2678  {
  2679  	vlong v;
  2680  	Reloc rel, *r;
  2681  
  2682  	v = vaddr(ctxt, p, a, &rel);
  2683  	if(rel.siz != 0) {
  2684  		r = addrel(ctxt->cursym);
  2685  		*r = rel;
  2686  		r->siz = 8;
  2687  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2688  	}
  2689  	put8(ctxt, v);
  2690  }
  2691  */
  2692  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2693  	if r != nil {
  2694  		*r = obj.Reloc{}
  2695  	}
  2696  
  2697  	switch a.Name {
  2698  	case obj.NAME_STATIC,
  2699  		obj.NAME_GOTREF,
  2700  		obj.NAME_EXTERN:
  2701  		s := a.Sym
  2702  		if r == nil {
  2703  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2704  			log.Fatalf("reloc")
  2705  		}
  2706  
  2707  		if a.Name == obj.NAME_GOTREF {
  2708  			r.Siz = 4
  2709  			r.Type = obj.R_GOTPCREL
  2710  		} else if isextern(s) || (p.Mode != 64 && !ctxt.Flag_shared) {
  2711  			r.Siz = 4
  2712  			r.Type = obj.R_ADDR
  2713  		} else {
  2714  			r.Siz = 4
  2715  			r.Type = obj.R_PCREL
  2716  		}
  2717  
  2718  		r.Off = -1 // caller must fill in
  2719  		r.Sym = s
  2720  		r.Add = a.Offset
  2721  
  2722  		return 0
  2723  	}
  2724  
  2725  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2726  		if r == nil {
  2727  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2728  			log.Fatalf("reloc")
  2729  		}
  2730  
  2731  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == obj.Hdarwin {
  2732  			r.Type = obj.R_TLS_LE
  2733  			r.Siz = 4
  2734  			r.Off = -1 // caller must fill in
  2735  			r.Add = a.Offset
  2736  		}
  2737  		return 0
  2738  	}
  2739  
  2740  	return a.Offset
  2741  }
  2742  
// asmandsz appends the addressing bytes for operand a to ctxt.AsmBuf: the
// ModR/M byte, followed where required by a SIB byte (via asmidx) and an
// 8-bit or 32-bit displacement. r is placed in bits 3-5 of the ModR/M
// byte. REX bits needed by the operand's registers are ORed into
// ctxt.Rexflag, together with the caller-supplied rex masked down to
// 0x40|Rxr.
//
// When the displacement needs a relocation (as reported by vaddr, or the
// TLS case built inline below), the relocation is attached to ctxt.Cursym
// at the offset where the 32-bit displacement is written.
//
// Operands that cannot be encoded are reported with ctxt.Diag
// ("asmand: bad address ..."); bytes already emitted stay in the buffer.
//
// The m64 parameter is not referenced anywhere in the body.
func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Only the 0x40 bit and Rxr are taken from the caller's rex value.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && ctxt.Rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement; it may be replaced below by vaddr's result.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here: emit the more specific
		// diagnostics that apply, then fall into the generic error.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, rm=register, no displacement.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ctxt.AsmBuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: always uses a SIB byte
	// (rm=4). This whole branch either returns or jumps to a label.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// NOTE: this base shadows the function-level base for the
		// remainder of this block.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !isextern(a.Sym) && p.Mode == 64 {
				goto bad
			}
			if p.Mode == 32 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 with a 32-bit displacement.
			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Zero displacement and no relocation: mod=0, nothing follows the
		// SIB byte. BP and R13 as base are excluded from this short form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in a signed byte: mod=1.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.AsmBuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ctxt.AsmBuf.Put1(byte(v))
			return
		}

		// Otherwise a 32-bit displacement: mod=2.
		ctxt.AsmBuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with Index == REG_TLS).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if p.Mode == 32 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ctxt.Rexflag |= regrex[base]&Rxb | rex
	// No usable base register (none, a segment register, or TLS): the
	// operand is addressed purely by a 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// mod=0, rm=5 form. Note the precedence: the p.Mode != 64 test is
		// ORed with the whole preceding && chain.
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ctxt.AsmBuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ctxt.AsmBuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always get a SIB byte, emitted by asmidx with no
	// index register.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			ctxt.AsmBuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ctxt.AsmBuf.Put1(byte(v))
			return
		}

		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: plain ModR/M, no SIB.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// Turn the displacement into the addend of an R_TLS_LE
			// relocation; the emitted displacement itself becomes 0.
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// No displacement (BP and R13 excluded from this short form).
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		// 8-bit displacement.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.AsmBuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		// 32-bit displacement.
		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

	// putrelv writes the 32-bit displacement v, first recording rel (if
	// any) at the buffer position the displacement will occupy.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// This r is a *obj.Reloc local to the block; it shadows the int
		// parameter r.
		r := obj.Addrel(ctxt.Cursym)
		*r = rel
		r.Off = int32(ctxt.Curp.Pc + int64(ctxt.AsmBuf.Len()))
	}

	ctxt.AsmBuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  2943  
  2944  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2945  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2946  }
  2947  
  2948  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2949  	asmandsz(ctxt, p, a, o, 0, 0)
  2950  }
  2951  
  2952  func bytereg(a *obj.Addr, t *uint8) {
  2953  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2954  		a.Reg += REG_AL - REG_AX
  2955  		*t = 0
  2956  	}
  2957  }
  2958  
  2959  func unbytereg(a *obj.Addr, t *uint8) {
  2960  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  2961  		a.Reg += REG_AX - REG_AL
  2962  		*t = 0
  2963  	}
  2964  }
  2965  
// E is a marker byte used in ymovtab opcode arrays. In the entries whose
// code field is 0 it is the byte that follows the last opcode byte.
// NOTE(review): presumably the consumer stops emitting at E; the code
// that reads the table is not in this part of the file — confirm there.
const (
	E = 0xff
)
  2969  
// ymovtab lists special-case encodings, grouped by the section comments
// below (segment push/pop, moves to and from segment, control, debug and
// test registers, descriptor-table and task registers, double shifts, and
// TLS base loads).
//
// Each entry gives, in order: the instruction, three operand classes
// (Y* constants), a small integer code, and up to four bytes of
// opcode/prefix data.
// NOTE(review): going by doasm's naming the three classes are presumably
// from, from3 and to, and the integer code presumably selects how the
// bytes are interpreted; the Movtab type and the code that consumes this
// table are outside this part of the file — confirm there.
//
// The final all-zero entry appears to act as an end-of-table sentinel.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3109  
  3110  func isax(a *obj.Addr) bool {
  3111  	switch a.Reg {
  3112  	case REG_AX, REG_AL, REG_AH:
  3113  		return true
  3114  	}
  3115  
  3116  	if a.Index == REG_AX {
  3117  		return true
  3118  	}
  3119  	return false
  3120  }
  3121  
  3122  func subreg(p *obj.Prog, from int, to int) {
  3123  	if false { /* debug['Q'] */
  3124  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  3125  	}
  3126  
  3127  	if int(p.From.Reg) == from {
  3128  		p.From.Reg = int16(to)
  3129  		p.Ft = 0
  3130  	}
  3131  
  3132  	if int(p.To.Reg) == from {
  3133  		p.To.Reg = int16(to)
  3134  		p.Tt = 0
  3135  	}
  3136  
  3137  	if int(p.From.Index) == from {
  3138  		p.From.Index = int16(to)
  3139  		p.Ft = 0
  3140  	}
  3141  
  3142  	if int(p.To.Index) == from {
  3143  		p.To.Index = int16(to)
  3144  		p.Tt = 0
  3145  	}
  3146  
  3147  	if false { /* debug['Q'] */
  3148  		fmt.Printf("%v\n", p)
  3149  	}
  3150  }
  3151  
  3152  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3153  	switch op {
  3154  	case Pm, Pe, Pf2, Pf3:
  3155  		if osize != 1 {
  3156  			if op != Pm {
  3157  				ctxt.AsmBuf.Put1(byte(op))
  3158  			}
  3159  			ctxt.AsmBuf.Put1(Pm)
  3160  			z++
  3161  			op = int(o.op[z])
  3162  			break
  3163  		}
  3164  		fallthrough
  3165  
  3166  	default:
  3167  		if ctxt.AsmBuf.Len() == 0 || ctxt.AsmBuf.Last() != Pm {
  3168  			ctxt.AsmBuf.Put1(Pm)
  3169  		}
  3170  	}
  3171  
  3172  	ctxt.AsmBuf.Put1(byte(op))
  3173  	return z
  3174  }
  3175  
// bpduff1 is a pre-encoded two-instruction sequence that stores BP at
// -16(SP) and then points BP at that slot.
// NOTE(review): going by the name, presumably emitted ahead of a
// Duff's-device call to set up a frame pointer; the use site is not in
// this part of the file — confirm there.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
  3180  
// bpduff2 is a pre-encoded instruction that reloads BP from the word it
// currently points at.
// NOTE(review): presumably the counterpart of bpduff1, restoring the BP
// value that sequence saved; the use site is not in this part of the
// file — confirm there.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
  3184  
  3185  // Emit VEX prefix and opcode byte.
  3186  // The three addresses are the r/m, vvvv, and reg fields.
  3187  // The reg and rm arguments appear in the same order as the
  3188  // arguments to asmand, which typically follows the call to asmvex.
  3189  // The final two arguments are the VEX prefix (see encoding above)
  3190  // and the opcode byte.
  3191  // For details about vex prefix see:
  3192  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3193  func asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3194  	ctxt.Vexflag = 1
  3195  	rexR := 0
  3196  	if r != nil {
  3197  		rexR = regrex[r.Reg] & Rxr
  3198  	}
  3199  	rexB := 0
  3200  	rexX := 0
  3201  	if rm != nil {
  3202  		rexB = regrex[rm.Reg] & Rxb
  3203  		rexX = regrex[rm.Index] & Rxx
  3204  	}
  3205  	vexM := (vex >> 3) & 0xF
  3206  	vexWLP := vex & 0x87
  3207  	vexV := byte(0)
  3208  	if v != nil {
  3209  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3210  	}
  3211  	vexV ^= 0xF
  3212  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3213  		// Can use 2-byte encoding.
  3214  		ctxt.AsmBuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3215  	} else {
  3216  		// Must use 3-byte encoding.
  3217  		ctxt.AsmBuf.Put3(0xc4,
  3218  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3219  			vexV<<3|vexWLP,
  3220  		)
  3221  	}
  3222  	ctxt.AsmBuf.Put1(opcode)
  3223  }
  3224  
  3225  func doasm(ctxt *obj.Link, p *obj.Prog) {
  3226  	ctxt.Curp = p // TODO
  3227  
  3228  	o := opindex[p.As&obj.AMask]
  3229  
  3230  	if o == nil {
  3231  		ctxt.Diag("asmins: missing op %v", p)
  3232  		return
  3233  	}
  3234  
  3235  	pre := prefixof(ctxt, p, &p.From)
  3236  	if pre != 0 {
  3237  		ctxt.AsmBuf.Put1(byte(pre))
  3238  	}
  3239  	pre = prefixof(ctxt, p, &p.To)
  3240  	if pre != 0 {
  3241  		ctxt.AsmBuf.Put1(byte(pre))
  3242  	}
  3243  
  3244  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3245  	// which encodes as SHRQ $32(DX*0), AX.
  3246  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3247  	// Change encoding generated by assemblers and compilers and remove.
  3248  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3249  		p.From3 = new(obj.Addr)
  3250  		p.From3.Type = obj.TYPE_REG
  3251  		p.From3.Reg = p.From.Index
  3252  		p.From.Index = 0
  3253  	}
  3254  
  3255  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3256  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3257  	switch p.As {
  3258  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3259  		if p.From3Type() == obj.TYPE_NONE {
  3260  			p.From3 = new(obj.Addr)
  3261  			*p.From3 = p.From
  3262  			p.From = obj.Addr{}
  3263  			p.From.Type = obj.TYPE_CONST
  3264  			p.From.Offset = p.To.Offset
  3265  			p.To.Offset = 0
  3266  		}
  3267  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3268  		if p.From3Type() == obj.TYPE_NONE {
  3269  			p.From3 = new(obj.Addr)
  3270  			*p.From3 = p.To
  3271  			p.To = obj.Addr{}
  3272  			p.To.Type = obj.TYPE_CONST
  3273  			p.To.Offset = p.From3.Offset
  3274  			p.From3.Offset = 0
  3275  		}
  3276  	}
  3277  
  3278  	if p.Ft == 0 {
  3279  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3280  	}
  3281  	if p.Tt == 0 {
  3282  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3283  	}
  3284  
  3285  	ft := int(p.Ft) * Ymax
  3286  	f3t := Ynone * Ymax
  3287  	if p.From3 != nil {
  3288  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3289  	}
  3290  	tt := int(p.Tt) * Ymax
  3291  
  3292  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3293  	z := 0
  3294  	var a *obj.Addr
  3295  	var l int
  3296  	var op int
  3297  	var q *obj.Prog
  3298  	var r *obj.Reloc
  3299  	var rel obj.Reloc
  3300  	var v int64
  3301  	for i := range o.ytab {
  3302  		yt := &o.ytab[i]
  3303  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3304  			switch o.prefix {
  3305  			case Px1: /* first option valid only in 32-bit mode */
  3306  				if ctxt.Mode == 64 && z == 0 {
  3307  					z += int(yt.zoffset) + xo
  3308  					continue
  3309  				}
  3310  			case Pq: /* 16 bit escape and opcode escape */
  3311  				ctxt.AsmBuf.Put2(Pe, Pm)
  3312  
  3313  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3314  				ctxt.Rexflag |= Pw
  3315  				ctxt.AsmBuf.Put2(Pe, Pm)
  3316  
  3317  			case Pq4: /*  66 0F 38 */
  3318  				ctxt.AsmBuf.Put3(0x66, 0x0F, 0x38)
  3319  
  3320  			case Pf2, /* xmm opcode escape */
  3321  				Pf3:
  3322  				ctxt.AsmBuf.Put2(o.prefix, Pm)
  3323  
  3324  			case Pef3:
  3325  				ctxt.AsmBuf.Put3(Pe, Pf3, Pm)
  3326  
  3327  			case Pfw: /* xmm opcode escape + REX.W */
  3328  				ctxt.Rexflag |= Pw
  3329  				ctxt.AsmBuf.Put2(Pf3, Pm)
  3330  
  3331  			case Pm: /* opcode escape */
  3332  				ctxt.AsmBuf.Put1(Pm)
  3333  
  3334  			case Pe: /* 16 bit escape */
  3335  				ctxt.AsmBuf.Put1(Pe)
  3336  
  3337  			case Pw: /* 64-bit escape */
  3338  				if p.Mode != 64 {
  3339  					ctxt.Diag("asmins: illegal 64: %v", p)
  3340  				}
  3341  				ctxt.Rexflag |= Pw
  3342  
  3343  			case Pw8: /* 64-bit escape if z >= 8 */
  3344  				if z >= 8 {
  3345  					if p.Mode != 64 {
  3346  						ctxt.Diag("asmins: illegal 64: %v", p)
  3347  					}
  3348  					ctxt.Rexflag |= Pw
  3349  				}
  3350  
  3351  			case Pb: /* botch */
  3352  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3353  					goto bad
  3354  				}
  3355  				// NOTE(rsc): This is probably safe to do always,
  3356  				// but when enabled it chooses different encodings
  3357  				// than the old cmd/internal/obj/i386 code did,
  3358  				// which breaks our "same bits out" checks.
  3359  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3360  				// in the original obj/i386, and it would encode
  3361  				// (using a valid, shorter form) as 3c 00 if we enabled
  3362  				// the call to bytereg here.
  3363  				if p.Mode == 64 {
  3364  					bytereg(&p.From, &p.Ft)
  3365  					bytereg(&p.To, &p.Tt)
  3366  				}
  3367  
  3368  			case P32: /* 32 bit but illegal if 64-bit mode */
  3369  				if p.Mode == 64 {
  3370  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3371  				}
  3372  
  3373  			case Py: /* 64-bit only, no prefix */
  3374  				if p.Mode != 64 {
  3375  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3376  				}
  3377  
  3378  			case Py1: /* 64-bit only if z < 1, no prefix */
  3379  				if z < 1 && p.Mode != 64 {
  3380  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3381  				}
  3382  
  3383  			case Py3: /* 64-bit only if z < 3, no prefix */
  3384  				if z < 3 && p.Mode != 64 {
  3385  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3386  				}
  3387  			}
  3388  
  3389  			if z >= len(o.op) {
  3390  				log.Fatalf("asmins bad table %v", p)
  3391  			}
  3392  			op = int(o.op[z])
  3393  			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
  3394  			if op == 0x0f && o.prefix != Pvex {
  3395  				ctxt.AsmBuf.Put1(byte(op))
  3396  				z++
  3397  				op = int(o.op[z])
  3398  			}
  3399  
  3400  			switch yt.zcase {
  3401  			default:
  3402  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3403  				return
  3404  
  3405  			case Zpseudo:
  3406  				break
  3407  
  3408  			case Zlit:
  3409  				for ; ; z++ {
  3410  					op = int(o.op[z])
  3411  					if op == 0 {
  3412  						break
  3413  					}
  3414  					ctxt.AsmBuf.Put1(byte(op))
  3415  				}
  3416  
  3417  			case Zlitm_r:
  3418  				for ; ; z++ {
  3419  					op = int(o.op[z])
  3420  					if op == 0 {
  3421  						break
  3422  					}
  3423  					ctxt.AsmBuf.Put1(byte(op))
  3424  				}
  3425  				asmand(ctxt, p, &p.From, &p.To)
  3426  
  3427  			case Zmb_r:
  3428  				bytereg(&p.From, &p.Ft)
  3429  				fallthrough
  3430  
  3431  			case Zm_r:
  3432  				ctxt.AsmBuf.Put1(byte(op))
  3433  				asmand(ctxt, p, &p.From, &p.To)
  3434  
  3435  			case Zm2_r:
  3436  				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
  3437  				asmand(ctxt, p, &p.From, &p.To)
  3438  
  3439  			case Zm_r_xm:
  3440  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3441  				asmand(ctxt, p, &p.From, &p.To)
  3442  
  3443  			case Zm_r_xm_nr:
  3444  				ctxt.Rexflag = 0
  3445  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3446  				asmand(ctxt, p, &p.From, &p.To)
  3447  
  3448  			case Zm_r_i_xm:
  3449  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3450  				asmand(ctxt, p, &p.From, p.From3)
  3451  				ctxt.AsmBuf.Put1(byte(p.To.Offset))
  3452  
  3453  			case Zibm_r, Zibr_m:
  3454  				for {
  3455  					tmp1 := z
  3456  					z++
  3457  					op = int(o.op[tmp1])
  3458  					if op == 0 {
  3459  						break
  3460  					}
  3461  					ctxt.AsmBuf.Put1(byte(op))
  3462  				}
  3463  				if yt.zcase == Zibr_m {
  3464  					asmand(ctxt, p, &p.To, p.From3)
  3465  				} else {
  3466  					asmand(ctxt, p, p.From3, &p.To)
  3467  				}
  3468  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3469  
  3470  			case Zaut_r:
  3471  				ctxt.AsmBuf.Put1(0x8d) // leal
  3472  				if p.From.Type != obj.TYPE_ADDR {
  3473  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3474  				}
  3475  				p.From.Type = obj.TYPE_MEM
  3476  				asmand(ctxt, p, &p.From, &p.To)
  3477  				p.From.Type = obj.TYPE_ADDR
  3478  
  3479  			case Zm_o:
  3480  				ctxt.AsmBuf.Put1(byte(op))
  3481  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3482  
  3483  			case Zr_m:
  3484  				ctxt.AsmBuf.Put1(byte(op))
  3485  				asmand(ctxt, p, &p.To, &p.From)
  3486  
  3487  			case Zvex_rm_v_r:
  3488  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3489  				asmand(ctxt, p, &p.From, &p.To)
  3490  
  3491  			case Zvex_i_r_v:
  3492  				asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
  3493  				regnum := byte(0x7)
  3494  				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
  3495  					regnum &= byte(p.From3.Reg - REG_X0)
  3496  				} else {
  3497  					regnum &= byte(p.From3.Reg - REG_Y0)
  3498  				}
  3499  				ctxt.AsmBuf.Put1(byte(o.op[z+2]) | regnum)
  3500  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3501  
  3502  			case Zvex_i_rm_v_r:
  3503  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3504  				asmand(ctxt, p, &p.From, &p.To)
  3505  				ctxt.AsmBuf.Put1(byte(p.From3.Offset))
  3506  
  3507  			case Zvex_i_rm_r:
  3508  				asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
  3509  				asmand(ctxt, p, p.From3, &p.To)
  3510  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3511  
  3512  			case Zvex_v_rm_r:
  3513  				asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3514  				asmand(ctxt, p, p.From3, &p.To)
  3515  
  3516  			case Zvex_r_v_rm:
  3517  				asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3518  				asmand(ctxt, p, &p.To, &p.From)
  3519  
  3520  			case Zr_m_xm:
  3521  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3522  				asmand(ctxt, p, &p.To, &p.From)
  3523  
  3524  			case Zr_m_xm_nr:
  3525  				ctxt.Rexflag = 0
  3526  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3527  				asmand(ctxt, p, &p.To, &p.From)
  3528  
  3529  			case Zo_m:
  3530  				ctxt.AsmBuf.Put1(byte(op))
  3531  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3532  
  3533  			case Zcallindreg:
  3534  				r = obj.Addrel(ctxt.Cursym)
  3535  				r.Off = int32(p.Pc)
  3536  				r.Type = obj.R_CALLIND
  3537  				r.Siz = 0
  3538  				fallthrough
  3539  
  3540  			case Zo_m64:
  3541  				ctxt.AsmBuf.Put1(byte(op))
  3542  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3543  
  3544  			case Zm_ibo:
  3545  				ctxt.AsmBuf.Put1(byte(op))
  3546  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3547  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3548  
  3549  			case Zibo_m:
  3550  				ctxt.AsmBuf.Put1(byte(op))
  3551  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3552  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3553  
  3554  			case Zibo_m_xm:
  3555  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3556  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3557  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3558  
  3559  			case Z_ib, Zib_:
  3560  				if yt.zcase == Zib_ {
  3561  					a = &p.From
  3562  				} else {
  3563  					a = &p.To
  3564  				}
  3565  				ctxt.AsmBuf.Put1(byte(op))
  3566  				if p.As == AXABORT {
  3567  					ctxt.AsmBuf.Put1(o.op[z+1])
  3568  				}
  3569  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3570  
  3571  			case Zib_rp:
  3572  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3573  				ctxt.AsmBuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3574  
  3575  			case Zil_rp:
  3576  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3577  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3578  				if o.prefix == Pe {
  3579  					v = vaddr(ctxt, p, &p.From, nil)
  3580  					ctxt.AsmBuf.PutInt16(int16(v))
  3581  				} else {
  3582  					relput4(ctxt, p, &p.From)
  3583  				}
  3584  
  3585  			case Zo_iw:
  3586  				ctxt.AsmBuf.Put1(byte(op))
  3587  				if p.From.Type != obj.TYPE_NONE {
  3588  					v = vaddr(ctxt, p, &p.From, nil)
  3589  					ctxt.AsmBuf.PutInt16(int16(v))
  3590  				}
  3591  
  3592  			case Ziq_rp:
  3593  				v = vaddr(ctxt, p, &p.From, &rel)
  3594  				l = int(v >> 32)
  3595  				if l == 0 && rel.Siz != 8 {
  3596  					//p->mark |= 0100;
  3597  					//print("zero: %llux %v\n", v, p);
  3598  					ctxt.Rexflag &^= (0x40 | Rxw)
  3599  
  3600  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3601  					ctxt.AsmBuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3602  					if rel.Type != 0 {
  3603  						r = obj.Addrel(ctxt.Cursym)
  3604  						*r = rel
  3605  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3606  					}
  3607  
  3608  					ctxt.AsmBuf.PutInt32(int32(v))
  3609  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3610  
  3611  					//p->mark |= 0100;
  3612  					//print("sign: %llux %v\n", v, p);
  3613  					ctxt.AsmBuf.Put1(0xc7)
  3614  					asmando(ctxt, p, &p.To, 0)
  3615  
  3616  					ctxt.AsmBuf.PutInt32(int32(v)) // need all 8
  3617  				} else {
  3618  					//print("all: %llux %v\n", v, p);
  3619  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3620  					ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3621  					if rel.Type != 0 {
  3622  						r = obj.Addrel(ctxt.Cursym)
  3623  						*r = rel
  3624  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3625  					}
  3626  
  3627  					ctxt.AsmBuf.PutInt64(v)
  3628  				}
  3629  
  3630  			case Zib_rr:
  3631  				ctxt.AsmBuf.Put1(byte(op))
  3632  				asmand(ctxt, p, &p.To, &p.To)
  3633  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3634  
  3635  			case Z_il, Zil_:
  3636  				if yt.zcase == Zil_ {
  3637  					a = &p.From
  3638  				} else {
  3639  					a = &p.To
  3640  				}
  3641  				ctxt.AsmBuf.Put1(byte(op))
  3642  				if o.prefix == Pe {
  3643  					v = vaddr(ctxt, p, a, nil)
  3644  					ctxt.AsmBuf.PutInt16(int16(v))
  3645  				} else {
  3646  					relput4(ctxt, p, a)
  3647  				}
  3648  
  3649  			case Zm_ilo, Zilo_m:
  3650  				ctxt.AsmBuf.Put1(byte(op))
  3651  				if yt.zcase == Zilo_m {
  3652  					a = &p.From
  3653  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3654  				} else {
  3655  					a = &p.To
  3656  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3657  				}
  3658  
  3659  				if o.prefix == Pe {
  3660  					v = vaddr(ctxt, p, a, nil)
  3661  					ctxt.AsmBuf.PutInt16(int16(v))
  3662  				} else {
  3663  					relput4(ctxt, p, a)
  3664  				}
  3665  
  3666  			case Zil_rr:
  3667  				ctxt.AsmBuf.Put1(byte(op))
  3668  				asmand(ctxt, p, &p.To, &p.To)
  3669  				if o.prefix == Pe {
  3670  					v = vaddr(ctxt, p, &p.From, nil)
  3671  					ctxt.AsmBuf.PutInt16(int16(v))
  3672  				} else {
  3673  					relput4(ctxt, p, &p.From)
  3674  				}
  3675  
  3676  			case Z_rp:
  3677  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3678  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3679  
  3680  			case Zrp_:
  3681  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3682  				ctxt.AsmBuf.Put1(byte(op + reg[p.From.Reg]))
  3683  
  3684  			case Zclr:
  3685  				ctxt.Rexflag &^= Pw
  3686  				ctxt.AsmBuf.Put1(byte(op))
  3687  				asmand(ctxt, p, &p.To, &p.To)
  3688  
  3689  			case Zcallcon, Zjmpcon:
  3690  				if yt.zcase == Zcallcon {
  3691  					ctxt.AsmBuf.Put1(byte(op))
  3692  				} else {
  3693  					ctxt.AsmBuf.Put1(o.op[z+1])
  3694  				}
  3695  				r = obj.Addrel(ctxt.Cursym)
  3696  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3697  				r.Type = obj.R_PCREL
  3698  				r.Siz = 4
  3699  				r.Add = p.To.Offset
  3700  				ctxt.AsmBuf.PutInt32(0)
  3701  
  3702  			case Zcallind:
  3703  				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
  3704  				r = obj.Addrel(ctxt.Cursym)
  3705  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3706  				if p.Mode == 64 {
  3707  					r.Type = obj.R_PCREL
  3708  				} else {
  3709  					r.Type = obj.R_ADDR
  3710  				}
  3711  				r.Siz = 4
  3712  				r.Add = p.To.Offset
  3713  				r.Sym = p.To.Sym
  3714  				ctxt.AsmBuf.PutInt32(0)
  3715  
  3716  			case Zcall, Zcallduff:
  3717  				if p.To.Sym == nil {
  3718  					ctxt.Diag("call without target")
  3719  					log.Fatalf("bad code")
  3720  				}
  3721  
  3722  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3723  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3724  				}
  3725  
  3726  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
  3727  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3728  					// (the call jumps into the middle of the function).
  3729  					// This makes it possible to see call sites for duffcopy/duffzero in
  3730  					// BP-based profiling tools like Linux perf (which is the
  3731  					// whole point of obj.Framepointer_enabled).
  3732  					// MOVQ BP, -16(SP)
  3733  					// LEAQ -16(SP), BP
  3734  					ctxt.AsmBuf.Put(bpduff1)
  3735  				}
  3736  				ctxt.AsmBuf.Put1(byte(op))
  3737  				r = obj.Addrel(ctxt.Cursym)
  3738  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3739  				r.Sym = p.To.Sym
  3740  				r.Add = p.To.Offset
  3741  				r.Type = obj.R_CALL
  3742  				r.Siz = 4
  3743  				ctxt.AsmBuf.PutInt32(0)
  3744  
  3745  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
  3746  					// Pop BP pushed above.
  3747  					// MOVQ 0(BP), BP
  3748  					ctxt.AsmBuf.Put(bpduff2)
  3749  				}
  3750  
  3751  			// TODO: jump across functions needs reloc
  3752  			case Zbr, Zjmp, Zloop:
  3753  				if p.As == AXBEGIN {
  3754  					ctxt.AsmBuf.Put1(byte(op))
  3755  				}
  3756  				if p.To.Sym != nil {
  3757  					if yt.zcase != Zjmp {
  3758  						ctxt.Diag("branch to ATEXT")
  3759  						log.Fatalf("bad code")
  3760  					}
  3761  
  3762  					ctxt.AsmBuf.Put1(o.op[z+1])
  3763  					r = obj.Addrel(ctxt.Cursym)
  3764  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3765  					r.Sym = p.To.Sym
  3766  					r.Type = obj.R_PCREL
  3767  					r.Siz = 4
  3768  					ctxt.AsmBuf.PutInt32(0)
  3769  					break
  3770  				}
  3771  
  3772  				// Assumes q is in this function.
  3773  				// TODO: Check in input, preserve in brchain.
  3774  
  3775  				// Fill in backward jump now.
  3776  				q = p.Pcond
  3777  
  3778  				if q == nil {
  3779  					ctxt.Diag("jmp/branch/loop without target")
  3780  					log.Fatalf("bad code")
  3781  				}
  3782  
  3783  				if p.Back&1 != 0 {
  3784  					v = q.Pc - (p.Pc + 2)
  3785  					if v >= -128 && p.As != AXBEGIN {
  3786  						if p.As == AJCXZL {
  3787  							ctxt.AsmBuf.Put1(0x67)
  3788  						}
  3789  						ctxt.AsmBuf.Put2(byte(op), byte(v))
  3790  					} else if yt.zcase == Zloop {
  3791  						ctxt.Diag("loop too far: %v", p)
  3792  					} else {
  3793  						v -= 5 - 2
  3794  						if p.As == AXBEGIN {
  3795  							v--
  3796  						}
  3797  						if yt.zcase == Zbr {
  3798  							ctxt.AsmBuf.Put1(0x0f)
  3799  							v--
  3800  						}
  3801  
  3802  						ctxt.AsmBuf.Put1(o.op[z+1])
  3803  						ctxt.AsmBuf.PutInt32(int32(v))
  3804  					}
  3805  
  3806  					break
  3807  				}
  3808  
  3809  				// Annotate target; will fill in later.
  3810  				p.Forwd = q.Rel
  3811  
  3812  				q.Rel = p
  3813  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3814  					if p.As == AJCXZL {
  3815  						ctxt.AsmBuf.Put1(0x67)
  3816  					}
  3817  					ctxt.AsmBuf.Put2(byte(op), 0)
  3818  				} else if yt.zcase == Zloop {
  3819  					ctxt.Diag("loop too far: %v", p)
  3820  				} else {
  3821  					if yt.zcase == Zbr {
  3822  						ctxt.AsmBuf.Put1(0x0f)
  3823  					}
  3824  					ctxt.AsmBuf.Put1(o.op[z+1])
  3825  					ctxt.AsmBuf.PutInt32(0)
  3826  				}
  3827  
  3828  				break
  3829  
  3830  			/*
  3831  				v = q->pc - p->pc - 2;
  3832  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3833  					*ctxt->andptr++ = op;
  3834  					*ctxt->andptr++ = v;
  3835  				} else {
  3836  					v -= 5-2;
  3837  					if(yt.zcase == Zbr) {
  3838  						*ctxt->andptr++ = 0x0f;
  3839  						v--;
  3840  					}
  3841  					*ctxt->andptr++ = o->op[z+1];
  3842  					*ctxt->andptr++ = v;
  3843  					*ctxt->andptr++ = v>>8;
  3844  					*ctxt->andptr++ = v>>16;
  3845  					*ctxt->andptr++ = v>>24;
  3846  				}
  3847  			*/
  3848  
  3849  			case Zbyte:
  3850  				v = vaddr(ctxt, p, &p.From, &rel)
  3851  				if rel.Siz != 0 {
  3852  					rel.Siz = uint8(op)
  3853  					r = obj.Addrel(ctxt.Cursym)
  3854  					*r = rel
  3855  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3856  				}
  3857  
  3858  				ctxt.AsmBuf.Put1(byte(v))
  3859  				if op > 1 {
  3860  					ctxt.AsmBuf.Put1(byte(v >> 8))
  3861  					if op > 2 {
  3862  						ctxt.AsmBuf.PutInt16(int16(v >> 16))
  3863  						if op > 4 {
  3864  							ctxt.AsmBuf.PutInt32(int32(v >> 32))
  3865  						}
  3866  					}
  3867  				}
  3868  			}
  3869  
  3870  			return
  3871  		}
  3872  		z += int(yt.zoffset) + xo
  3873  	}
  3874  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3875  		var pp obj.Prog
  3876  		var t []byte
  3877  		if p.As == mo[0].as {
  3878  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3879  				t = mo[0].op[:]
  3880  				switch mo[0].code {
  3881  				default:
  3882  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3883  
  3884  				case 0: /* lit */
  3885  					for z = 0; t[z] != E; z++ {
  3886  						ctxt.AsmBuf.Put1(t[z])
  3887  					}
  3888  
  3889  				case 1: /* r,m */
  3890  					ctxt.AsmBuf.Put1(t[0])
  3891  					asmando(ctxt, p, &p.To, int(t[1]))
  3892  
  3893  				case 2: /* m,r */
  3894  					ctxt.AsmBuf.Put1(t[0])
  3895  					asmando(ctxt, p, &p.From, int(t[1]))
  3896  
  3897  				case 3: /* r,m - 2op */
  3898  					ctxt.AsmBuf.Put2(t[0], t[1])
  3899  					asmando(ctxt, p, &p.To, int(t[2]))
  3900  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3901  
  3902  				case 4: /* m,r - 2op */
  3903  					ctxt.AsmBuf.Put2(t[0], t[1])
  3904  					asmando(ctxt, p, &p.From, int(t[2]))
  3905  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3906  
  3907  				case 5: /* load full pointer, trash heap */
  3908  					if t[0] != 0 {
  3909  						ctxt.AsmBuf.Put1(t[0])
  3910  					}
  3911  					switch p.To.Index {
  3912  					default:
  3913  						goto bad
  3914  
  3915  					case REG_DS:
  3916  						ctxt.AsmBuf.Put1(0xc5)
  3917  
  3918  					case REG_SS:
  3919  						ctxt.AsmBuf.Put2(0x0f, 0xb2)
  3920  
  3921  					case REG_ES:
  3922  						ctxt.AsmBuf.Put1(0xc4)
  3923  
  3924  					case REG_FS:
  3925  						ctxt.AsmBuf.Put2(0x0f, 0xb4)
  3926  
  3927  					case REG_GS:
  3928  						ctxt.AsmBuf.Put2(0x0f, 0xb5)
  3929  					}
  3930  
  3931  					asmand(ctxt, p, &p.From, &p.To)
  3932  
  3933  				case 6: /* double shift */
  3934  					if t[0] == Pw {
  3935  						if p.Mode != 64 {
  3936  							ctxt.Diag("asmins: illegal 64: %v", p)
  3937  						}
  3938  						ctxt.Rexflag |= Pw
  3939  						t = t[1:]
  3940  					} else if t[0] == Pe {
  3941  						ctxt.AsmBuf.Put1(Pe)
  3942  						t = t[1:]
  3943  					}
  3944  
  3945  					switch p.From.Type {
  3946  					default:
  3947  						goto bad
  3948  
  3949  					case obj.TYPE_CONST:
  3950  						ctxt.AsmBuf.Put2(0x0f, t[0])
  3951  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3952  						ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3953  
  3954  					case obj.TYPE_REG:
  3955  						switch p.From.Reg {
  3956  						default:
  3957  							goto bad
  3958  
  3959  						case REG_CL, REG_CX:
  3960  							ctxt.AsmBuf.Put2(0x0f, t[1])
  3961  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3962  						}
  3963  					}
  3964  
  3965  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3966  				// where you load the TLS base register into a register and then index off that
  3967  				// register to access the actual TLS variables. Systems that allow direct TLS access
  3968  				// are handled in prefixof above and should not be listed here.
  3969  				case 7: /* mov tls, r */
  3970  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  3971  						ctxt.Diag("invalid load of TLS: %v", p)
  3972  					}
  3973  
  3974  					if p.Mode == 32 {
  3975  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3976  						// where you load the TLS base register into a register and then index off that
  3977  						// register to access the actual TLS variables. Systems that allow direct TLS access
  3978  						// are handled in prefixof above and should not be listed here.
  3979  						switch ctxt.Headtype {
  3980  						default:
  3981  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  3982  
  3983  						case obj.Hlinux,
  3984  							obj.Hnacl:
  3985  							if ctxt.Flag_shared {
  3986  								// Note that this is not generating the same insns as the other cases.
  3987  								//     MOV TLS, dst
  3988  								// becomes
  3989  								//     call __x86.get_pc_thunk.dst
  3990  								//     movl (gotpc + g@gotntpoff)(dst), dst
  3991  								// which is encoded as
  3992  								//     call __x86.get_pc_thunk.dst
  3993  								//     movq 0(dst), dst
  3994  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  3995  								// is g, which we can't check here, but will when we assemble the second
  3996  								// instruction.
  3997  								dst := p.To.Reg
  3998  								ctxt.AsmBuf.Put1(0xe8)
  3999  								r = obj.Addrel(ctxt.Cursym)
  4000  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4001  								r.Type = obj.R_CALL
  4002  								r.Siz = 4
  4003  								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk."+strings.ToLower(Rconv(int(dst))), 0)
  4004  								ctxt.AsmBuf.PutInt32(0)
  4005  
  4006  								ctxt.AsmBuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4007  								r = obj.Addrel(ctxt.Cursym)
  4008  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4009  								r.Type = obj.R_TLS_IE
  4010  								r.Siz = 4
  4011  								r.Add = 2
  4012  								ctxt.AsmBuf.PutInt32(0)
  4013  							} else {
  4014  								// ELF TLS base is 0(GS).
  4015  								pp.From = p.From
  4016  
  4017  								pp.From.Type = obj.TYPE_MEM
  4018  								pp.From.Reg = REG_GS
  4019  								pp.From.Offset = 0
  4020  								pp.From.Index = REG_NONE
  4021  								pp.From.Scale = 0
  4022  								ctxt.AsmBuf.Put2(0x65, // GS
  4023  									0x8B)
  4024  								asmand(ctxt, p, &pp.From, &p.To)
  4025  							}
  4026  						case obj.Hplan9:
  4027  							if ctxt.Plan9privates == nil {
  4028  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4029  							}
  4030  							pp.From = obj.Addr{}
  4031  							pp.From.Type = obj.TYPE_MEM
  4032  							pp.From.Name = obj.NAME_EXTERN
  4033  							pp.From.Sym = ctxt.Plan9privates
  4034  							pp.From.Offset = 0
  4035  							pp.From.Index = REG_NONE
  4036  							ctxt.AsmBuf.Put1(0x8B)
  4037  							asmand(ctxt, p, &pp.From, &p.To)
  4038  
  4039  						case obj.Hwindows, obj.Hwindowsgui:
  4040  							// Windows TLS base is always 0x14(FS).
  4041  							pp.From = p.From
  4042  
  4043  							pp.From.Type = obj.TYPE_MEM
  4044  							pp.From.Reg = REG_FS
  4045  							pp.From.Offset = 0x14
  4046  							pp.From.Index = REG_NONE
  4047  							pp.From.Scale = 0
  4048  							ctxt.AsmBuf.Put2(0x64, // FS
  4049  								0x8B)
  4050  							asmand(ctxt, p, &pp.From, &p.To)
  4051  						}
  4052  						break
  4053  					}
  4054  
  4055  					switch ctxt.Headtype {
  4056  					default:
  4057  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4058  
  4059  					case obj.Hlinux:
  4060  						if !ctxt.Flag_shared {
  4061  							log.Fatalf("unknown TLS base location for linux without -shared")
  4062  						}
  4063  						// Note that this is not generating the same insn as the other cases.
  4064  						//     MOV TLS, R_to
  4065  						// becomes
  4066  						//     movq g@gottpoff(%rip), R_to
  4067  						// which is encoded as
  4068  						//     movq 0(%rip), R_to
  4069  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4070  						// is g, which we can't check here, but will when we assemble the second
  4071  						// instruction.
  4072  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4073  
  4074  						ctxt.AsmBuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4075  						r = obj.Addrel(ctxt.Cursym)
  4076  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4077  						r.Type = obj.R_TLS_IE
  4078  						r.Siz = 4
  4079  						r.Add = -4
  4080  						ctxt.AsmBuf.PutInt32(0)
  4081  
  4082  					case obj.Hplan9:
  4083  						if ctxt.Plan9privates == nil {
  4084  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4085  						}
  4086  						pp.From = obj.Addr{}
  4087  						pp.From.Type = obj.TYPE_MEM
  4088  						pp.From.Name = obj.NAME_EXTERN
  4089  						pp.From.Sym = ctxt.Plan9privates
  4090  						pp.From.Offset = 0
  4091  						pp.From.Index = REG_NONE
  4092  						ctxt.Rexflag |= Pw
  4093  						ctxt.AsmBuf.Put1(0x8B)
  4094  						asmand(ctxt, p, &pp.From, &p.To)
  4095  
  4096  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4097  						// TLS base is 0(FS).
  4098  						pp.From = p.From
  4099  
  4100  						pp.From.Type = obj.TYPE_MEM
  4101  						pp.From.Name = obj.NAME_NONE
  4102  						pp.From.Reg = REG_NONE
  4103  						pp.From.Offset = 0
  4104  						pp.From.Index = REG_NONE
  4105  						pp.From.Scale = 0
  4106  						ctxt.Rexflag |= Pw
  4107  						ctxt.AsmBuf.Put2(0x64, // FS
  4108  							0x8B)
  4109  						asmand(ctxt, p, &pp.From, &p.To)
  4110  
  4111  					case obj.Hwindows, obj.Hwindowsgui:
  4112  						// Windows TLS base is always 0x28(GS).
  4113  						pp.From = p.From
  4114  
  4115  						pp.From.Type = obj.TYPE_MEM
  4116  						pp.From.Name = obj.NAME_NONE
  4117  						pp.From.Reg = REG_GS
  4118  						pp.From.Offset = 0x28
  4119  						pp.From.Index = REG_NONE
  4120  						pp.From.Scale = 0
  4121  						ctxt.Rexflag |= Pw
  4122  						ctxt.AsmBuf.Put2(0x65, // GS
  4123  							0x8B)
  4124  						asmand(ctxt, p, &pp.From, &p.To)
  4125  					}
  4126  				}
  4127  				return
  4128  			}
  4129  		}
  4130  	}
  4131  	goto bad
  4132  
  4133  bad:
  4134  	if p.Mode != 64 {
  4135  		/*
  4136  		 * here, the assembly has failed.
  4137  		 * if its a byte instruction that has
  4138  		 * unaddressable registers, try to
  4139  		 * exchange registers and reissue the
  4140  		 * instruction with the operands renamed.
  4141  		 */
  4142  		pp := *p
  4143  
  4144  		unbytereg(&pp.From, &pp.Ft)
  4145  		unbytereg(&pp.To, &pp.Tt)
  4146  
  4147  		z := int(p.From.Reg)
  4148  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4149  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4150  			// For now, different to keep bit-for-bit compatibility.
  4151  			if p.Mode == 32 {
  4152  				breg := byteswapreg(ctxt, &p.To)
  4153  				if breg != REG_AX {
  4154  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4155  					asmando(ctxt, p, &p.From, reg[breg])
  4156  					subreg(&pp, z, breg)
  4157  					doasm(ctxt, &pp)
  4158  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4159  					asmando(ctxt, p, &p.From, reg[breg])
  4160  				} else {
  4161  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4162  					subreg(&pp, z, REG_AX)
  4163  					doasm(ctxt, &pp)
  4164  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4165  				}
  4166  				return
  4167  			}
  4168  
  4169  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4170  				// We certainly don't want to exchange
  4171  				// with AX if the op is MUL or DIV.
  4172  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4173  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4174  				subreg(&pp, z, REG_BX)
  4175  				doasm(ctxt, &pp)
  4176  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4177  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4178  			} else {
  4179  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4180  				subreg(&pp, z, REG_AX)
  4181  				doasm(ctxt, &pp)
  4182  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4183  			}
  4184  			return
  4185  		}
  4186  
  4187  		z = int(p.To.Reg)
  4188  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4189  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4190  			// For now, different to keep bit-for-bit compatibility.
  4191  			if p.Mode == 32 {
  4192  				breg := byteswapreg(ctxt, &p.From)
  4193  				if breg != REG_AX {
  4194  					ctxt.AsmBuf.Put1(0x87) //xchg rhs,bx
  4195  					asmando(ctxt, p, &p.To, reg[breg])
  4196  					subreg(&pp, z, breg)
  4197  					doasm(ctxt, &pp)
  4198  					ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4199  					asmando(ctxt, p, &p.To, reg[breg])
  4200  				} else {
  4201  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4202  					subreg(&pp, z, REG_AX)
  4203  					doasm(ctxt, &pp)
  4204  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4205  				}
  4206  				return
  4207  			}
  4208  
  4209  			if isax(&p.From) {
  4210  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4211  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4212  				subreg(&pp, z, REG_BX)
  4213  				doasm(ctxt, &pp)
  4214  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4215  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4216  			} else {
  4217  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4218  				subreg(&pp, z, REG_AX)
  4219  				doasm(ctxt, &pp)
  4220  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4221  			}
  4222  			return
  4223  		}
  4224  	}
  4225  
  4226  	ctxt.Diag("invalid instruction: %v", p)
  4227  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4228  	return
  4229  }
  4230  
  4231  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4232  // which is not referenced in a.
  4233  // If a is empty, it returns BX to account for MULB-like instructions
  4234  // that might use DX and AX.
  4235  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4236  	cand := 1
  4237  	canc := cand
  4238  	canb := canc
  4239  	cana := canb
  4240  
  4241  	if a.Type == obj.TYPE_NONE {
  4242  		cand = 0
  4243  		cana = cand
  4244  	}
  4245  
  4246  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4247  		switch a.Reg {
  4248  		case REG_NONE:
  4249  			cand = 0
  4250  			cana = cand
  4251  
  4252  		case REG_AX, REG_AL, REG_AH:
  4253  			cana = 0
  4254  
  4255  		case REG_BX, REG_BL, REG_BH:
  4256  			canb = 0
  4257  
  4258  		case REG_CX, REG_CL, REG_CH:
  4259  			canc = 0
  4260  
  4261  		case REG_DX, REG_DL, REG_DH:
  4262  			cand = 0
  4263  		}
  4264  	}
  4265  
  4266  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4267  		switch a.Index {
  4268  		case REG_AX:
  4269  			cana = 0
  4270  
  4271  		case REG_BX:
  4272  			canb = 0
  4273  
  4274  		case REG_CX:
  4275  			canc = 0
  4276  
  4277  		case REG_DX:
  4278  			cand = 0
  4279  		}
  4280  	}
  4281  
  4282  	if cana != 0 {
  4283  		return REG_AX
  4284  	}
  4285  	if canb != 0 {
  4286  		return REG_BX
  4287  	}
  4288  	if canc != 0 {
  4289  		return REG_CX
  4290  	}
  4291  	if cand != 0 {
  4292  		return REG_DX
  4293  	}
  4294  
  4295  	ctxt.Diag("impossible byte register")
  4296  	log.Fatalf("bad code")
  4297  	return 0
  4298  }
  4299  
  4300  func isbadbyte(a *obj.Addr) bool {
  4301  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4302  }
  4303  
  4304  var naclret = []uint8{
  4305  	0x5e, // POPL SI
  4306  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4307  	0x83,
  4308  	0xe6,
  4309  	0xe0, // ANDL $~31, SI
  4310  	0x4c,
  4311  	0x01,
  4312  	0xfe, // ADDQ R15, SI
  4313  	0xff,
  4314  	0xe6, // JMP SI
  4315  }
  4316  
  4317  var naclret8 = []uint8{
  4318  	0x5d, // POPL BP
  4319  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4320  	0x83,
  4321  	0xe5,
  4322  	0xe0, // ANDL $~31, BP
  4323  	0xff,
  4324  	0xe5, // JMP BP
  4325  }
  4326  
  4327  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4328  
  4329  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4330  
  4331  var naclmovs = []uint8{
  4332  	0x89,
  4333  	0xf6, // MOVL SI, SI
  4334  	0x49,
  4335  	0x8d,
  4336  	0x34,
  4337  	0x37, // LEAQ (R15)(SI*1), SI
  4338  	0x89,
  4339  	0xff, // MOVL DI, DI
  4340  	0x49,
  4341  	0x8d,
  4342  	0x3c,
  4343  	0x3f, // LEAQ (R15)(DI*1), DI
  4344  }
  4345  
  4346  var naclstos = []uint8{
  4347  	0x89,
  4348  	0xff, // MOVL DI, DI
  4349  	0x49,
  4350  	0x8d,
  4351  	0x3c,
  4352  	0x3f, // LEAQ (R15)(DI*1), DI
  4353  }
  4354  
  4355  func nacltrunc(ctxt *obj.Link, reg int) {
  4356  	if reg >= REG_R8 {
  4357  		ctxt.AsmBuf.Put1(0x45)
  4358  	}
  4359  	reg = (reg - REG_AX) & 7
  4360  	ctxt.AsmBuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4361  }
  4362  
  4363  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4364  	ctxt.AsmBuf.Reset()
  4365  	ctxt.Asmode = int(p.Mode)
  4366  
  4367  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4368  		switch p.As {
  4369  		case obj.ARET:
  4370  			ctxt.AsmBuf.Put(naclret8)
  4371  			return
  4372  
  4373  		case obj.ACALL,
  4374  			obj.AJMP:
  4375  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4376  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4377  			}
  4378  
  4379  		case AINT:
  4380  			ctxt.AsmBuf.Put1(0xf4)
  4381  			return
  4382  		}
  4383  	}
  4384  
  4385  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4386  		if p.As == AREP {
  4387  			ctxt.Rep++
  4388  			return
  4389  		}
  4390  
  4391  		if p.As == AREPN {
  4392  			ctxt.Repn++
  4393  			return
  4394  		}
  4395  
  4396  		if p.As == ALOCK {
  4397  			ctxt.Lock++
  4398  			return
  4399  		}
  4400  
  4401  		if p.As != ALEAQ && p.As != ALEAL {
  4402  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4403  				nacltrunc(ctxt, int(p.From.Index))
  4404  			}
  4405  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4406  				nacltrunc(ctxt, int(p.To.Index))
  4407  			}
  4408  		}
  4409  
  4410  		switch p.As {
  4411  		case obj.ARET:
  4412  			ctxt.AsmBuf.Put(naclret)
  4413  			return
  4414  
  4415  		case obj.ACALL,
  4416  			obj.AJMP:
  4417  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4418  				// ANDL $~31, reg
  4419  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4420  				// ADDQ R15, reg
  4421  				ctxt.AsmBuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4422  			}
  4423  
  4424  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4425  				// ANDL $~31, reg
  4426  				ctxt.AsmBuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4427  				// ADDQ R15, reg
  4428  				ctxt.AsmBuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4429  			}
  4430  
  4431  		case AINT:
  4432  			ctxt.AsmBuf.Put1(0xf4)
  4433  			return
  4434  
  4435  		case ASCASB,
  4436  			ASCASW,
  4437  			ASCASL,
  4438  			ASCASQ,
  4439  			ASTOSB,
  4440  			ASTOSW,
  4441  			ASTOSL,
  4442  			ASTOSQ:
  4443  			ctxt.AsmBuf.Put(naclstos)
  4444  
  4445  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4446  			ctxt.AsmBuf.Put(naclmovs)
  4447  		}
  4448  
  4449  		if ctxt.Rep != 0 {
  4450  			ctxt.AsmBuf.Put1(0xf3)
  4451  			ctxt.Rep = 0
  4452  		}
  4453  
  4454  		if ctxt.Repn != 0 {
  4455  			ctxt.AsmBuf.Put1(0xf2)
  4456  			ctxt.Repn = 0
  4457  		}
  4458  
  4459  		if ctxt.Lock != 0 {
  4460  			ctxt.AsmBuf.Put1(0xf0)
  4461  			ctxt.Lock = 0
  4462  		}
  4463  	}
  4464  
  4465  	ctxt.Rexflag = 0
  4466  	ctxt.Vexflag = 0
  4467  	mark := ctxt.AsmBuf.Len()
  4468  	ctxt.Asmode = int(p.Mode)
  4469  	doasm(ctxt, p)
  4470  	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
  4471  		/*
  4472  		 * as befits the whole approach of the architecture,
  4473  		 * the rex prefix must appear before the first opcode byte
  4474  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4475  		 * before the 0f opcode escape!), or it might be ignored.
  4476  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4477  		 */
  4478  		if p.Mode != 64 {
  4479  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4480  		}
  4481  		n := ctxt.AsmBuf.Len()
  4482  		var np int
  4483  		for np = mark; np < n; np++ {
  4484  			c := ctxt.AsmBuf.Peek(np)
  4485  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4486  				break
  4487  			}
  4488  		}
  4489  		ctxt.AsmBuf.Insert(np, byte(0x40|ctxt.Rexflag))
  4490  	}
  4491  
  4492  	n := ctxt.AsmBuf.Len()
  4493  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4494  		r := &ctxt.Cursym.R[i]
  4495  		if int64(r.Off) < p.Pc {
  4496  			break
  4497  		}
  4498  		if ctxt.Rexflag != 0 {
  4499  			r.Off++
  4500  		}
  4501  		if r.Type == obj.R_PCREL {
  4502  			if p.Mode == 64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4503  				// PC-relative addressing is relative to the end of the instruction,
  4504  				// but the relocations applied by the linker are relative to the end
  4505  				// of the relocation. Because immediate instruction
  4506  				// arguments can follow the PC-relative memory reference in the
  4507  				// instruction encoding, the two may not coincide. In this case,
  4508  				// adjust addend so that linker can keep relocating relative to the
  4509  				// end of the relocation.
  4510  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4511  			} else if p.Mode == 32 {
  4512  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4513  				// assumes that the previous instruction loaded the PC of the end
  4514  				// of that instruction into CX, so the adjustment is relative to
  4515  				// that.
  4516  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4517  			}
  4518  		}
  4519  		if r.Type == obj.R_GOTPCREL && p.Mode == 32 {
  4520  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4521  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4522  		}
  4523  
  4524  	}
  4525  
  4526  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4527  		switch p.To.Reg {
  4528  		case REG_SP:
  4529  			ctxt.AsmBuf.Put(naclspfix)
  4530  		case REG_BP:
  4531  			ctxt.AsmBuf.Put(naclbpfix)
  4532  		}
  4533  	}
  4534  }