github.com/FenixAra/go@v0.0.0-20170127160404-96ea0918e670/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"encoding/binary"
    36  	"fmt"
    37  	"log"
    38  	"strings"
    39  )
    40  
    41  // Instruction layout.
    42  
    43  const (
    44  	// Loop alignment constants:
    45  	// want to align loop entry to LoopAlign-byte boundary,
    46  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    47  	// We define a loop entry as the target of a backward jump.
    48  	//
    49  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    50  	// and it aligns all jump targets, not just backward jump targets.
    51  	//
    52  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    53  	// is very slight but negative, so the alignment is disabled by
    54  	// setting MaxLoopPad = 0. The code is here for reference and
    55  	// for future experiments.
    56  	//
    57  	LoopAlign  = 16
    58  	MaxLoopPad = 0
    59  	funcAlign  = 16
    60  )
    61  
// Optab is one row of the optab instruction table. It ties an instruction
// to the operand patterns it accepts and to the bytes used to encode it.
type Optab struct {
	as     obj.As    // instruction this row describes (for example AADDL)
	ytab   []ytab    // accepted operand patterns, scanned in order
	prefix uint8     // one of the P* constants (Px, Pe, Pw, ...) controlling prefix bytes
	op     [23]uint8 // opcode bytes; successive ytab rows consume successive slices of it
}
    68  
    69  type ytab struct {
    70  	from    uint8
    71  	from3   uint8
    72  	to      uint8
    73  	zcase   uint8
    74  	zoffset uint8
    75  }
    76  
// Movtab is a table row keyed by instruction and operand types.
// NOTE(review): the table built from this type and the code that reads it
// are outside this section, so the field notes below are inferred from the
// names and should be confirmed.
type Movtab struct {
	as   obj.As   // instruction the row applies to
	ft   uint8    // presumably the class of the from operand — TODO confirm
	f3t  uint8    // presumably the class of the from3 operand — TODO confirm
	tt   uint8    // presumably the class of the to operand — TODO confirm
	code uint8    // NOTE(review): meaning not visible from here
	op   [4]uint8 // up to four bytes of encoding data
}
    85  
// Operand classes ("Ytypes"). Every operand is classified as one of these
// values, and each ytab row names the classes it accepts for its from,
// from3 and to operands. A specific class may also satisfy more general
// ones; that relation is recorded in ycover.
//
// The values are assigned by iota, so the order of this list is
// significant, and Ymax must remain last because it sizes ycover.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32 // signed 32-bit constant
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr
	Yxm
	Yyr
	Yym
	Ytls
	Ytextsize
	Yindir
	Ymax // number of operand classes; ycover has Ymax*Ymax entries
)
   159  
// Encoding cases ("Ztypes"). The zcase field of a ytab row holds one of
// these values; together with the row's opcode bytes it determines which
// bytes are laid down for a matching instruction.
//
// The values are assigned by iota, so the order of this list is
// significant.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m // opcode byte, then the operand's addressing-mode bytes, then one immediate byte
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m // like Zibo_m, but with a long (32-bit) immediate
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	Zvex_rm_v_r
	Zvex_r_v_rm
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zmax // one past the last encoding case
)
   216  
   217  const (
   218  	Px   = 0
   219  	Px1  = 1    // symbolic; exact value doesn't matter
   220  	P32  = 0x32 /* 32-bit only */
   221  	Pe   = 0x66 /* operand escape */
   222  	Pm   = 0x0f /* 2byte opcode escape */
   223  	Pq   = 0xff /* both escapes: 66 0f */
   224  	Pb   = 0xfe /* byte operands */
   225  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   226  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   227  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   228  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   229  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   230  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   231  	Pw   = 0x48 /* Rex.w */
   232  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   233  	Py   = 0x80 /* defaults to 64-bit mode */
   234  	Py1  = 0x81 // symbolic; exact value doesn't matter
   235  	Py3  = 0x83 // symbolic; exact value doesn't matter
   236  	Pvex = 0x84 // symbolic: exact value doesn't matter
   237  
   238  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   239  	Rxr = 1 << 2 /* extend modrm reg */
   240  	Rxx = 1 << 1 /* extend sib index */
   241  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   242  )
   243  
   244  const (
   245  	// Encoding for VEX prefix in tables.
   246  	// The P, L, and W fields are chosen to match
   247  	// their eventual locations in the VEX prefix bytes.
   248  
   249  	// P field - 2 bits
   250  	vex66 = 1 << 0
   251  	vexF3 = 2 << 0
   252  	vexF2 = 3 << 0
   253  	// L field - 1 bit
   254  	vexLZ  = 0 << 2
   255  	vexLIG = 0 << 2
   256  	vex128 = 0 << 2
   257  	vex256 = 1 << 2
   258  	// W field - 1 bit
   259  	vexWIG = 0 << 7
   260  	vexW0  = 0 << 7
   261  	vexW1  = 1 << 7
   262  	// M field - 5 bits, but mostly reserved; we can store up to 4
   263  	vex0F   = 1 << 3
   264  	vex0F38 = 2 << 3
   265  	vex0F3A = 3 << 3
   266  
   267  	// Combinations used in the manual.
   268  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   269  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   270  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   271  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   272  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   273  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   274  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   275  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   276  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   277  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   278  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   279  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   280  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   281  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   282  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   283  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   284  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   285  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   286  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   287  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   288  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   289  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   290  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   291  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   292  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   293  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   294  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   295  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   296  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   297  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   298  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   299  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   300  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   301  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   302  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   303  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   304  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   305  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   306  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   307  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   308  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   309  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   310  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   311  )
   312  
// ycover records which operand classes satisfy which others. It is a
// flattened Ymax-by-Ymax matrix: ycover[a*Ymax+b] != 0 means an operand of
// class a also counts as class b. It is set up in instinit.
var ycover [Ymax * Ymax]uint8

// reg is a table with one entry per register number below MAXREG.
// NOTE(review): its contents are filled in outside this section —
// presumably the hardware encoding of each register; confirm.
var reg [MAXREG]int

// regrex is a table with MAXREG+1 entries, parallel to reg.
// NOTE(review): presumably the Rex prefix bits each register requires —
// confirm against the initialisation code, which is not visible here.
var regrex [MAXREG + 1]int
   318  
// Operand-pattern tables. Each row is {from, from3, to, zcase, zoffset}:
// three operand classes, the encoding case to use on a match, and the
// number of Optab.op bytes that belong to the row.

// ynone is the pattern for instructions written without operands: all
// three operand slots must be Ynone. In optab it backs entries such as
// AAAA, AAAD, ACDQ and ACLC.
var ynone = []ytab{
	{Ynone, Ynone, Ynone, Zlit, 1},
}

// ytext accepts a Ymb source with a Ytextsize destination, with or without
// a Yi32 middle (from3) operand. Both rows use Zpseudo.
var ytext = []ytab{
	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
}

// ynop accepts either no operands at all, or a single operand of class
// Yiauto, Yml, Yrf or Yxr in either the from or the to slot. Every row
// uses Zpseudo.
var ynop = []ytab{
	{Ynone, Ynone, Ynone, Zpseudo, 0},
	{Ynone, Ynone, Yiauto, Zpseudo, 0},
	{Ynone, Ynone, Yml, Zpseudo, 0},
	{Ynone, Ynone, Yrf, Zpseudo, 0},
	{Ynone, Ynone, Yxr, Zpseudo, 0},
	{Yiauto, Ynone, Ynone, Zpseudo, 0},
	{Yml, Ynone, Ynone, Zpseudo, 0},
	{Yrf, Ynone, Ynone, Zpseudo, 0},
	{Yxr, Ynone, Ynone, Zpseudo, 1},
}

// yfuncdata accepts a Yi32 source and a Ym destination (Zpseudo).
var yfuncdata = []ytab{
	{Yi32, Ynone, Ym, Zpseudo, 0},
}

// ypcdata accepts Yi32 in both the from and the to slot (Zpseudo).
var ypcdata = []ytab{
	{Yi32, Ynone, Yi32, Zpseudo, 0},
}
   347  
// yxorb holds the operand patterns shared by the byte-sized entries AADCB,
// AADDB and AANDB in optab: an immediate into Yal, an immediate into Ymb,
// Yrb into Ymb, and Ymb into Yrb.
var yxorb = []ytab{
	{Yi32, Ynone, Yal, Zib_, 1},
	{Yi32, Ynone, Ymb, Zibo_m, 2},
	{Yrb, Ynone, Ymb, Zr_m, 1},
	{Ymb, Ynone, Yrb, Zm_r, 1},
}

// yaddl holds the operand patterns used by AADDL and its siblings in optab
// (AADCL/Q/W, AADDQ/W, AANDL/Q/W). Row order fixes which opcode bytes each
// row owns: with op = {0x83, 00, 0x05, 0x81, 00, 0x01, 0x03} the five rows
// begin at offsets 0, 2, 3, 5 and 6 respectively.
var yaddl = []ytab{
	{Yi8, Ynone, Yml, Zibo_m, 2},
	{Yi32, Ynone, Yax, Zil_, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
}

// yincl accepts a single destination operand: Yrl (encoded with Z_rp) or
// Yml (encoded with Zo_m).
var yincl = []ytab{
	{Ynone, Ynone, Yrl, Z_rp, 1},
	{Ynone, Ynone, Yml, Zo_m, 2},
}

// yincq is yincl without the Yrl/Z_rp row: only the Yml destination form.
var yincq = []ytab{
	{Ynone, Ynone, Yml, Zo_m, 2},
}
   371  
// ycmpb mirrors yxorb with the operand roles swapped: the immediate is the
// to operand and Yal/Ymb is the from operand, and the register rows appear
// in the opposite order.
var ycmpb = []ytab{
	{Yal, Ynone, Yi32, Z_ib, 1},
	{Ymb, Ynone, Yi32, Zm_ibo, 2},
	{Ymb, Ynone, Yrb, Zm_r, 1},
	{Yrb, Ynone, Ymb, Zr_m, 1},
}

// ycmpl mirrors yaddl in the same way: immediates appear in the to slot
// and the Z cases are the operand-swapped variants (Zm_ibo, Z_il, Zm_ilo).
var ycmpl = []ytab{
	{Yml, Ynone, Yi8, Zm_ibo, 2},
	{Yax, Ynone, Yi32, Z_il, 1},
	{Yml, Ynone, Yi32, Zm_ilo, 2},
	{Yml, Ynone, Yrl, Zm_r, 1},
	{Yrl, Ynone, Yml, Zr_m, 1},
}

// yshb accepts a Ymb destination with a source of $1 (Yi1), a general
// immediate (Yi32, sent as one byte by Zibo_m) or Ycx.
// NOTE(review): unlike yshl there is no Ycl row here.
var yshb = []ytab{
	{Yi1, Ynone, Ymb, Zo_m, 2},
	{Yi32, Ynone, Ymb, Zibo_m, 2},
	{Ycx, Ynone, Ymb, Zo_m, 2},
}

// yshl accepts a Yml destination with a source of $1 (Yi1), a general
// immediate (Yi32), Ycl or Ycx.
var yshl = []ytab{
	{Yi1, Ynone, Yml, Zo_m, 2},
	{Yi32, Ynone, Yml, Zibo_m, 2},
	{Ycl, Ynone, Yml, Zo_m, 2},
	{Ycx, Ynone, Yml, Zo_m, 2},
}

// ytestl has the same rows as yaddl except that it has no Yi8 row.
var ytestl = []ytab{
	{Yi32, Ynone, Yax, Zil_, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
}
   406  
// ymovb lists byte-sized move forms: Yrb to Ymb, Ymb to Yrb, an immediate
// into Yrb (Zib_rp) and an immediate into Ymb (Zibo_m).
var ymovb = []ytab{
	{Yrb, Ynone, Ymb, Zr_m, 1},
	{Ymb, Ynone, Yrb, Zm_r, 1},
	{Yi32, Ynone, Yrb, Zib_rp, 1},
	{Yi32, Ynone, Ymb, Zibo_m, 2},
}

// ybtl is used by the bit-test entries in optab (ABTL, ABTCL, ABTRL,
// ABTSL and their Q/W variants): an 8-bit immediate or a Yrl register
// applied to a Yml operand.
var ybtl = []ytab{
	{Yi8, Ynone, Yml, Zibo_m, 2},
	{Yrl, Ynone, Yml, Zr_m, 1},
}

// ymovw lists the move forms: register/memory in both directions, $0 into
// a register (Zclr), an immediate into a register or into Yml, and a
// Yiauto source into a register (Zaut_r).
var ymovw = []ytab{
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
	{Yi0, Ynone, Yrl, Zclr, 1},
	{Yi32, Ynone, Yrl, Zil_rp, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yiauto, Ynone, Yrl, Zaut_r, 2},
}

// ymovl has the same rows as ymovw plus four MOVD rows that move between
// Yml and the MMX (Ymr) or XMM (Yxr) registers, inserted before the final
// Yiauto row.
var ymovl = []ytab{
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
	{Yi0, Ynone, Yrl, Zclr, 1},
	{Yi32, Ynone, Yrl, Zil_rp, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
	{Yiauto, Ynone, Yrl, Zaut_r, 2},
}
   440  
// yret accepts either no operands or a single Yi32 source; both rows use
// the Zo_iw case.
var yret = []ytab{
	{Ynone, Ynone, Ynone, Zo_iw, 1},
	{Yi32, Ynone, Ynone, Zo_iw, 1},
}

// ymovq lists the 64-bit move forms. The per-row comments give the prefix
// and opcode byte each row is paired with in the corresponding optab
// entry; that entry is outside this section, so the rows must stay in this
// order to keep the pairing intact.
var ymovq = []ytab{
	// valid in 32-bit mode
	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64

	// valid only in 64-bit mode, usually with 64-bit prefix
	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
}
   467  
// Single-pattern tables named after their operand classes (from_to).

// ym_rl: Ym source, Yrl destination.
var ym_rl = []ytab{
	{Ym, Ynone, Yrl, Zm_r, 1},
}

// yrl_m: Yrl source, Ym destination. Used in optab by ABOUNDL and ABOUNDW.
var yrl_m = []ytab{
	{Yrl, Ynone, Ym, Zr_m, 1},
}

// ymb_rl: Ymb source, Yrl destination, encoded with the Zmb_r case.
var ymb_rl = []ytab{
	{Ymb, Ynone, Yrl, Zmb_r, 1},
}

// yml_rl: Yml source, Yrl destination. Used in optab by the ABSF*, ABSR*
// and ACMOV* entries.
var yml_rl = []ytab{
	{Yml, Ynone, Yrl, Zm_r, 1},
}

// yrl_ml: Yrl source, Yml destination. Used in optab by AARPL.
var yrl_ml = []ytab{
	{Yrl, Ynone, Yml, Zr_m, 1},
}

// yml_mb has two rows, Yrb to Ymb and Ymb to Yrb — the same as the first
// two rows of ymovb.
// NOTE(review): the name suggests a Yml/Ymb pairing, but both rows use
// byte classes only.
var yml_mb = []ytab{
	{Yrb, Ynone, Ymb, Zr_m, 1},
	{Ymb, Ynone, Yrb, Zm_r, 1},
}

// yrb_mb: Yrb source, Ymb destination.
var yrb_mb = []ytab{
	{Yrb, Ynone, Ymb, Zr_m, 1},
}

// yxchg lists exchange forms: the two rows involving Yax come first and
// use the Z_rp/Zrp_ cases, followed by the general register/memory forms
// in both directions.
var yxchg = []ytab{
	{Yax, Ynone, Yrl, Z_rp, 1},
	{Yrl, Ynone, Yax, Zrp_, 1},
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
}
   503  
// ydivl accepts a single Yml source operand and no destination.
var ydivl = []ytab{
	{Yml, Ynone, Ynone, Zm_o, 2},
}

// ydivb is the byte-class counterpart of ydivl: a single Ymb source.
var ydivb = []ytab{
	{Ymb, Ynone, Ynone, Zm_o, 2},
}

// yimul lists four forms: a lone Yml source, an 8-bit or 32-bit immediate
// with a Yrl destination (Zib_rr / Zil_rr), and Yml into Yrl.
var yimul = []ytab{
	{Yml, Ynone, Ynone, Zm_o, 2},
	{Yi8, Ynone, Yrl, Zib_rr, 1},
	{Yi32, Ynone, Yrl, Zil_rr, 1},
	{Yml, Ynone, Yrl, Zm_r, 2},
}

// yimul3 is the three-operand form: a Yi8 immediate, a Yml middle (from3)
// operand and a Yrl destination.
var yimul3 = []ytab{
	{Yi8, Yml, Yrl, Zibm_r, 2},
}

// ybyte accepts a single Yi64 source and uses the Zbyte case. Used in
// optab by ABYTE.
var ybyte = []ytab{
	{Yi64, Ynone, Ynone, Zbyte, 1},
}

// yin accepts either a single Yi32 source (Zib_) or no operands (Zlit).
var yin = []ytab{
	{Yi32, Ynone, Ynone, Zib_, 1},
	{Ynone, Ynone, Ynone, Zlit, 1},
}

// yint accepts a single Yi32 source, encoded with Zib_.
var yint = []ytab{
	{Yi32, Ynone, Ynone, Zib_, 1},
}
   535  
// ypushl accepts a single source operand: a register (Yrl), memory (Ym),
// or an 8-bit or 32-bit immediate.
var ypushl = []ytab{
	{Yrl, Ynone, Ynone, Zrp_, 1},
	{Ym, Ynone, Ynone, Zm_o, 2},
	{Yi8, Ynone, Ynone, Zib_, 1},
	{Yi32, Ynone, Ynone, Zil_, 1},
}

// ypopl accepts a single destination operand: a register (Yrl) or memory
// (Ym).
var ypopl = []ytab{
	{Ynone, Ynone, Yrl, Z_rp, 1},
	{Ynone, Ynone, Ym, Zo_m, 2},
}

// ybswap accepts a single Yrl destination. It owns two opcode bytes; the
// ABSWAPL/ABSWAPQ entries in optab supply {0x0f, 0xc8}.
var ybswap = []ytab{
	{Ynone, Ynone, Yrl, Z_rp, 2},
}

// yscond accepts a single Ymb destination.
var yscond = []ytab{
	{Ynone, Ynone, Ymb, Zo_m, 2},
}

// yjcond accepts a Ybr destination with a source of Ynone, Yi0 or Yi1.
// All three rows use Zbr; because the first two have zoffset 0, all three
// rows start at the same opcode bytes.
var yjcond = []ytab{
	{Ynone, Ynone, Ybr, Zbr, 0},
	{Yi0, Ynone, Ybr, Zbr, 0},
	{Yi1, Ynone, Ybr, Zbr, 1},
}

// yloop accepts a single Ybr destination, encoded with Zloop.
var yloop = []ytab{
	{Ynone, Ynone, Ybr, Zloop, 1},
}

// ycall lists the call forms. A zoffset of 0 makes a row share its opcode
// bytes with the following row: with the obj.ACALL entry's bytes
// {0xff, 02, 0xff, 0x15, 0xe8}, rows 0 and 1 both start at {0xff, 02},
// row 2 starts at {0xff, 0x15}, and rows 3 and 4 both start at 0xe8.
var ycall = []ytab{
	{Ynone, Ynone, Yml, Zcallindreg, 0},
	{Yrx, Ynone, Yrx, Zcallindreg, 2},
	{Ynone, Ynone, Yindir, Zcallind, 2},
	{Ynone, Ynone, Ybr, Zcall, 0},
	{Ynone, Ynone, Yi32, Zcallcon, 1},
}

// yduff accepts a single Yi32 destination, encoded with Zcallduff.
var yduff = []ytab{
	{Ynone, Ynone, Yi32, Zcallduff, 1},
}

// yjmp lists the jump forms: an indirect Yml target (Zo_m64), a Ybr
// target (Zjmp) and a Yi32 target (Zjmpcon). The Ybr row has zoffset 0,
// so it shares its opcode bytes with the Yi32 row.
var yjmp = []ytab{
	{Ynone, Ynone, Yml, Zo_m64, 2},
	{Ynone, Ynone, Ybr, Zjmp, 0},
	{Ynone, Ynone, Yi32, Zjmpcon, 1},
}
   583  
// Tables built around the Yf0 and Yrf operand classes.
// NOTE(review): these look like the floating-point register-stack forms
// (Yf0 being the stack top and Yrf any stack register) — the instructions
// that use them are outside this section, so confirm.

// yfmvd: Ym or Yrf into Yf0, and Yf0 into Ym or Yrf.
var yfmvd = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Ym, Zo_m, 2},
	{Yrf, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Yrf, Zo_m, 2},
}

// yfmvdp: only the two forms with Yf0 as the source (to Ym or to Yrf).
var yfmvdp = []ytab{
	{Yf0, Ynone, Ym, Zo_m, 2},
	{Yf0, Ynone, Yrf, Zo_m, 2},
}

// yfmvf: Ym into Yf0 and Yf0 into Ym; there are no Yrf rows.
var yfmvf = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Ym, Zo_m, 2},
}

// yfmvx: Ym into Yf0 only.
var yfmvx = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
}

// yfmvp: Yf0 into Ym only.
var yfmvp = []ytab{
	{Yf0, Ynone, Ym, Zo_m, 2},
}

// yfcmv: Yrf into Yf0 only.
var yfcmv = []ytab{
	{Yrf, Ynone, Yf0, Zm_o, 2},
}

// yfadd: Ym or Yrf into Yf0, or Yf0 into Yrf.
var yfadd = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
	{Yrf, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Yrf, Zo_m, 2},
}

// yfxch: Yf0 paired with Yrf, accepted in either operand order.
var yfxch = []ytab{
	{Yf0, Ynone, Yrf, Zo_m, 2},
	{Yrf, Ynone, Yf0, Zm_o, 2},
}

// ycompp: written as Yf0, Yrf; per the original note the operands are
// really f0,f1.
var ycompp = []ytab{
	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
}

// ystsw: a single destination, either Ym (Zo_m) or Yax (Zlit).
var ystsw = []ytab{
	{Ynone, Ynone, Ym, Zo_m, 2},
	{Ynone, Ynone, Yax, Zlit, 1},
}

// ysvrs: a single Ym operand, accepted as either the destination or the
// source.
var ysvrs = []ytab{
	{Ynone, Ynone, Ym, Zo_m, 2},
	{Ym, Ynone, Ynone, Zm_o, 2},
}
   637  
// Tables for the MMX (Ymr/Ymm) and XMM (Yxr/Yxm) operand classes. As the
// MOVD/MOVQ row comments elsewhere in this file indicate, Ymr and Yxr are
// the register classes; Ymm and Yxm are the corresponding operand classes
// used where a row also accepts a non-register operand.

// ymm: Ymm into Ymr, or Yxm into Yxr.
var ymm = []ytab{
	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
}

// yxm: Yxm into Yxr. Used in optab by entries such as AADDPD, AADDPS,
// AADDSD, AADDSS and the AANDPx/AANDNPx group.
var yxm = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
}

// yxm_q4: the same operands as yxm, but encoded with the plain Zm_r case.
var yxm_q4 = []ytab{
	{Yxm, Ynone, Yxr, Zm_r, 1},
}

// yxcvm1: Yxm source with either a Yxr or a Ymr destination.
var yxcvm1 = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
}

// yxcvm2: Yxr destination with either a Yxm or a Ymm source.
var yxcvm2 = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
}

// yxr: Yxr into Yxr (register-only form).
var yxr = []ytab{
	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
}

// yxr_ml: Yxr source, Yml destination.
var yxr_ml = []ytab{
	{Yxr, Ynone, Yml, Zr_m_xm, 1},
}

// ymr: Ymr into Ymr (register-only form).
var ymr = []ytab{
	{Ymr, Ynone, Ymr, Zm_r, 1},
}

// ymr_ml: Ymr source, Yml destination.
var ymr_ml = []ytab{
	{Ymr, Ynone, Yml, Zr_m_xm, 1},
}

// yxcmpi: three-operand form with a Yxm source, a Yxr middle (from3)
// operand and a Yi8 immediate in the to slot.
var yxcmpi = []ytab{
	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
}

// yxmov: Yxm into Yxr, or Yxr into Yxm.
var yxmov = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
}

// yxcvfl: Yxm source, Yrl destination (one opcode byte).
var yxcvfl = []ytab{
	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
}

// yxcvlf: Yml source, Yxr destination (one opcode byte).
var yxcvlf = []ytab{
	{Yml, Ynone, Yxr, Zm_r_xm, 1},
}

// yxcvfq: the same operands as yxcvfl, but the row owns two opcode bytes.
var yxcvfq = []ytab{
	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
}

// yxcvqf: the same operands as yxcvlf, but the row owns two opcode bytes.
var yxcvqf = []ytab{
	{Yml, Ynone, Yxr, Zm_r_xm, 2},
}

// yps: for each of the Ymr and Yxr destinations, a register/memory source
// form followed by a Yi8 immediate form (Zibo_m_xm).
var yps = []ytab{
	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
}

// yxrrl: Yxr source, Yrl destination, encoded with the plain Zm_r case.
var yxrrl = []ytab{
	{Yxr, Ynone, Yrl, Zm_r, 1},
}

// ymrxr: Ymr into Yxr (Zm_r), or Yxm into Yxr (Zm_r_xm).
var ymrxr = []ytab{
	{Ymr, Ynone, Yxr, Zm_r, 1},
	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
}
   717  
// Shuffle, insert/extract and miscellaneous patterns. In the
// three-operand rows the immediate is the from operand, the middle column
// is from3, and the last operand class is the destination.

// ymshuf: Yi8 immediate, Ymm middle operand, Ymr destination.
var ymshuf = []ytab{
	{Yi8, Ymm, Ymr, Zibm_r, 2},
}

// ymshufb: Yxm into Yxr, encoded with the Zm2_r case.
var ymshufb = []ytab{
	{Yxm, Ynone, Yxr, Zm2_r, 2},
}

// yxshuf: Yu8 immediate, Yxm middle operand, Yxr destination.
var yxshuf = []ytab{
	{Yu8, Yxm, Yxr, Zibm_r, 2},
}

// yextrw: Yu8 immediate, Yxr middle operand, Yrl destination.
var yextrw = []ytab{
	{Yu8, Yxr, Yrl, Zibm_r, 2},
}

// yextr: Yu8 immediate, Yxr middle operand, Ymm destination, encoded with
// Zibr_m; the row owns three opcode bytes.
var yextr = []ytab{
	{Yu8, Yxr, Ymm, Zibr_m, 3},
}

// yinsrw: Yu8 immediate, Yml middle operand, Yxr destination.
var yinsrw = []ytab{
	{Yu8, Yml, Yxr, Zibm_r, 2},
}

// yinsr: Yu8 immediate, Ymm middle operand, Yxr destination; the row owns
// three opcode bytes.
var yinsr = []ytab{
	{Yu8, Ymm, Yxr, Zibm_r, 3},
}

// ypsdq: Yi8 immediate applied to a Yxr destination.
var ypsdq = []ytab{
	{Yi8, Ynone, Yxr, Zibo_m, 2},
}

// ymskb: Yxr or Ymr source with a Yrl destination.
var ymskb = []ytab{
	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
}

// ycrc32l: Yml source, Yrl destination, encoded with Zlitm_r. The row's
// zoffset is 0.
var ycrc32l = []ytab{
	{Yml, Ynone, Yrl, Zlitm_r, 0},
}

// yprefetch: a single Ym source operand.
var yprefetch = []ytab{
	{Ym, Ynone, Ynone, Zm_o, 2},
}

// yaes: Yxm into Yxr, encoded with Zlitm_r.
var yaes = []ytab{
	{Yxm, Ynone, Yxr, Zlitm_r, 2},
}

// yxbegin: a single Ybr destination, encoded with Zjmp.
var yxbegin = []ytab{
	{Ynone, Ynone, Ybr, Zjmp, 1},
}

// yxabort: a single Yu8 immediate source, encoded with Zib_.
var yxabort = []ytab{
	{Yu8, Ynone, Ynone, Zib_, 1},
}

// ylddqu: Ym source (memory only), Yxr destination.
var ylddqu = []ytab{
	{Ym, Ynone, Yxr, Zm_r, 1},
}
   778  
// VEX instructions that come in two forms:
//	VTHING xmm2/m128, xmmV, xmm1
//	VTHING ymm2/m256, ymmV, ymm1
// The opcode array in the corresponding Optab entry
// should contain the (VEX prefixes, opcode byte) pair
// for each of the two forms.
// For example, the entries for VPXOR are:
//
//	VPXOR xmm2/m128, xmmV, xmm1
//	VEX.NDS.128.66.0F.WIG EF /r
//
//	VPXOR ymm2/m256, ymmV, ymm1
//	VEX.NDS.256.66.0F.WIG EF /r
//
// The NDS/NDD/DDS part can be dropped, producing this
// Optab entry:
//
//	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
//
// Each row below has zoffset 2, so it owns exactly one such
// (VEX prefixes, opcode byte) pair: the first row takes the xmm pair and
// the second row the ymm pair.
var yvex_xy3 = []ytab{
	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
}
   802  
// Further VEX operand patterns. Every row in the yvex_* tables owns a
// (VEX prefixes, opcode byte) pair (zoffset 2), except the immediate-shift
// rows using Zvex_i_r_v, which own three bytes.

// yvex_ri3: Yi8 immediate, Ymb middle operand, Yrl destination.
var yvex_ri3 = []ytab{
	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
}

// yvex_xyi3: immediate plus xmm or ymm operands. The immediate is accepted
// as either Yu8 or Yi8; the Yu8 rows come first.
var yvex_xyi3 = []ytab{
	{Yu8, Yxm, Yxr, Zvex_i_rm_r, 2},
	{Yu8, Yym, Yyr, Zvex_i_rm_r, 2},
	{Yi8, Yxm, Yxr, Zvex_i_rm_r, 2},
	{Yi8, Yym, Yyr, Zvex_i_rm_r, 2},
}

// yvex_yyi4: Yym source with Yyr middle and destination operands, encoded
// with Zvex_i_rm_v_r.
var yvex_yyi4 = []ytab{ // TODO: don't hide 4 op; some versions have an xmm version
	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
}

// yvex_xyi4: like yvex_yyi4, but the source operand is Yxm.
var yvex_xyi4 = []ytab{
	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
}

// yvex_shift: shift by a Yi8 immediate (xmm and ymm forms, three bytes
// each) or by a Yxm operand (xmm and ymm forms, two bytes each).
var yvex_shift = []ytab{
	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2},
}

// yvex_shift_dq: only the immediate rows of yvex_shift.
var yvex_shift_dq = []ytab{
	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
}

// yvex_r3: Yml source, Yrl middle operand, Yrl destination.
var yvex_r3 = []ytab{
	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
}

// yvex_vmr3: Yrl source, Yml middle operand, Yrl destination, encoded
// with Zvex_v_rm_r.
var yvex_vmr3 = []ytab{
	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
}

// yvex_xy2: two-operand form, xmm (Yxm into Yxr) and ymm (Yym into Yyr).
var yvex_xy2 = []ytab{
	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
}

// yvex_xyr2: Yxr or Yyr source with a Yrl destination.
var yvex_xyr2 = []ytab{
	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
}

// yvex_vmovdqa: for xmm and then ymm, a load form (Zvex_rm_v_r) followed
// by a store form (Zvex_r_v_rm).
var yvex_vmovdqa = []ytab{
	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
}

// yvex_vmovntdq: Yxr or Yyr source with a Ym (memory only) destination.
var yvex_vmovntdq = []ytab{
	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
}

// yvex_vpbroadcast: Yxm source with either a Yxr or a Yyr destination.
var yvex_vpbroadcast = []ytab{
	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
}

// yvex_vpbroadcast_sd: only the Yyr-destination row of yvex_vpbroadcast.
var yvex_vpbroadcast_sd = []ytab{
	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
}

// ymmxmm0f38: Ymm into Ymr (three opcode bytes) or Yxm into Yxr (five
// opcode bytes), both encoded with Zlitm_r.
var ymmxmm0f38 = []ytab{
	{Ymm, Ynone, Ymr, Zlitm_r, 3},
	{Yxm, Ynone, Yxr, Zlitm_r, 5},
}
   877  
   878  /*
   879   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   880   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   881   * the entry with the given p->as and then looks through the ytable for that
   882   * instruction (the second field in the optab struct) for a line whose first
   883   * two values match the Ytypes of the p->from and p->to operands.  The function
   884   * oclass in span.c computes the specific Ytype of an operand and then the set
   885   * of more general Ytypes that it satisfies is implied by the ycover table, set
   886   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   887   * from the more general 8-bit constants, but instinit says
   888   *
   889   *        ycover[Yi0*Ymax + Ys32] = 1;
   890   *        ycover[Yi1*Ymax + Ys32] = 1;
   891   *        ycover[Yi8*Ymax + Ys32] = 1;
   892   *
   893   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   894   * if that's what an instruction can handle.
   895   *
 * In parallel with the scan through the ytable for the appropriate line, there
 * is a z pointer that starts out pointing at the strange magic byte list in
 * the Optab struct.  With each step past a non-matching ytable line, z
 * advances by that line's byte count (the last entry in the line: the 4th
 * entry in the C-style listing below, the zoffset field of the Go ytab
 * struct).  When a matching line is found, that z pointer has the extra data
 * to use in laying down the instruction bytes.  The actual bytes laid down are
 * a function of the line's Ztype (the 3rd entry in the C-style listing below,
 * the zcase field of the Go ytab struct) and the z bytes.
   903   *
   904   * For example, let's look at AADDL.  The optab line says:
   905   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   906   *
   907   * and yaddl says
   908   *        uchar   yaddl[] =
   909   *        {
   910   *                Yi8,    Yml,    Zibo_m, 2,
   911   *                Yi32,   Yax,    Zil_,   1,
   912   *                Yi32,   Yml,    Zilo_m, 2,
   913   *                Yrl,    Yml,    Zr_m,   1,
   914   *                Yml,    Yrl,    Zm_r,   1,
   915   *                0
   916   *        };
   917   *
   918   * so there are 5 possible types of ADDL instruction that can be laid down, and
   919   * possible states used to lay them down (Ztype and z pointer, assuming z
   920   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   921   *
   922   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   923   *        Yi32, Yax -> Zil_, z+2 (0x05)
   924   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   925   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   926   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   927   *
   928   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   929   * relatively straightforward as this program goes.
   930   *
   931   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   932   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   933   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   934   * Zilo_m is the same but a long (32-bit) immediate.
   935   */
   936  var optab =
   937  /*	as, ytab, andproto, opcode */
   938  []Optab{
   939  	{obj.AXXX, nil, 0, [23]uint8{}},
   940  	{AAAA, ynone, P32, [23]uint8{0x37}},
   941  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   942  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   943  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   944  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   945  	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   946  	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   947  	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   948  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   949  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   950  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   951  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   952  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   953  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   954  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   955  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   956  	{AADJSP, nil, 0, [23]uint8{}},
   957  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   958  	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   959  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   960  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   961  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   962  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
   963  	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   964  	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   965  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   966  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   967  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   968  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   969  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   970  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   971  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   972  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   973  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   974  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   975  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   976  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   977  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   978  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   979  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   980  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   981  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   982  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   983  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   984  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   985  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   986  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   987  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   988  	{ABYTE, ybyte, Px, [23]uint8{1}},
   989  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   990  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   991  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   992  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   993  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   994  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   995  	{ACMC, ynone, Px, [23]uint8{0xf5}},
   996  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
   997  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
   998  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
   999  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1000  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1001  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1002  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1003  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1004  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1005  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1006  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1007  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1008  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1009  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1010  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1011  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1012  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1013  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1014  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1015  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1016  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1017  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1018  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1019  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1020  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1021  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1022  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1023  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1024  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1025  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1026  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1027  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1028  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1029  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1030  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1031  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1032  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1033  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1034  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1035  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1036  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1037  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1038  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1039  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1040  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1041  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1042  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1043  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1044  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1045  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1046  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1047  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1048  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1049  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1050  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1051  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1052  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1053  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1054  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1055  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1056  	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
  1057  	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
  1058  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1059  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1060  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1061  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1062  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1063  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1064  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1065  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1066  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1067  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1068  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1069  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1070  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1071  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1072  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1073  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1074  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1075  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1076  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1077  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1078  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1079  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1080  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1081  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1082  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1083  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1084  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1085  	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
  1086  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1087  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1088  	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
  1089  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1090  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1091  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1092  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1093  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1094  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1095  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1096  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1097  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1098  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1099  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1100  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1101  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1102  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1103  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1104  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1105  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1106  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1107  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1108  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1109  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1110  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1111  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1112  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1113  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1114  	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
  1115  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1116  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1117  	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
  1118  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1119  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1120  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1121  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1122  	{AINT, yint, Px, [23]uint8{0xcd}},
  1123  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1124  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1125  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1126  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1127  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1128  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1129  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1130  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1131  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1132  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1133  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1134  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1135  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1136  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1137  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1138  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1139  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1140  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1141  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1142  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1143  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1144  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1145  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1146  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1147  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1148  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1149  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1150  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1151  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1152  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1153  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1154  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1155  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1156  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1157  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1158  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1159  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1160  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1161  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1162  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1163  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1164  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1165  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1166  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1167  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1168  	{ALONG, ybyte, Px, [23]uint8{4}},
  1169  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1170  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1171  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1172  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1173  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1174  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1175  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1176  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1177  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1178  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1179  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1180  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1181  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1182  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1183  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1184  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1185  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1186  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1187  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1188  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1189  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1190  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1191  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1192  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1193  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1194  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1195  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1196  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1197  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1198  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1199  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1200  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1201  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1202  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1203  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1204  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1205  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1206  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1207  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1208  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1209  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1210  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1211  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1212  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1213  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1214  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1215  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1216  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1217  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1218  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1219  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1220  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1221  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1222  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1223  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1224  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1225  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1226  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1227  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1228  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1229  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1230  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1231  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1232  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1233  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1234  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1235  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1236  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1237  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1238  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1239  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1240  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1241  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1242  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1243  	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1244  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1245  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1246  	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1247  	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1248  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1249  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1250  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1251  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1252  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1253  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1254  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1255  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1256  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1257  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1258  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1259  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1260  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1261  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1262  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1263  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1264  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1265  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1266  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1267  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1268  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1269  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1270  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1271  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1272  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1273  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1274  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1275  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1276  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1277  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1278  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1279  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1280  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1281  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1282  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1283  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1284  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1285  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1286  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1287  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1288  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1289  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1290  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1291  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1292  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1293  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1294  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1295  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1296  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1297  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1298  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1299  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1300  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1301  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1302  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1303  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1304  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1305  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1306  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1307  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1308  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1309  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1310  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1311  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1312  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1313  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1314  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1315  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1316  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1317  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1318  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1319  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1320  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1321  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1322  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1323  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1324  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1325  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1326  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1327  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1328  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1329  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1330  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1331  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1332  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1333  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1334  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1335  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1336  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1337  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1338  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1339  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1340  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1341  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1342  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1343  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1344  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1345  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1346  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1347  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1348  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1349  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1350  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1351  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1352  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1353  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1354  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1355  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1356  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1357  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1358  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1359  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1360  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1361  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1362  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1363  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1364  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1365  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1366  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1367  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1368  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1369  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1370  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1371  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1372  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1373  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1374  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1375  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1376  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1377  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1378  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1379  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1380  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1381  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1382  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1383  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1384  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1385  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1386  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1387  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1388  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1389  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1390  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1391  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1392  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1393  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1394  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1395  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1396  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1397  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1398  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1399  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1400  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1401  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1402  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1403  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1404  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1405  	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1406  	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1407  	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1408  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1409  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1410  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1411  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1412  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1413  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1414  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1415  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1416  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1417  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1418  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1419  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1420  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1421  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1422  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1423  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1424  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1425  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1426  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1427  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1428  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1429  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1430  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1431  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1432  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1433  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1434  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1435  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1436  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1437  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1438  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1439  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1440  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1441  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1442  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1443  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1444  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1445  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1446  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1447  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1448  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1449  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1450  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1451  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1452  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1453  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1454  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1455  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1456  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1457  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1458  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1459  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1460  	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1461  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1462  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1463  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1464  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1465  	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
  1466  	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
  1467  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1468  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1469  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1470  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1471  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1472  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1473  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1474  	{AWORD, ybyte, Px, [23]uint8{2}},
  1475  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1476  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1477  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1478  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1479  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1480  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1481  	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1482  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1483  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1484  	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1485  	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1486  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1487  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1488  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1489  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1490  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1491  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1492  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1493  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1494  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1495  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1496  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1497  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1498  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1499  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1500  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1501  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1502  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1503  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1504  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1505  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1506  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1507  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1508  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1509  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1510  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1511  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1512  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1513  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1514  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1515  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1516  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1517  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1518  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1519  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1520  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1521  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1522  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1523  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1524  	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
  1525  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1526  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1527  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1528  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1529  	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
  1530  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1531  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1532  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1533  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1534  	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
  1535  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1536  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1537  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1538  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1539  	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
  1540  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1541  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1542  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1543  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1544  	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
  1545  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1546  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1547  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1548  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1549  	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
  1550  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1551  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1552  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1553  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1554  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1555  	{AFFREE, nil, 0, [23]uint8{}},
  1556  	{AFLDCW, ysvrs, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1557  	{AFLDENV, ysvrs, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1558  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1559  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1560  	{AFSTCW, ysvrs, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1561  	{AFSTENV, ysvrs, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1562  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1563  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1564  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1565  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1566  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1567  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1568  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1569  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1570  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1571  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1572  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1573  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1574  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1575  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1576  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1577  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1578  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1579  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1580  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1581  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1582  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1583  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1584  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1585  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1586  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1587  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1588  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1589  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1590  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1591  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1592  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1593  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1594  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1595  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1596  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1597  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1598  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1599  	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
  1600  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1601  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1602  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1603  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1604  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1605  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1606  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1607  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1608  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1609  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1610  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1611  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1612  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1613  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1614  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1615  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1616  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1617  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1618  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1619  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1620  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1621  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1622  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1623  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1624  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1625  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1626  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1627  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1628  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1629  	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1630  	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
  1631  	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
  1632  	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1633  	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1634  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1635  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1636  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1637  	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
  1638  	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
  1639  	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
  1640  
  1641  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1642  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1643  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1644  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1645  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1646  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1647  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1648  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1649  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1650  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1651  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1652  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1653  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1654  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1655  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1656  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1657  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1658  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1659  
  1660  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1661  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1662  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1663  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1664  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1665  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1666  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1667  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1668  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1669  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1670  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
  1671  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70, VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
  1672  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
  1673  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
  1674  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
  1675  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
  1676  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
  1677  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
  1678  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
  1679  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
  1680  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
  1681  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
  1682  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
  1683  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
  1684  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
  1685  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
  1686  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
  1687  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
  1688  	{AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_256_66_0F38_W0, 0x19}},
  1689  	{AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x18, VEX_256_66_0F38_W0, 0x18}},
  1690  	{AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x12, VEX_256_F2_0F_WIG, 0x12}},
  1691  	{AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x16, VEX_256_F3_0F_WIG, 0x16}},
  1692  	{AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x12, VEX_256_F3_0F_WIG, 0x12}},
  1693  
  1694  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1695  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1696  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1697  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1698  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1699  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1700  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1701  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1702  	{obj.ATYPE, nil, 0, [23]uint8{}},
  1703  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1704  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1705  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1706  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1707  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1708  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1709  	{obj.AEND, nil, 0, [23]uint8{}},
  1710  	{0, nil, 0, [23]uint8{}},
  1711  }
  1712  
// opindex maps an instruction opcode, masked with obj.AMask, to its entry
// in optab. It is filled in by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1714  
  1715  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1716  // This happens on systems like Solaris that call .so functions instead of system calls.
  1717  // It does not seem to be necessary for any other systems. This is probably working
  1718  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1719  // what that bug is. And this does fix it.
  1720  func isextern(s *obj.LSym) bool {
  1721  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1722  	return strings.HasPrefix(s.Name, "libc_")
  1723  }
  1724  
// nop holds single-instruction no-ops of various lengths:
// nop[i] is the encoding of an (i+1)-byte no-op, stored in the leading
// bytes of a 16-byte array (the remaining bytes are zero).
// constructed by hand and disassembled with gdb to verify.
// see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
var nop = [][16]uint8{
	{0x90},
	{0x66, 0x90},
	{0x0F, 0x1F, 0x00},
	{0x0F, 0x1F, 0x40, 0x00},
	{0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
}
  1739  
  1740  // Native Client rejects the repeated 0x66 prefix.
  1741  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1742  func fillnop(p []byte, n int) {
  1743  	var m int
  1744  
  1745  	for n > 0 {
  1746  		m = n
  1747  		if m > len(nop) {
  1748  			m = len(nop)
  1749  		}
  1750  		copy(p[:m], nop[m-1][:m])
  1751  		p = p[m:]
  1752  		n -= m
  1753  	}
  1754  }
  1755  
  1756  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1757  	s.Grow(int64(c) + int64(pad))
  1758  	fillnop(s.P[c:], int(pad))
  1759  	return c + pad
  1760  }
  1761  
  1762  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1763  	if p.Mode != 64 || ctxt.Arch.PtrSize == 4 {
  1764  		return l
  1765  	}
  1766  	return q
  1767  }
  1768  
// span6 lays out and assembles the instruction list s.Text into s.P.
// It assigns every instruction its Pc, records the final code size in
// s.Size, and repeats the whole layout pass until no instruction changes
// size and no short forward branch turns out to be too far away.
// If s.P is already non-nil the function returns immediately.
func span6(ctxt *obj.Link, s *obj.LSym) {
	ctxt.Cursym = s

	if s.P != nil {
		return
	}

	// instinit sets ycover[0] to 1, so a zero here means the tables
	// have not been initialized yet.
	if ycover[0] == 0 {
		instinit()
	}

	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
		// A branch without a resolved target is made to point at itself.
		if p.To.Type == obj.TYPE_BRANCH {
			if p.Pcond == nil {
				p.Pcond = p
			}
		}
		// Rewrite ADJSP $n into an ADD or SUB on SP (or a NOP when the
		// adjustment is zero). The offset is negated first, so the
		// immediate that ends up in the instruction is non-negative.
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			v := int32(-p.From.Offset)
			p.From.Offset = int64(v)
			p.As = spadjop(ctxt, p, AADDL, AADDQ)
			if v < 0 {
				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
				v = -v
				p.From.Offset = int64(v)
			}

			if v == 0 {
				p.As = obj.ANOP
			}
		}
	}

	var q *obj.Prog
	var count int64 // rough count of number of instructions
	for p := s.Text; p != nil; p = p.Link {
		count++
		p.Back = 2 // use short branches first time through
		q = p.Pcond
		// The target already has bit 2 set only if it was visited earlier
		// in this loop, i.e. it precedes p (or is p itself).
		if q != nil && (q.Back&2 != 0) {
			p.Back |= 1 // backward jump
			q.Back |= 4 // loop head
		}

		// NOTE(review): the loop above has already replaced every AADJSP
		// with ADD/SUB/NOP, so this branch looks unreachable — confirm
		// before removing.
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			v := int32(-p.From.Offset)
			p.From.Offset = int64(v)
			p.As = spadjop(ctxt, p, AADDL, AADDQ)
			if v < 0 {
				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
				v = -v
				p.From.Offset = int64(v)
			}

			if v == 0 {
				p.As = obj.ANOP
			}
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	n := 0      // number of layout passes performed
	var c int32 // current output offset within s.P
	errors := ctxt.Errors
	var deferreturn *obj.LSym
	if ctxt.Headtype == obj.Hnacl {
		deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
	}
	for {
		// loop counts the events in this pass that force another pass:
		// an instruction whose size changed, or a short forward branch
		// that had to be widened.
		loop := int32(0)
		// Discard the relocations and code bytes of the previous pass.
		for i := range s.R {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		for p := s.Text; p != nil; p = p.Link {
			// The NaCl padding relies on p.Isize from an earlier pass,
			// so it is skipped while the size is still unknown (zero).
			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {

				// pad everything to avoid crossing 32-byte boundary
				if c>>5 != (c+int32(p.Isize)-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// pad call deferreturn to start at 32-byte boundary
				// so that subtracting 5 in jmpdefer will jump back
				// to that boundary and rerun the call.
				if p.As == obj.ACALL && p.To.Sym == deferreturn {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// pad call to end at 32-byte boundary
				if p.As == obj.ACALL {
					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
				}

				// the linker treats REP and STOSQ as different instructions
				// but in fact the REP is a prefix on the STOSQ.
				// make sure REP has room for 2 more bytes, so that
				// padding will not be inserted before the next instruction.
				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// same for LOCK.
				// various instructions follow; the longest is 4 bytes.
				// give ourselves 8 bytes so as to avoid surprises.
				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}
			}

			// Align loop heads (Back bit 4) to LoopAlign, but only when
			// that costs at most MaxLoopPad bytes of padding.
			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (LoopAlign - 1)

				if v <= MaxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q = p.Rel; q != nil; q = q.Forwd {
				// v is the displacement from the end of the jump q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&2 != 0 { // short
					// Does not fit in a short branch: clear the short
					// flag and request another pass.
					if v > 127 {
						loop++
						q.Back ^= 2
					}

					// For JCXZL and XBEGIN the displacement byte is
					// patched at offset 2 instead of offset 1.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement occupies the
					// last four bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			asmins(ctxt, p)
			m := ctxt.AsmBuf.Len()
			// A size different from the recorded one means later offsets
			// moved, so another pass is needed.
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				loop++
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ctxt.AsmBuf.Bytes())
			c += int32(m)
		}

		// Give up after more than 20 passes without convergence.
		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if loop == 0 {
			break
		}
		// Stop early if this pass reported new errors.
		if ctxt.Errors > errors {
			return
		}
	}

	// On NaCl the code is padded out to a multiple of 32 bytes.
	if ctxt.Headtype == obj.Hnacl {
		c = naclpad(ctxt, s, c, -c&31)
	}

	s.Size = int64(c)

	// Disabled debugging dump of the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}
}
  1971  
// instinit fills in the lookup tables used by the assembler:
// opindex (opcode -> optab entry), ycover (operand class compatibility
// matrix, Ymax x Ymax, stored flat) and reg/regrex (per-register data).
// span6 calls it once, while ycover[0] is still zero.
func instinit() {
	// Index optab by opcode. The scan starts at entry 1 and stops at the
	// first entry whose opcode is 0; a duplicate opcode is fatal.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			log.Fatalf("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// NOTE(review): presumably ycover[a*Ymax+b] != 0 means an operand of
	// class a is acceptable where class b is expected — confirm against
	// the users of ycover.
	// Every class is paired with itself.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	// Immediate classes, smaller ones paired with larger ones.
	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	// Register classes paired with the byte-register class.
	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	// Register classes paired with the general register class Yrl.
	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	// Register and memory classes paired with Ymb.
	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	// Register and memory classes paired with Yml.
	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	// Register and memory classes paired with Ymm.
	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	// Memory or X register paired with Yxm.
	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	// Memory or Y register paired with Yym.
	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	// Per-register tables. reg[i] stays -1 for registers not handled
	// below; otherwise it is a value in 0..7 derived from the register's
	// position within its group.
	// NOTE(review): presumably reg[i] is the 3-bit hardware register number
	// and regrex[i] holds the REX prefix bits the register requires —
	// confirm against the users of these tables.
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// AH..BH are numbered starting at 4.
		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// Only regrex is set here for CR8..CR15; reg[i] is left as is.
		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}
  2112  
// isAndroid records whether obj.GOOS is "android". prefixof consults it
// when picking the segment register used for TLS references.
var isAndroid = (obj.GOOS == "android")
  2114  
// prefixof returns the segment-override prefix byte that operand a of
// instruction p requires, or 0 if it needs none.
// The byte values returned are 0x2e (CS), 0x3e (DS), 0x26 (ES),
// 0x64 (FS) and 0x65 (GS). A TLS reference is mapped to FS or GS depending
// on p.Mode, ctxt.Headtype, isAndroid and ctxt.Flag_shared; combinations
// that are not supported terminate the program via log.Fatalf.
func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
		return 0
	}
	// A plain memory operand whose base register is a segment register
	// (or TLS).
	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
		switch a.Reg {
		case REG_CS:
			return 0x2e

		case REG_DS:
			return 0x3e

		case REG_ES:
			return 0x26

		case REG_FS:
			return 0x64

		case REG_GS:
			return 0x65

		case REG_TLS:
			// NOTE: Systems listed here should be only systems that
			// support direct TLS references like 8(TLS) implemented as
			// direct references from FS or GS. Systems that require
			// the initial-exec model, where you load the TLS base into
			// a register and then index from that register, do not reach
			// this code and should not be listed.
			if p.Mode == 32 {
				switch ctxt.Headtype {
				default:
					if isAndroid {
						return 0x65 // GS
					}
					// log.Fatalf exits the program, so control does not
					// continue into the switch below.
					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

				case obj.Hdarwin,
					obj.Hdragonfly,
					obj.Hfreebsd,
					obj.Hnetbsd,
					obj.Hopenbsd:
					return 0x65 // GS
				}
			}

			// Not 32-bit mode.
			switch ctxt.Headtype {
			default:
				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

			case obj.Hlinux:
				if isAndroid {
					return 0x64 // FS
				}

				if ctxt.Flag_shared {
					log.Fatalf("unknown TLS base register for linux with -shared")
				} else {
					return 0x64 // FS
				}

			case obj.Hdragonfly,
				obj.Hfreebsd,
				obj.Hnetbsd,
				obj.Hopenbsd,
				obj.Hsolaris:
				return 0x64 // FS

			case obj.Hdarwin:
				return 0x65 // GS
			}
		}
	}

	// From here on only the index register is examined. In 32-bit mode
	// the only index that yields a prefix is TLS in a shared build.
	if p.Mode == 32 {
		if a.Index == REG_TLS && ctxt.Flag_shared {
			// When building for inclusion into a shared library, an instruction of the form
			//     MOVL 0(CX)(TLS*1), AX
			// becomes
			//     mov %gs:(%ecx), %eax
			// which assumes that the correct TLS offset has been loaded into %ecx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction becomes
			//     mov 0x0(%ecx), %eax
			// and a R_TLS_LE relocation, and so does not require a prefix.
			// NOTE(review): this reports the problem with ctxt.Diag, while
			// the 64-bit path below uses log.Fatalf for the same condition —
			// confirm whether the difference is intentional.
			if a.Offset != 0 {
				ctxt.Diag("cannot handle non-0 offsets to TLS")
			}
			return 0x65 // GS
		}
		return 0
	}

	switch a.Index {
	case REG_CS:
		return 0x2e

	case REG_DS:
		return 0x3e

	case REG_ES:
		return 0x26

	case REG_TLS:
		if ctxt.Flag_shared {
			// When building for inclusion into a shared library, an instruction of the form
			//     MOV 0(CX)(TLS*1), AX
			// becomes
			//     mov %fs:(%rcx), %rax
			// which assumes that the correct TLS offset has been loaded into %rcx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction does not require a prefix.
			if a.Offset != 0 {
				log.Fatalf("cannot handle non-0 offsets to TLS")
			}
			return 0x64
		}
		// Not shared: leave the switch and return 0 below.

	case REG_FS:
		return 0x64

	case REG_GS:
		return 0x65
	}

	return 0
}
  2241  
// oclass returns the operand class (one of the Y* constants) of operand a
// of instruction p. It returns Yxxx when the operand is not valid or not
// recognized; in some of those cases a diagnostic is also reported through
// ctxt.Diag.
//
// Non-register operands are classified by a.Type in the first switch;
// register operands (obj.TYPE_REG) are classified by a.Reg in the second.
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		// Only a named operand with no base, index or scale qualifies.
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		if a.Index == REG_SP {
			// Can't use SP as the index register
			return Yxxx
		}
		// The extra checks on named memory operands apply only when
		// assembling in 64-bit mode.
		if ctxt.Asmode == 64 {
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP.  The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			// An absolute 32-bit address is used for symbols matched by
			// isextern and for 32-bit non-shared code.
			if a.Sym != nil && isextern(a.Sym) || (p.Mode == 32 && !ctxt.Flag_shared) {
				return Yi32
			}
			return Yiauto // use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		// Any remaining TYPE_ADDR operand is classified like a constant.
		fallthrough

		// fall through

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		// Classify the constant by the smallest immediate class that
		// holds it. In 32-bit mode the value is first truncated to a
		// signed 32-bit integer.
		v := a.Offset
		if p.Mode == 32 {
			v = int64(int32(v))
		}
		if v == 0 {
			if p.Mark&PRESERVEFLAGS != 0 {
				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
				return Yu7
			}
			return Yi0
		}
		if v == 1 {
			return Yi1
		}
		if v >= 0 && v <= 127 {
			return Yu7
		}
		if v >= 0 && v <= 255 {
			return Yu8
		}
		if v >= -128 && v <= 127 {
			return Yi8
		}
		if p.Mode == 32 {
			return Yi32
		}
		l := int32(v)
		if int64(l) == v {
			return Ys32 /* can sign extend */
		}
		if v>>32 == 0 {
			return Yi32 /* unsigned */
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	// Every other operand type handled here must be a register.
	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	// These byte registers are accepted only when assembling in 64-bit
	// mode; they then share the class of the byte registers below.
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		if ctxt.Asmode != 64 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	// R8..R15 are accepted only when assembling in 64-bit mode.
	case REG_R8, /* not really Yrl */
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Asmode != 64 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		// In 32-bit mode these get the separate class Yrl32.
		if p.Mode == 32 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	case REG_X0 + 0,
		REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	// Segment registers and the TLS pseudo-register.
	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	// CR0 through CR8 each have their own class.
	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	// DR0 through DR7.
	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	// TR0 through TR7.
	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	// Any register not listed above has no operand class.
	return Yxxx
}
  2567  
  2568  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2569  	var i int
  2570  
  2571  	switch index {
  2572  	default:
  2573  		goto bad
  2574  
  2575  	case REG_NONE:
  2576  		i = 4 << 3
  2577  		goto bas
  2578  
  2579  	case REG_R8,
  2580  		REG_R9,
  2581  		REG_R10,
  2582  		REG_R11,
  2583  		REG_R12,
  2584  		REG_R13,
  2585  		REG_R14,
  2586  		REG_R15:
  2587  		if ctxt.Asmode != 64 {
  2588  			goto bad
  2589  		}
  2590  		fallthrough
  2591  
  2592  	case REG_AX,
  2593  		REG_CX,
  2594  		REG_DX,
  2595  		REG_BX,
  2596  		REG_BP,
  2597  		REG_SI,
  2598  		REG_DI:
  2599  		i = reg[index] << 3
  2600  	}
  2601  
  2602  	switch scale {
  2603  	default:
  2604  		goto bad
  2605  
  2606  	case 1:
  2607  		break
  2608  
  2609  	case 2:
  2610  		i |= 1 << 6
  2611  
  2612  	case 4:
  2613  		i |= 2 << 6
  2614  
  2615  	case 8:
  2616  		i |= 3 << 6
  2617  	}
  2618  
  2619  bas:
  2620  	switch base {
  2621  	default:
  2622  		goto bad
  2623  
  2624  	case REG_NONE: /* must be mod=00 */
  2625  		i |= 5
  2626  
  2627  	case REG_R8,
  2628  		REG_R9,
  2629  		REG_R10,
  2630  		REG_R11,
  2631  		REG_R12,
  2632  		REG_R13,
  2633  		REG_R14,
  2634  		REG_R15:
  2635  		if ctxt.Asmode != 64 {
  2636  			goto bad
  2637  		}
  2638  		fallthrough
  2639  
  2640  	case REG_AX,
  2641  		REG_CX,
  2642  		REG_DX,
  2643  		REG_BX,
  2644  		REG_SP,
  2645  		REG_BP,
  2646  		REG_SI,
  2647  		REG_DI:
  2648  		i |= reg[base]
  2649  	}
  2650  
  2651  	ctxt.AsmBuf.Put1(byte(i))
  2652  	return
  2653  
  2654  bad:
  2655  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2656  	ctxt.AsmBuf.Put1(0)
  2657  	return
  2658  }
  2659  
  2660  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2661  	var rel obj.Reloc
  2662  
  2663  	v := vaddr(ctxt, p, a, &rel)
  2664  	if rel.Siz != 0 {
  2665  		if rel.Siz != 4 {
  2666  			ctxt.Diag("bad reloc")
  2667  		}
  2668  		r := obj.Addrel(ctxt.Cursym)
  2669  		*r = rel
  2670  		r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  2671  	}
  2672  
  2673  	ctxt.AsmBuf.PutInt32(int32(v))
  2674  }
  2675  
  2676  /*
  2677  static void
  2678  relput8(Prog *p, Addr *a)
  2679  {
  2680  	vlong v;
  2681  	Reloc rel, *r;
  2682  
  2683  	v = vaddr(ctxt, p, a, &rel);
  2684  	if(rel.siz != 0) {
  2685  		r = addrel(ctxt->cursym);
  2686  		*r = rel;
  2687  		r->siz = 8;
  2688  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2689  	}
  2690  	put8(ctxt, v);
  2691  }
  2692  */
  2693  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2694  	if r != nil {
  2695  		*r = obj.Reloc{}
  2696  	}
  2697  
  2698  	switch a.Name {
  2699  	case obj.NAME_STATIC,
  2700  		obj.NAME_GOTREF,
  2701  		obj.NAME_EXTERN:
  2702  		s := a.Sym
  2703  		if r == nil {
  2704  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2705  			log.Fatalf("reloc")
  2706  		}
  2707  
  2708  		if a.Name == obj.NAME_GOTREF {
  2709  			r.Siz = 4
  2710  			r.Type = obj.R_GOTPCREL
  2711  		} else if isextern(s) || (p.Mode != 64 && !ctxt.Flag_shared) {
  2712  			r.Siz = 4
  2713  			r.Type = obj.R_ADDR
  2714  		} else {
  2715  			r.Siz = 4
  2716  			r.Type = obj.R_PCREL
  2717  		}
  2718  
  2719  		r.Off = -1 // caller must fill in
  2720  		r.Sym = s
  2721  		r.Add = a.Offset
  2722  
  2723  		return 0
  2724  	}
  2725  
  2726  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2727  		if r == nil {
  2728  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2729  			log.Fatalf("reloc")
  2730  		}
  2731  
  2732  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == obj.Hdarwin {
  2733  			r.Type = obj.R_TLS_LE
  2734  			r.Siz = 4
  2735  			r.Off = -1 // caller must fill in
  2736  			r.Add = a.Offset
  2737  		}
  2738  		return 0
  2739  	}
  2740  
  2741  	return a.Offset
  2742  }
  2743  
// asmandsz appends to ctxt.AsmBuf the ModRM byte for operand a, followed by
// whatever SIB byte and displacement the operand needs, and records a
// relocation on ctxt.Cursym when the displacement refers to a symbol or TLS.
//
// r is the value placed in bits 3-5 of the ModRM byte (a register number or
// an opcode extension). rex supplies REX bits for that field; only the
// 0x40 and Rxr bits are kept, and they are OR-ed into ctxt.Rexflag together
// with the bits implied by the registers used in a. m64 is not referenced
// anywhere in the body.
//
// Operands that cannot be encoded are reported with a diagnostic; nothing
// is returned.
func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Keep only the REX bits that are relevant to the reg field.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && ctxt.Rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement to emit; it is replaced below when the operand
	// turns out to need a relocation.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here: after the more specific
		// diagnostics it always ends at the generic "bad address" report.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: a ModRM byte with mod=3 and no
		// displacement. A non-zero offset on a register is an error.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ctxt.AsmBuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: every form below uses
	// rm=4 in the ModRM byte and lets asmidx emit the SIB byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base for the rest of
		// this block.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !isextern(a.Sym) && p.Mode == 64 {
				goto bad
			}
			if p.Mode == 32 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 with a 32-bit displacement.
			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement needed. BP and R13 are excluded from this short
		// form and always get an explicit displacement below.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in one signed byte and needs no relocation.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.AsmBuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ctxt.AsmBuf.Put1(byte(v))
			return
		}

		// Otherwise use a full 32-bit displacement.
		ctxt.AsmBuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with the REG_TLS
	// pseudo-index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if p.Mode == 32 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ctxt.Rexflag |= regrex[base]&Rxb | rex
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Absolute / segment / TLS addressing. Because && binds tighter
		// than ||, the short rm=5 form is chosen either for a non-extern
		// symbol reference with no base, or for any operand when p.Mode
		// is not 64.
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ctxt.AsmBuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ctxt.AsmBuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as a base always get a SIB byte (emitted by asmidx with no
	// index); the three variants differ only in displacement size.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			ctxt.AsmBuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ctxt.AsmBuf.Put1(byte(v))
			return
		}

		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: no SIB byte.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// The displacement becomes the addend of a TLS relocation
			// and the encoded value itself is zero.
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// As above, BP and R13 never use the displacement-free form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.AsmBuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement, first recording the pending relocation
	// (if any) at the position the displacement is about to occupy.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(ctxt.Cursym)
		*r = rel
		r.Off = int32(ctxt.Curp.Pc + int64(ctxt.AsmBuf.Len()))
	}

	ctxt.AsmBuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  2944  
  2945  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2946  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2947  }
  2948  
  2949  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2950  	asmandsz(ctxt, p, a, o, 0, 0)
  2951  }
  2952  
  2953  func bytereg(a *obj.Addr, t *uint8) {
  2954  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2955  		a.Reg += REG_AL - REG_AX
  2956  		*t = 0
  2957  	}
  2958  }
  2959  
  2960  func unbytereg(a *obj.Addr, t *uint8) {
  2961  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  2962  		a.Reg += REG_AX - REG_AL
  2963  		*t = 0
  2964  	}
  2965  }
  2966  
  2967  const (
  2968  	E = 0xff
  2969  )
  2970  
// ymovtab is the table of special-case encodings for push/pop of segment
// registers, moves to and from segment, control, debug, test and
// descriptor-table registers, double shifts, and TLS base loads.
//
// Entries are positional Movtab literals. Judging from the values, each
// one holds an instruction, three operand classes, a small integer code
// and four op bytes.
// NOTE(review): the three classes are presumably the from, from3 and to
// operands, and the integer presumably selects how the op bytes are
// emitted — confirm against the Movtab definition and its consumer.
// Several entries place E after their last opcode byte, and the table
// ends with an all-zero entry (looks like a terminator — TODO confirm).
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3110  
  3111  func isax(a *obj.Addr) bool {
  3112  	switch a.Reg {
  3113  	case REG_AX, REG_AL, REG_AH:
  3114  		return true
  3115  	}
  3116  
  3117  	if a.Index == REG_AX {
  3118  		return true
  3119  	}
  3120  	return false
  3121  }
  3122  
  3123  func subreg(p *obj.Prog, from int, to int) {
  3124  	if false { /* debug['Q'] */
  3125  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  3126  	}
  3127  
  3128  	if int(p.From.Reg) == from {
  3129  		p.From.Reg = int16(to)
  3130  		p.Ft = 0
  3131  	}
  3132  
  3133  	if int(p.To.Reg) == from {
  3134  		p.To.Reg = int16(to)
  3135  		p.Tt = 0
  3136  	}
  3137  
  3138  	if int(p.From.Index) == from {
  3139  		p.From.Index = int16(to)
  3140  		p.Ft = 0
  3141  	}
  3142  
  3143  	if int(p.To.Index) == from {
  3144  		p.To.Index = int16(to)
  3145  		p.Tt = 0
  3146  	}
  3147  
  3148  	if false { /* debug['Q'] */
  3149  		fmt.Printf("%v\n", p)
  3150  	}
  3151  }
  3152  
  3153  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3154  	switch op {
  3155  	case Pm, Pe, Pf2, Pf3:
  3156  		if osize != 1 {
  3157  			if op != Pm {
  3158  				ctxt.AsmBuf.Put1(byte(op))
  3159  			}
  3160  			ctxt.AsmBuf.Put1(Pm)
  3161  			z++
  3162  			op = int(o.op[z])
  3163  			break
  3164  		}
  3165  		fallthrough
  3166  
  3167  	default:
  3168  		if ctxt.AsmBuf.Len() == 0 || ctxt.AsmBuf.Last() != Pm {
  3169  			ctxt.AsmBuf.Put1(Pm)
  3170  		}
  3171  	}
  3172  
  3173  	ctxt.AsmBuf.Put1(byte(op))
  3174  	return z
  3175  }
  3176  
  3177  var bpduff1 = []byte{
  3178  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3179  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3180  }
  3181  
  3182  var bpduff2 = []byte{
  3183  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3184  }
  3185  
  3186  // Emit VEX prefix and opcode byte.
  3187  // The three addresses are the r/m, vvvv, and reg fields.
  3188  // The reg and rm arguments appear in the same order as the
  3189  // arguments to asmand, which typically follows the call to asmvex.
  3190  // The final two arguments are the VEX prefix (see encoding above)
  3191  // and the opcode byte.
  3192  // For details about vex prefix see:
  3193  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3194  func asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3195  	ctxt.Vexflag = 1
  3196  	rexR := 0
  3197  	if r != nil {
  3198  		rexR = regrex[r.Reg] & Rxr
  3199  	}
  3200  	rexB := 0
  3201  	rexX := 0
  3202  	if rm != nil {
  3203  		rexB = regrex[rm.Reg] & Rxb
  3204  		rexX = regrex[rm.Index] & Rxx
  3205  	}
  3206  	vexM := (vex >> 3) & 0xF
  3207  	vexWLP := vex & 0x87
  3208  	vexV := byte(0)
  3209  	if v != nil {
  3210  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3211  	}
  3212  	vexV ^= 0xF
  3213  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3214  		// Can use 2-byte encoding.
  3215  		ctxt.AsmBuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3216  	} else {
  3217  		// Must use 3-byte encoding.
  3218  		ctxt.AsmBuf.Put3(0xc4,
  3219  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3220  			vexV<<3|vexWLP,
  3221  		)
  3222  	}
  3223  	ctxt.AsmBuf.Put1(opcode)
  3224  }
  3225  
  3226  func doasm(ctxt *obj.Link, p *obj.Prog) {
  3227  	ctxt.Curp = p // TODO
  3228  
  3229  	o := opindex[p.As&obj.AMask]
  3230  
  3231  	if o == nil {
  3232  		ctxt.Diag("asmins: missing op %v", p)
  3233  		return
  3234  	}
  3235  
  3236  	pre := prefixof(ctxt, p, &p.From)
  3237  	if pre != 0 {
  3238  		ctxt.AsmBuf.Put1(byte(pre))
  3239  	}
  3240  	pre = prefixof(ctxt, p, &p.To)
  3241  	if pre != 0 {
  3242  		ctxt.AsmBuf.Put1(byte(pre))
  3243  	}
  3244  
  3245  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3246  	// which encodes as SHRQ $32(DX*0), AX.
  3247  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3248  	// Change encoding generated by assemblers and compilers and remove.
  3249  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3250  		p.From3 = new(obj.Addr)
  3251  		p.From3.Type = obj.TYPE_REG
  3252  		p.From3.Reg = p.From.Index
  3253  		p.From.Index = 0
  3254  	}
  3255  
  3256  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3257  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3258  	switch p.As {
  3259  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3260  		if p.From3Type() == obj.TYPE_NONE {
  3261  			p.From3 = new(obj.Addr)
  3262  			*p.From3 = p.From
  3263  			p.From = obj.Addr{}
  3264  			p.From.Type = obj.TYPE_CONST
  3265  			p.From.Offset = p.To.Offset
  3266  			p.To.Offset = 0
  3267  		}
  3268  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3269  		if p.From3Type() == obj.TYPE_NONE {
  3270  			p.From3 = new(obj.Addr)
  3271  			*p.From3 = p.To
  3272  			p.To = obj.Addr{}
  3273  			p.To.Type = obj.TYPE_CONST
  3274  			p.To.Offset = p.From3.Offset
  3275  			p.From3.Offset = 0
  3276  		}
  3277  	}
  3278  
  3279  	if p.Ft == 0 {
  3280  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3281  	}
  3282  	if p.Tt == 0 {
  3283  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3284  	}
  3285  
  3286  	ft := int(p.Ft) * Ymax
  3287  	f3t := Ynone * Ymax
  3288  	if p.From3 != nil {
  3289  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3290  	}
  3291  	tt := int(p.Tt) * Ymax
  3292  
  3293  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3294  	z := 0
  3295  	var a *obj.Addr
  3296  	var l int
  3297  	var op int
  3298  	var q *obj.Prog
  3299  	var r *obj.Reloc
  3300  	var rel obj.Reloc
  3301  	var v int64
  3302  	for i := range o.ytab {
  3303  		yt := &o.ytab[i]
  3304  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3305  			switch o.prefix {
  3306  			case Px1: /* first option valid only in 32-bit mode */
  3307  				if ctxt.Mode == 64 && z == 0 {
  3308  					z += int(yt.zoffset) + xo
  3309  					continue
  3310  				}
  3311  			case Pq: /* 16 bit escape and opcode escape */
  3312  				ctxt.AsmBuf.Put2(Pe, Pm)
  3313  
  3314  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3315  				ctxt.Rexflag |= Pw
  3316  				ctxt.AsmBuf.Put2(Pe, Pm)
  3317  
  3318  			case Pq4: /*  66 0F 38 */
  3319  				ctxt.AsmBuf.Put3(0x66, 0x0F, 0x38)
  3320  
  3321  			case Pf2, /* xmm opcode escape */
  3322  				Pf3:
  3323  				ctxt.AsmBuf.Put2(o.prefix, Pm)
  3324  
  3325  			case Pef3:
  3326  				ctxt.AsmBuf.Put3(Pe, Pf3, Pm)
  3327  
  3328  			case Pfw: /* xmm opcode escape + REX.W */
  3329  				ctxt.Rexflag |= Pw
  3330  				ctxt.AsmBuf.Put2(Pf3, Pm)
  3331  
  3332  			case Pm: /* opcode escape */
  3333  				ctxt.AsmBuf.Put1(Pm)
  3334  
  3335  			case Pe: /* 16 bit escape */
  3336  				ctxt.AsmBuf.Put1(Pe)
  3337  
  3338  			case Pw: /* 64-bit escape */
  3339  				if p.Mode != 64 {
  3340  					ctxt.Diag("asmins: illegal 64: %v", p)
  3341  				}
  3342  				ctxt.Rexflag |= Pw
  3343  
  3344  			case Pw8: /* 64-bit escape if z >= 8 */
  3345  				if z >= 8 {
  3346  					if p.Mode != 64 {
  3347  						ctxt.Diag("asmins: illegal 64: %v", p)
  3348  					}
  3349  					ctxt.Rexflag |= Pw
  3350  				}
  3351  
  3352  			case Pb: /* botch */
  3353  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3354  					goto bad
  3355  				}
  3356  				// NOTE(rsc): This is probably safe to do always,
  3357  				// but when enabled it chooses different encodings
  3358  				// than the old cmd/internal/obj/i386 code did,
  3359  				// which breaks our "same bits out" checks.
  3360  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3361  				// in the original obj/i386, and it would encode
  3362  				// (using a valid, shorter form) as 3c 00 if we enabled
  3363  				// the call to bytereg here.
  3364  				if p.Mode == 64 {
  3365  					bytereg(&p.From, &p.Ft)
  3366  					bytereg(&p.To, &p.Tt)
  3367  				}
  3368  
  3369  			case P32: /* 32 bit but illegal if 64-bit mode */
  3370  				if p.Mode == 64 {
  3371  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3372  				}
  3373  
  3374  			case Py: /* 64-bit only, no prefix */
  3375  				if p.Mode != 64 {
  3376  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3377  				}
  3378  
  3379  			case Py1: /* 64-bit only if z < 1, no prefix */
  3380  				if z < 1 && p.Mode != 64 {
  3381  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3382  				}
  3383  
  3384  			case Py3: /* 64-bit only if z < 3, no prefix */
  3385  				if z < 3 && p.Mode != 64 {
  3386  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3387  				}
  3388  			}
  3389  
  3390  			if z >= len(o.op) {
  3391  				log.Fatalf("asmins bad table %v", p)
  3392  			}
  3393  			op = int(o.op[z])
  3394  			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
  3395  			if op == 0x0f && o.prefix != Pvex {
  3396  				ctxt.AsmBuf.Put1(byte(op))
  3397  				z++
  3398  				op = int(o.op[z])
  3399  			}
  3400  
  3401  			switch yt.zcase {
  3402  			default:
  3403  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3404  				return
  3405  
  3406  			case Zpseudo:
  3407  				break
  3408  
  3409  			case Zlit:
  3410  				for ; ; z++ {
  3411  					op = int(o.op[z])
  3412  					if op == 0 {
  3413  						break
  3414  					}
  3415  					ctxt.AsmBuf.Put1(byte(op))
  3416  				}
  3417  
  3418  			case Zlitm_r:
  3419  				for ; ; z++ {
  3420  					op = int(o.op[z])
  3421  					if op == 0 {
  3422  						break
  3423  					}
  3424  					ctxt.AsmBuf.Put1(byte(op))
  3425  				}
  3426  				asmand(ctxt, p, &p.From, &p.To)
  3427  
  3428  			case Zmb_r:
  3429  				bytereg(&p.From, &p.Ft)
  3430  				fallthrough
  3431  
  3432  			case Zm_r:
  3433  				ctxt.AsmBuf.Put1(byte(op))
  3434  				asmand(ctxt, p, &p.From, &p.To)
  3435  
  3436  			case Zm2_r:
  3437  				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
  3438  				asmand(ctxt, p, &p.From, &p.To)
  3439  
  3440  			case Zm_r_xm:
  3441  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3442  				asmand(ctxt, p, &p.From, &p.To)
  3443  
  3444  			case Zm_r_xm_nr:
  3445  				ctxt.Rexflag = 0
  3446  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3447  				asmand(ctxt, p, &p.From, &p.To)
  3448  
  3449  			case Zm_r_i_xm:
  3450  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3451  				asmand(ctxt, p, &p.From, p.From3)
  3452  				ctxt.AsmBuf.Put1(byte(p.To.Offset))
  3453  
  3454  			case Zibm_r, Zibr_m:
  3455  				for {
  3456  					tmp1 := z
  3457  					z++
  3458  					op = int(o.op[tmp1])
  3459  					if op == 0 {
  3460  						break
  3461  					}
  3462  					ctxt.AsmBuf.Put1(byte(op))
  3463  				}
  3464  				if yt.zcase == Zibr_m {
  3465  					asmand(ctxt, p, &p.To, p.From3)
  3466  				} else {
  3467  					asmand(ctxt, p, p.From3, &p.To)
  3468  				}
  3469  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3470  
  3471  			case Zaut_r:
  3472  				ctxt.AsmBuf.Put1(0x8d) // leal
  3473  				if p.From.Type != obj.TYPE_ADDR {
  3474  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3475  				}
  3476  				p.From.Type = obj.TYPE_MEM
  3477  				asmand(ctxt, p, &p.From, &p.To)
  3478  				p.From.Type = obj.TYPE_ADDR
  3479  
  3480  			case Zm_o:
  3481  				ctxt.AsmBuf.Put1(byte(op))
  3482  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3483  
  3484  			case Zr_m:
  3485  				ctxt.AsmBuf.Put1(byte(op))
  3486  				asmand(ctxt, p, &p.To, &p.From)
  3487  
  3488  			case Zvex_rm_v_r:
  3489  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3490  				asmand(ctxt, p, &p.From, &p.To)
  3491  
  3492  			case Zvex_i_r_v:
  3493  				asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
  3494  				regnum := byte(0x7)
  3495  				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
  3496  					regnum &= byte(p.From3.Reg - REG_X0)
  3497  				} else {
  3498  					regnum &= byte(p.From3.Reg - REG_Y0)
  3499  				}
  3500  				ctxt.AsmBuf.Put1(byte(o.op[z+2]) | regnum)
  3501  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3502  
  3503  			case Zvex_i_rm_v_r:
  3504  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3505  				asmand(ctxt, p, &p.From, &p.To)
  3506  				ctxt.AsmBuf.Put1(byte(p.From3.Offset))
  3507  
  3508  			case Zvex_i_rm_r:
  3509  				asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
  3510  				asmand(ctxt, p, p.From3, &p.To)
  3511  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3512  
  3513  			case Zvex_v_rm_r:
  3514  				asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3515  				asmand(ctxt, p, p.From3, &p.To)
  3516  
  3517  			case Zvex_r_v_rm:
  3518  				asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3519  				asmand(ctxt, p, &p.To, &p.From)
  3520  
  3521  			case Zr_m_xm:
  3522  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3523  				asmand(ctxt, p, &p.To, &p.From)
  3524  
  3525  			case Zr_m_xm_nr:
  3526  				ctxt.Rexflag = 0
  3527  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3528  				asmand(ctxt, p, &p.To, &p.From)
  3529  
  3530  			case Zo_m:
  3531  				ctxt.AsmBuf.Put1(byte(op))
  3532  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3533  
  3534  			case Zcallindreg:
  3535  				r = obj.Addrel(ctxt.Cursym)
  3536  				r.Off = int32(p.Pc)
  3537  				r.Type = obj.R_CALLIND
  3538  				r.Siz = 0
  3539  				fallthrough
  3540  
  3541  			case Zo_m64:
  3542  				ctxt.AsmBuf.Put1(byte(op))
  3543  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3544  
  3545  			case Zm_ibo:
  3546  				ctxt.AsmBuf.Put1(byte(op))
  3547  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3548  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3549  
  3550  			case Zibo_m:
  3551  				ctxt.AsmBuf.Put1(byte(op))
  3552  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3553  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3554  
  3555  			case Zibo_m_xm:
  3556  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3557  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3558  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3559  
  3560  			case Z_ib, Zib_:
  3561  				if yt.zcase == Zib_ {
  3562  					a = &p.From
  3563  				} else {
  3564  					a = &p.To
  3565  				}
  3566  				ctxt.AsmBuf.Put1(byte(op))
  3567  				if p.As == AXABORT {
  3568  					ctxt.AsmBuf.Put1(o.op[z+1])
  3569  				}
  3570  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3571  
  3572  			case Zib_rp:
  3573  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3574  				ctxt.AsmBuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3575  
  3576  			case Zil_rp:
  3577  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3578  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3579  				if o.prefix == Pe {
  3580  					v = vaddr(ctxt, p, &p.From, nil)
  3581  					ctxt.AsmBuf.PutInt16(int16(v))
  3582  				} else {
  3583  					relput4(ctxt, p, &p.From)
  3584  				}
  3585  
  3586  			case Zo_iw:
  3587  				ctxt.AsmBuf.Put1(byte(op))
  3588  				if p.From.Type != obj.TYPE_NONE {
  3589  					v = vaddr(ctxt, p, &p.From, nil)
  3590  					ctxt.AsmBuf.PutInt16(int16(v))
  3591  				}
  3592  
  3593  			case Ziq_rp:
  3594  				v = vaddr(ctxt, p, &p.From, &rel)
  3595  				l = int(v >> 32)
  3596  				if l == 0 && rel.Siz != 8 {
  3597  					//p->mark |= 0100;
  3598  					//print("zero: %llux %v\n", v, p);
  3599  					ctxt.Rexflag &^= (0x40 | Rxw)
  3600  
  3601  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3602  					ctxt.AsmBuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3603  					if rel.Type != 0 {
  3604  						r = obj.Addrel(ctxt.Cursym)
  3605  						*r = rel
  3606  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3607  					}
  3608  
  3609  					ctxt.AsmBuf.PutInt32(int32(v))
  3610  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3611  
  3612  					//p->mark |= 0100;
  3613  					//print("sign: %llux %v\n", v, p);
  3614  					ctxt.AsmBuf.Put1(0xc7)
  3615  					asmando(ctxt, p, &p.To, 0)
  3616  
  3617  					ctxt.AsmBuf.PutInt32(int32(v)) // need all 8
  3618  				} else {
  3619  					//print("all: %llux %v\n", v, p);
  3620  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3621  					ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3622  					if rel.Type != 0 {
  3623  						r = obj.Addrel(ctxt.Cursym)
  3624  						*r = rel
  3625  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3626  					}
  3627  
  3628  					ctxt.AsmBuf.PutInt64(v)
  3629  				}
  3630  
  3631  			case Zib_rr:
  3632  				ctxt.AsmBuf.Put1(byte(op))
  3633  				asmand(ctxt, p, &p.To, &p.To)
  3634  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3635  
  3636  			case Z_il, Zil_:
  3637  				if yt.zcase == Zil_ {
  3638  					a = &p.From
  3639  				} else {
  3640  					a = &p.To
  3641  				}
  3642  				ctxt.AsmBuf.Put1(byte(op))
  3643  				if o.prefix == Pe {
  3644  					v = vaddr(ctxt, p, a, nil)
  3645  					ctxt.AsmBuf.PutInt16(int16(v))
  3646  				} else {
  3647  					relput4(ctxt, p, a)
  3648  				}
  3649  
  3650  			case Zm_ilo, Zilo_m:
  3651  				ctxt.AsmBuf.Put1(byte(op))
  3652  				if yt.zcase == Zilo_m {
  3653  					a = &p.From
  3654  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3655  				} else {
  3656  					a = &p.To
  3657  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3658  				}
  3659  
  3660  				if o.prefix == Pe {
  3661  					v = vaddr(ctxt, p, a, nil)
  3662  					ctxt.AsmBuf.PutInt16(int16(v))
  3663  				} else {
  3664  					relput4(ctxt, p, a)
  3665  				}
  3666  
  3667  			case Zil_rr:
  3668  				ctxt.AsmBuf.Put1(byte(op))
  3669  				asmand(ctxt, p, &p.To, &p.To)
  3670  				if o.prefix == Pe {
  3671  					v = vaddr(ctxt, p, &p.From, nil)
  3672  					ctxt.AsmBuf.PutInt16(int16(v))
  3673  				} else {
  3674  					relput4(ctxt, p, &p.From)
  3675  				}
  3676  
  3677  			case Z_rp:
  3678  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3679  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3680  
  3681  			case Zrp_:
  3682  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3683  				ctxt.AsmBuf.Put1(byte(op + reg[p.From.Reg]))
  3684  
  3685  			case Zclr:
  3686  				ctxt.Rexflag &^= Pw
  3687  				ctxt.AsmBuf.Put1(byte(op))
  3688  				asmand(ctxt, p, &p.To, &p.To)
  3689  
  3690  			case Zcallcon, Zjmpcon:
  3691  				if yt.zcase == Zcallcon {
  3692  					ctxt.AsmBuf.Put1(byte(op))
  3693  				} else {
  3694  					ctxt.AsmBuf.Put1(o.op[z+1])
  3695  				}
  3696  				r = obj.Addrel(ctxt.Cursym)
  3697  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3698  				r.Type = obj.R_PCREL
  3699  				r.Siz = 4
  3700  				r.Add = p.To.Offset
  3701  				ctxt.AsmBuf.PutInt32(0)
  3702  
  3703  			case Zcallind:
  3704  				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
  3705  				r = obj.Addrel(ctxt.Cursym)
  3706  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3707  				if p.Mode == 64 {
  3708  					r.Type = obj.R_PCREL
  3709  				} else {
  3710  					r.Type = obj.R_ADDR
  3711  				}
  3712  				r.Siz = 4
  3713  				r.Add = p.To.Offset
  3714  				r.Sym = p.To.Sym
  3715  				ctxt.AsmBuf.PutInt32(0)
  3716  
  3717  			case Zcall, Zcallduff:
  3718  				if p.To.Sym == nil {
  3719  					ctxt.Diag("call without target")
  3720  					log.Fatalf("bad code")
  3721  				}
  3722  
  3723  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3724  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3725  				}
  3726  
  3727  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
  3728  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3729  					// (the call jumps into the middle of the function).
  3730  					// This makes it possible to see call sites for duffcopy/duffzero in
  3731  					// BP-based profiling tools like Linux perf (which is the
  3732  					// whole point of obj.Framepointer_enabled).
  3733  					// MOVQ BP, -16(SP)
  3734  					// LEAQ -16(SP), BP
  3735  					ctxt.AsmBuf.Put(bpduff1)
  3736  				}
  3737  				ctxt.AsmBuf.Put1(byte(op))
  3738  				r = obj.Addrel(ctxt.Cursym)
  3739  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3740  				r.Sym = p.To.Sym
  3741  				r.Add = p.To.Offset
  3742  				r.Type = obj.R_CALL
  3743  				r.Siz = 4
  3744  				ctxt.AsmBuf.PutInt32(0)
  3745  
  3746  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
  3747  					// Pop BP pushed above.
  3748  					// MOVQ 0(BP), BP
  3749  					ctxt.AsmBuf.Put(bpduff2)
  3750  				}
  3751  
  3752  			// TODO: jump across functions needs reloc
  3753  			case Zbr, Zjmp, Zloop:
  3754  				if p.As == AXBEGIN {
  3755  					ctxt.AsmBuf.Put1(byte(op))
  3756  				}
  3757  				if p.To.Sym != nil {
  3758  					if yt.zcase != Zjmp {
  3759  						ctxt.Diag("branch to ATEXT")
  3760  						log.Fatalf("bad code")
  3761  					}
  3762  
  3763  					ctxt.AsmBuf.Put1(o.op[z+1])
  3764  					r = obj.Addrel(ctxt.Cursym)
  3765  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3766  					r.Sym = p.To.Sym
  3767  					r.Type = obj.R_PCREL
  3768  					r.Siz = 4
  3769  					ctxt.AsmBuf.PutInt32(0)
  3770  					break
  3771  				}
  3772  
  3773  				// Assumes q is in this function.
  3774  				// TODO: Check in input, preserve in brchain.
  3775  
  3776  				// Fill in backward jump now.
  3777  				q = p.Pcond
  3778  
  3779  				if q == nil {
  3780  					ctxt.Diag("jmp/branch/loop without target")
  3781  					log.Fatalf("bad code")
  3782  				}
  3783  
  3784  				if p.Back&1 != 0 {
  3785  					v = q.Pc - (p.Pc + 2)
  3786  					if v >= -128 && p.As != AXBEGIN {
  3787  						if p.As == AJCXZL {
  3788  							ctxt.AsmBuf.Put1(0x67)
  3789  						}
  3790  						ctxt.AsmBuf.Put2(byte(op), byte(v))
  3791  					} else if yt.zcase == Zloop {
  3792  						ctxt.Diag("loop too far: %v", p)
  3793  					} else {
  3794  						v -= 5 - 2
  3795  						if p.As == AXBEGIN {
  3796  							v--
  3797  						}
  3798  						if yt.zcase == Zbr {
  3799  							ctxt.AsmBuf.Put1(0x0f)
  3800  							v--
  3801  						}
  3802  
  3803  						ctxt.AsmBuf.Put1(o.op[z+1])
  3804  						ctxt.AsmBuf.PutInt32(int32(v))
  3805  					}
  3806  
  3807  					break
  3808  				}
  3809  
  3810  				// Annotate target; will fill in later.
  3811  				p.Forwd = q.Rel
  3812  
  3813  				q.Rel = p
  3814  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3815  					if p.As == AJCXZL {
  3816  						ctxt.AsmBuf.Put1(0x67)
  3817  					}
  3818  					ctxt.AsmBuf.Put2(byte(op), 0)
  3819  				} else if yt.zcase == Zloop {
  3820  					ctxt.Diag("loop too far: %v", p)
  3821  				} else {
  3822  					if yt.zcase == Zbr {
  3823  						ctxt.AsmBuf.Put1(0x0f)
  3824  					}
  3825  					ctxt.AsmBuf.Put1(o.op[z+1])
  3826  					ctxt.AsmBuf.PutInt32(0)
  3827  				}
  3828  
  3829  				break
  3830  
  3831  			/*
  3832  				v = q->pc - p->pc - 2;
  3833  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3834  					*ctxt->andptr++ = op;
  3835  					*ctxt->andptr++ = v;
  3836  				} else {
  3837  					v -= 5-2;
  3838  					if(yt.zcase == Zbr) {
  3839  						*ctxt->andptr++ = 0x0f;
  3840  						v--;
  3841  					}
  3842  					*ctxt->andptr++ = o->op[z+1];
  3843  					*ctxt->andptr++ = v;
  3844  					*ctxt->andptr++ = v>>8;
  3845  					*ctxt->andptr++ = v>>16;
  3846  					*ctxt->andptr++ = v>>24;
  3847  				}
  3848  			*/
  3849  
  3850  			case Zbyte:
  3851  				v = vaddr(ctxt, p, &p.From, &rel)
  3852  				if rel.Siz != 0 {
  3853  					rel.Siz = uint8(op)
  3854  					r = obj.Addrel(ctxt.Cursym)
  3855  					*r = rel
  3856  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3857  				}
  3858  
  3859  				ctxt.AsmBuf.Put1(byte(v))
  3860  				if op > 1 {
  3861  					ctxt.AsmBuf.Put1(byte(v >> 8))
  3862  					if op > 2 {
  3863  						ctxt.AsmBuf.PutInt16(int16(v >> 16))
  3864  						if op > 4 {
  3865  							ctxt.AsmBuf.PutInt32(int32(v >> 32))
  3866  						}
  3867  					}
  3868  				}
  3869  			}
  3870  
  3871  			return
  3872  		}
  3873  		z += int(yt.zoffset) + xo
  3874  	}
  3875  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3876  		var pp obj.Prog
  3877  		var t []byte
  3878  		if p.As == mo[0].as {
  3879  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3880  				t = mo[0].op[:]
  3881  				switch mo[0].code {
  3882  				default:
  3883  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3884  
  3885  				case 0: /* lit */
  3886  					for z = 0; t[z] != E; z++ {
  3887  						ctxt.AsmBuf.Put1(t[z])
  3888  					}
  3889  
  3890  				case 1: /* r,m */
  3891  					ctxt.AsmBuf.Put1(t[0])
  3892  					asmando(ctxt, p, &p.To, int(t[1]))
  3893  
  3894  				case 2: /* m,r */
  3895  					ctxt.AsmBuf.Put1(t[0])
  3896  					asmando(ctxt, p, &p.From, int(t[1]))
  3897  
  3898  				case 3: /* r,m - 2op */
  3899  					ctxt.AsmBuf.Put2(t[0], t[1])
  3900  					asmando(ctxt, p, &p.To, int(t[2]))
  3901  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3902  
  3903  				case 4: /* m,r - 2op */
  3904  					ctxt.AsmBuf.Put2(t[0], t[1])
  3905  					asmando(ctxt, p, &p.From, int(t[2]))
  3906  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3907  
  3908  				case 5: /* load full pointer, trash heap */
  3909  					if t[0] != 0 {
  3910  						ctxt.AsmBuf.Put1(t[0])
  3911  					}
  3912  					switch p.To.Index {
  3913  					default:
  3914  						goto bad
  3915  
  3916  					case REG_DS:
  3917  						ctxt.AsmBuf.Put1(0xc5)
  3918  
  3919  					case REG_SS:
  3920  						ctxt.AsmBuf.Put2(0x0f, 0xb2)
  3921  
  3922  					case REG_ES:
  3923  						ctxt.AsmBuf.Put1(0xc4)
  3924  
  3925  					case REG_FS:
  3926  						ctxt.AsmBuf.Put2(0x0f, 0xb4)
  3927  
  3928  					case REG_GS:
  3929  						ctxt.AsmBuf.Put2(0x0f, 0xb5)
  3930  					}
  3931  
  3932  					asmand(ctxt, p, &p.From, &p.To)
  3933  
  3934  				case 6: /* double shift */
  3935  					if t[0] == Pw {
  3936  						if p.Mode != 64 {
  3937  							ctxt.Diag("asmins: illegal 64: %v", p)
  3938  						}
  3939  						ctxt.Rexflag |= Pw
  3940  						t = t[1:]
  3941  					} else if t[0] == Pe {
  3942  						ctxt.AsmBuf.Put1(Pe)
  3943  						t = t[1:]
  3944  					}
  3945  
  3946  					switch p.From.Type {
  3947  					default:
  3948  						goto bad
  3949  
  3950  					case obj.TYPE_CONST:
  3951  						ctxt.AsmBuf.Put2(0x0f, t[0])
  3952  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3953  						ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3954  
  3955  					case obj.TYPE_REG:
  3956  						switch p.From.Reg {
  3957  						default:
  3958  							goto bad
  3959  
  3960  						case REG_CL, REG_CX:
  3961  							ctxt.AsmBuf.Put2(0x0f, t[1])
  3962  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3963  						}
  3964  					}
  3965  
  3966  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3967  				// where you load the TLS base register into a register and then index off that
  3968  				// register to access the actual TLS variables. Systems that allow direct TLS access
  3969  				// are handled in prefixof above and should not be listed here.
  3970  				case 7: /* mov tls, r */
  3971  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  3972  						ctxt.Diag("invalid load of TLS: %v", p)
  3973  					}
  3974  
  3975  					if p.Mode == 32 {
  3976  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3977  						// where you load the TLS base register into a register and then index off that
  3978  						// register to access the actual TLS variables. Systems that allow direct TLS access
  3979  						// are handled in prefixof above and should not be listed here.
  3980  						switch ctxt.Headtype {
  3981  						default:
  3982  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  3983  
  3984  						case obj.Hlinux,
  3985  							obj.Hnacl:
  3986  							if ctxt.Flag_shared {
  3987  								// Note that this is not generating the same insns as the other cases.
  3988  								//     MOV TLS, dst
  3989  								// becomes
  3990  								//     call __x86.get_pc_thunk.dst
  3991  								//     movl (gotpc + g@gotntpoff)(dst), dst
  3992  								// which is encoded as
  3993  								//     call __x86.get_pc_thunk.dst
  3994  								//     movq 0(dst), dst
  3995  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  3996  								// is g, which we can't check here, but will when we assemble the second
  3997  								// instruction.
  3998  								dst := p.To.Reg
  3999  								ctxt.AsmBuf.Put1(0xe8)
  4000  								r = obj.Addrel(ctxt.Cursym)
  4001  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4002  								r.Type = obj.R_CALL
  4003  								r.Siz = 4
  4004  								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk."+strings.ToLower(Rconv(int(dst))), 0)
  4005  								ctxt.AsmBuf.PutInt32(0)
  4006  
  4007  								ctxt.AsmBuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4008  								r = obj.Addrel(ctxt.Cursym)
  4009  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4010  								r.Type = obj.R_TLS_IE
  4011  								r.Siz = 4
  4012  								r.Add = 2
  4013  								ctxt.AsmBuf.PutInt32(0)
  4014  							} else {
  4015  								// ELF TLS base is 0(GS).
  4016  								pp.From = p.From
  4017  
  4018  								pp.From.Type = obj.TYPE_MEM
  4019  								pp.From.Reg = REG_GS
  4020  								pp.From.Offset = 0
  4021  								pp.From.Index = REG_NONE
  4022  								pp.From.Scale = 0
  4023  								ctxt.AsmBuf.Put2(0x65, // GS
  4024  									0x8B)
  4025  								asmand(ctxt, p, &pp.From, &p.To)
  4026  							}
  4027  						case obj.Hplan9:
  4028  							if ctxt.Plan9privates == nil {
  4029  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4030  							}
  4031  							pp.From = obj.Addr{}
  4032  							pp.From.Type = obj.TYPE_MEM
  4033  							pp.From.Name = obj.NAME_EXTERN
  4034  							pp.From.Sym = ctxt.Plan9privates
  4035  							pp.From.Offset = 0
  4036  							pp.From.Index = REG_NONE
  4037  							ctxt.AsmBuf.Put1(0x8B)
  4038  							asmand(ctxt, p, &pp.From, &p.To)
  4039  
  4040  						case obj.Hwindows, obj.Hwindowsgui:
  4041  							// Windows TLS base is always 0x14(FS).
  4042  							pp.From = p.From
  4043  
  4044  							pp.From.Type = obj.TYPE_MEM
  4045  							pp.From.Reg = REG_FS
  4046  							pp.From.Offset = 0x14
  4047  							pp.From.Index = REG_NONE
  4048  							pp.From.Scale = 0
  4049  							ctxt.AsmBuf.Put2(0x64, // FS
  4050  								0x8B)
  4051  							asmand(ctxt, p, &pp.From, &p.To)
  4052  						}
  4053  						break
  4054  					}
  4055  
  4056  					switch ctxt.Headtype {
  4057  					default:
  4058  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4059  
  4060  					case obj.Hlinux:
  4061  						if !ctxt.Flag_shared {
  4062  							log.Fatalf("unknown TLS base location for linux without -shared")
  4063  						}
  4064  						// Note that this is not generating the same insn as the other cases.
  4065  						//     MOV TLS, R_to
  4066  						// becomes
  4067  						//     movq g@gottpoff(%rip), R_to
  4068  						// which is encoded as
  4069  						//     movq 0(%rip), R_to
  4070  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4071  						// is g, which we can't check here, but will when we assemble the second
  4072  						// instruction.
  4073  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4074  
  4075  						ctxt.AsmBuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4076  						r = obj.Addrel(ctxt.Cursym)
  4077  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4078  						r.Type = obj.R_TLS_IE
  4079  						r.Siz = 4
  4080  						r.Add = -4
  4081  						ctxt.AsmBuf.PutInt32(0)
  4082  
  4083  					case obj.Hplan9:
  4084  						if ctxt.Plan9privates == nil {
  4085  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4086  						}
  4087  						pp.From = obj.Addr{}
  4088  						pp.From.Type = obj.TYPE_MEM
  4089  						pp.From.Name = obj.NAME_EXTERN
  4090  						pp.From.Sym = ctxt.Plan9privates
  4091  						pp.From.Offset = 0
  4092  						pp.From.Index = REG_NONE
  4093  						ctxt.Rexflag |= Pw
  4094  						ctxt.AsmBuf.Put1(0x8B)
  4095  						asmand(ctxt, p, &pp.From, &p.To)
  4096  
  4097  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4098  						// TLS base is 0(FS).
  4099  						pp.From = p.From
  4100  
  4101  						pp.From.Type = obj.TYPE_MEM
  4102  						pp.From.Name = obj.NAME_NONE
  4103  						pp.From.Reg = REG_NONE
  4104  						pp.From.Offset = 0
  4105  						pp.From.Index = REG_NONE
  4106  						pp.From.Scale = 0
  4107  						ctxt.Rexflag |= Pw
  4108  						ctxt.AsmBuf.Put2(0x64, // FS
  4109  							0x8B)
  4110  						asmand(ctxt, p, &pp.From, &p.To)
  4111  
  4112  					case obj.Hwindows, obj.Hwindowsgui:
  4113  						// Windows TLS base is always 0x28(GS).
  4114  						pp.From = p.From
  4115  
  4116  						pp.From.Type = obj.TYPE_MEM
  4117  						pp.From.Name = obj.NAME_NONE
  4118  						pp.From.Reg = REG_GS
  4119  						pp.From.Offset = 0x28
  4120  						pp.From.Index = REG_NONE
  4121  						pp.From.Scale = 0
  4122  						ctxt.Rexflag |= Pw
  4123  						ctxt.AsmBuf.Put2(0x65, // GS
  4124  							0x8B)
  4125  						asmand(ctxt, p, &pp.From, &p.To)
  4126  					}
  4127  				}
  4128  				return
  4129  			}
  4130  		}
  4131  	}
  4132  	goto bad
  4133  
  4134  bad:
  4135  	if p.Mode != 64 {
  4136  		/*
  4137  		 * here, the assembly has failed.
  4138  		 * if its a byte instruction that has
  4139  		 * unaddressable registers, try to
  4140  		 * exchange registers and reissue the
  4141  		 * instruction with the operands renamed.
  4142  		 */
  4143  		pp := *p
  4144  
  4145  		unbytereg(&pp.From, &pp.Ft)
  4146  		unbytereg(&pp.To, &pp.Tt)
  4147  
  4148  		z := int(p.From.Reg)
  4149  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4150  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4151  			// For now, different to keep bit-for-bit compatibility.
  4152  			if p.Mode == 32 {
  4153  				breg := byteswapreg(ctxt, &p.To)
  4154  				if breg != REG_AX {
  4155  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4156  					asmando(ctxt, p, &p.From, reg[breg])
  4157  					subreg(&pp, z, breg)
  4158  					doasm(ctxt, &pp)
  4159  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4160  					asmando(ctxt, p, &p.From, reg[breg])
  4161  				} else {
  4162  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4163  					subreg(&pp, z, REG_AX)
  4164  					doasm(ctxt, &pp)
  4165  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4166  				}
  4167  				return
  4168  			}
  4169  
  4170  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4171  				// We certainly don't want to exchange
  4172  				// with AX if the op is MUL or DIV.
  4173  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4174  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4175  				subreg(&pp, z, REG_BX)
  4176  				doasm(ctxt, &pp)
  4177  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4178  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4179  			} else {
  4180  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4181  				subreg(&pp, z, REG_AX)
  4182  				doasm(ctxt, &pp)
  4183  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4184  			}
  4185  			return
  4186  		}
  4187  
  4188  		z = int(p.To.Reg)
  4189  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4190  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4191  			// For now, different to keep bit-for-bit compatibility.
  4192  			if p.Mode == 32 {
  4193  				breg := byteswapreg(ctxt, &p.From)
  4194  				if breg != REG_AX {
  4195  					ctxt.AsmBuf.Put1(0x87) //xchg rhs,bx
  4196  					asmando(ctxt, p, &p.To, reg[breg])
  4197  					subreg(&pp, z, breg)
  4198  					doasm(ctxt, &pp)
  4199  					ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4200  					asmando(ctxt, p, &p.To, reg[breg])
  4201  				} else {
  4202  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4203  					subreg(&pp, z, REG_AX)
  4204  					doasm(ctxt, &pp)
  4205  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4206  				}
  4207  				return
  4208  			}
  4209  
  4210  			if isax(&p.From) {
  4211  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4212  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4213  				subreg(&pp, z, REG_BX)
  4214  				doasm(ctxt, &pp)
  4215  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4216  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4217  			} else {
  4218  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4219  				subreg(&pp, z, REG_AX)
  4220  				doasm(ctxt, &pp)
  4221  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4222  			}
  4223  			return
  4224  		}
  4225  	}
  4226  
  4227  	ctxt.Diag("invalid instruction: %v", p)
  4228  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4229  	return
  4230  }
  4231  
  4232  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4233  // which is not referenced in a.
  4234  // If a is empty, it returns BX to account for MULB-like instructions
  4235  // that might use DX and AX.
  4236  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4237  	cand := 1
  4238  	canc := cand
  4239  	canb := canc
  4240  	cana := canb
  4241  
  4242  	if a.Type == obj.TYPE_NONE {
  4243  		cand = 0
  4244  		cana = cand
  4245  	}
  4246  
  4247  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4248  		switch a.Reg {
  4249  		case REG_NONE:
  4250  			cand = 0
  4251  			cana = cand
  4252  
  4253  		case REG_AX, REG_AL, REG_AH:
  4254  			cana = 0
  4255  
  4256  		case REG_BX, REG_BL, REG_BH:
  4257  			canb = 0
  4258  
  4259  		case REG_CX, REG_CL, REG_CH:
  4260  			canc = 0
  4261  
  4262  		case REG_DX, REG_DL, REG_DH:
  4263  			cand = 0
  4264  		}
  4265  	}
  4266  
  4267  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4268  		switch a.Index {
  4269  		case REG_AX:
  4270  			cana = 0
  4271  
  4272  		case REG_BX:
  4273  			canb = 0
  4274  
  4275  		case REG_CX:
  4276  			canc = 0
  4277  
  4278  		case REG_DX:
  4279  			cand = 0
  4280  		}
  4281  	}
  4282  
  4283  	if cana != 0 {
  4284  		return REG_AX
  4285  	}
  4286  	if canb != 0 {
  4287  		return REG_BX
  4288  	}
  4289  	if canc != 0 {
  4290  		return REG_CX
  4291  	}
  4292  	if cand != 0 {
  4293  		return REG_DX
  4294  	}
  4295  
  4296  	ctxt.Diag("impossible byte register")
  4297  	log.Fatalf("bad code")
  4298  	return 0
  4299  }
  4300  
  4301  func isbadbyte(a *obj.Addr) bool {
  4302  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4303  }
  4304  
  4305  var naclret = []uint8{
  4306  	0x5e, // POPL SI
  4307  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4308  	0x83,
  4309  	0xe6,
  4310  	0xe0, // ANDL $~31, SI
  4311  	0x4c,
  4312  	0x01,
  4313  	0xfe, // ADDQ R15, SI
  4314  	0xff,
  4315  	0xe6, // JMP SI
  4316  }
  4317  
  4318  var naclret8 = []uint8{
  4319  	0x5d, // POPL BP
  4320  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4321  	0x83,
  4322  	0xe5,
  4323  	0xe0, // ANDL $~31, BP
  4324  	0xff,
  4325  	0xe5, // JMP BP
  4326  }
  4327  
  4328  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4329  
  4330  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4331  
  4332  var naclmovs = []uint8{
  4333  	0x89,
  4334  	0xf6, // MOVL SI, SI
  4335  	0x49,
  4336  	0x8d,
  4337  	0x34,
  4338  	0x37, // LEAQ (R15)(SI*1), SI
  4339  	0x89,
  4340  	0xff, // MOVL DI, DI
  4341  	0x49,
  4342  	0x8d,
  4343  	0x3c,
  4344  	0x3f, // LEAQ (R15)(DI*1), DI
  4345  }
  4346  
  4347  var naclstos = []uint8{
  4348  	0x89,
  4349  	0xff, // MOVL DI, DI
  4350  	0x49,
  4351  	0x8d,
  4352  	0x3c,
  4353  	0x3f, // LEAQ (R15)(DI*1), DI
  4354  }
  4355  
  4356  func nacltrunc(ctxt *obj.Link, reg int) {
  4357  	if reg >= REG_R8 {
  4358  		ctxt.AsmBuf.Put1(0x45)
  4359  	}
  4360  	reg = (reg - REG_AX) & 7
  4361  	ctxt.AsmBuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4362  }
  4363  
  4364  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4365  	ctxt.AsmBuf.Reset()
  4366  	ctxt.Asmode = int(p.Mode)
  4367  
  4368  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4369  		switch p.As {
  4370  		case obj.ARET:
  4371  			ctxt.AsmBuf.Put(naclret8)
  4372  			return
  4373  
  4374  		case obj.ACALL,
  4375  			obj.AJMP:
  4376  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4377  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4378  			}
  4379  
  4380  		case AINT:
  4381  			ctxt.AsmBuf.Put1(0xf4)
  4382  			return
  4383  		}
  4384  	}
  4385  
  4386  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4387  		if p.As == AREP {
  4388  			ctxt.Rep++
  4389  			return
  4390  		}
  4391  
  4392  		if p.As == AREPN {
  4393  			ctxt.Repn++
  4394  			return
  4395  		}
  4396  
  4397  		if p.As == ALOCK {
  4398  			ctxt.Lock++
  4399  			return
  4400  		}
  4401  
  4402  		if p.As != ALEAQ && p.As != ALEAL {
  4403  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4404  				nacltrunc(ctxt, int(p.From.Index))
  4405  			}
  4406  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4407  				nacltrunc(ctxt, int(p.To.Index))
  4408  			}
  4409  		}
  4410  
  4411  		switch p.As {
  4412  		case obj.ARET:
  4413  			ctxt.AsmBuf.Put(naclret)
  4414  			return
  4415  
  4416  		case obj.ACALL,
  4417  			obj.AJMP:
  4418  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4419  				// ANDL $~31, reg
  4420  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4421  				// ADDQ R15, reg
  4422  				ctxt.AsmBuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4423  			}
  4424  
  4425  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4426  				// ANDL $~31, reg
  4427  				ctxt.AsmBuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4428  				// ADDQ R15, reg
  4429  				ctxt.AsmBuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4430  			}
  4431  
  4432  		case AINT:
  4433  			ctxt.AsmBuf.Put1(0xf4)
  4434  			return
  4435  
  4436  		case ASCASB,
  4437  			ASCASW,
  4438  			ASCASL,
  4439  			ASCASQ,
  4440  			ASTOSB,
  4441  			ASTOSW,
  4442  			ASTOSL,
  4443  			ASTOSQ:
  4444  			ctxt.AsmBuf.Put(naclstos)
  4445  
  4446  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4447  			ctxt.AsmBuf.Put(naclmovs)
  4448  		}
  4449  
  4450  		if ctxt.Rep != 0 {
  4451  			ctxt.AsmBuf.Put1(0xf3)
  4452  			ctxt.Rep = 0
  4453  		}
  4454  
  4455  		if ctxt.Repn != 0 {
  4456  			ctxt.AsmBuf.Put1(0xf2)
  4457  			ctxt.Repn = 0
  4458  		}
  4459  
  4460  		if ctxt.Lock != 0 {
  4461  			ctxt.AsmBuf.Put1(0xf0)
  4462  			ctxt.Lock = 0
  4463  		}
  4464  	}
  4465  
  4466  	ctxt.Rexflag = 0
  4467  	ctxt.Vexflag = 0
  4468  	mark := ctxt.AsmBuf.Len()
  4469  	ctxt.Asmode = int(p.Mode)
  4470  	doasm(ctxt, p)
  4471  	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
  4472  		/*
  4473  		 * as befits the whole approach of the architecture,
  4474  		 * the rex prefix must appear before the first opcode byte
  4475  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4476  		 * before the 0f opcode escape!), or it might be ignored.
  4477  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4478  		 */
  4479  		if p.Mode != 64 {
  4480  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4481  		}
  4482  		n := ctxt.AsmBuf.Len()
  4483  		var np int
  4484  		for np = mark; np < n; np++ {
  4485  			c := ctxt.AsmBuf.Peek(np)
  4486  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4487  				break
  4488  			}
  4489  		}
  4490  		ctxt.AsmBuf.Insert(np, byte(0x40|ctxt.Rexflag))
  4491  	}
  4492  
  4493  	n := ctxt.AsmBuf.Len()
  4494  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4495  		r := &ctxt.Cursym.R[i]
  4496  		if int64(r.Off) < p.Pc {
  4497  			break
  4498  		}
  4499  		if ctxt.Rexflag != 0 {
  4500  			r.Off++
  4501  		}
  4502  		if r.Type == obj.R_PCREL {
  4503  			if p.Mode == 64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4504  				// PC-relative addressing is relative to the end of the instruction,
  4505  				// but the relocations applied by the linker are relative to the end
  4506  				// of the relocation. Because immediate instruction
  4507  				// arguments can follow the PC-relative memory reference in the
  4508  				// instruction encoding, the two may not coincide. In this case,
  4509  				// adjust addend so that linker can keep relocating relative to the
  4510  				// end of the relocation.
  4511  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4512  			} else if p.Mode == 32 {
  4513  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4514  				// assumes that the previous instruction loaded the PC of the end
  4515  				// of that instruction into CX, so the adjustment is relative to
  4516  				// that.
  4517  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4518  			}
  4519  		}
  4520  		if r.Type == obj.R_GOTPCREL && p.Mode == 32 {
  4521  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4522  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4523  		}
  4524  
  4525  	}
  4526  
  4527  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4528  		switch p.To.Reg {
  4529  		case REG_SP:
  4530  			ctxt.AsmBuf.Put(naclspfix)
  4531  		case REG_BP:
  4532  			ctxt.AsmBuf.Put(naclbpfix)
  4533  		}
  4534  	}
  4535  }