github.com/sanprasirt/go@v0.0.0-20170607001320-a027466e4b6d/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
        // Package-level symbols used by the x86 assembler.
        // NOTE(review): neither variable is assigned or read in this part of the
        // file; the per-field notes are inferred from the names and need confirming.
    43  var (
    44  	plan9privates *obj.LSym // presumably the Plan 9 per-process private data symbol — TODO confirm
    45  	deferreturn   *obj.LSym // presumably the symbol for the runtime's deferreturn — TODO confirm
    46  )
    47  
    48  // Instruction layout.
    49  
    50  const (
    51  	// Loop alignment constants:
    52  	// want to align loop entry to LoopAlign-byte boundary,
    53  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    54  	// We define a loop entry as the target of a backward jump.
    55  	//
    56  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    57  	// and it aligns all jump targets, not just backward jump targets.
    58  	//
    59  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    60  	// is very slight but negative, so the alignment is disabled by
    61  	// setting MaxLoopPad = 0. The code is here for reference and
    62  	// for future experiments.
    63  	//
    64  	LoopAlign  = 16 // boundary, in bytes, that loop entries would be aligned to
    65  	MaxLoopPad = 0  // maximum NOP padding in bytes; 0 disables loop alignment
    66  )
    67  
        // Optab describes how one instruction is encoded. doasm finds the entry
        // whose as equals p.As, then scans ytab for a row matching the operand
        // classes while stepping through op.
    68  type Optab struct {
    69  	as     obj.As    // instruction this entry encodes
    70  	ytab   []ytab    // accepted operand-class combinations, scanned in order
    71  	prefix uint8     // one of the P* constants; controls the prefix bytes to emit
    72  	op     [23]uint8 // opcode bytes; each ytab row uses zoffset of them in turn
    73  }
    74  
        // ytab is one row of an instruction's operand table: the operand classes
        // (Y* constants) the row accepts and how to encode the instruction when
        // they match.
    75  type ytab struct {
    76  	from    uint8 // operand class required of p.From
    77  	from3   uint8 // operand class of an extra source operand, Ynone in most rows — presumably p.From3, TODO confirm
    78  	to      uint8 // operand class required of p.To
    79  	zcase   uint8 // encoding case (Z* constant) that doasm switches on
    80  	zoffset uint8 // bytes of Optab.op this row uses; z advances by this much past a non-matching row
    81  }
    82  
        // Movtab is a table row keyed by an instruction and operand types.
        // NOTE(review): no use of Movtab is visible in this part of the file; the
        // field notes below are inferred from the parallel ytab fields (from,
        // from3, to) and need confirming against the code that reads the table.
    83  type Movtab struct {
    84  	as   obj.As   // instruction
    85  	ft   uint8    // presumably the operand class of the from operand
    86  	f3t  uint8    // presumably the operand class of the from3 operand
    87  	tt   uint8    // presumably the operand class of the to operand
    88  	code uint8    // presumably selects how op is interpreted — TODO confirm
    89  	op   [4]uint8 // up to four bytes of encoding data
    90  }
    91  
        // Operand classes ("Ytypes"). oclass computes the most specific class of
        // an operand, and ycover records which more general classes that operand
        // also satisfies. ytab rows name these classes in their from, from3, and
        // to fields. The values are consecutive (iota), so the order must not change
        // without revisiting everything indexed by them.
    92  const (
    93  	Yxxx = iota
    94  	Ynone
    95  	Yi0 // $0
    96  	Yi1 // $1
    97  	Yi8 // $x, x fits in int8
    98  	Yu8 // $x, x fits in uint8
    99  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   100  	Ys32 // signed 32-bit constant
   101  	Yi32
   102  	Yi64
   103  	Yiauto
   104  	Yal
   105  	Ycl
   106  	Yax
   107  	Ycx
   108  	Yrb
   109  	Yrl
   110  	Yrl32 // Yrl on 32-bit system
   111  	Yrf
   112  	Yf0
   113  	Yrx
   114  	Ymb
   115  	Yml
   116  	Ym
   117  	Ybr
   118  	Ycs
   119  	Yss
   120  	Yds
   121  	Yes
   122  	Yfs
   123  	Ygs
   124  	Ygdtr
   125  	Yidtr
   126  	Yldtr
   127  	Ymsw
   128  	Ytask
   129  	Ycr0
   130  	Ycr1
   131  	Ycr2
   132  	Ycr3
   133  	Ycr4
   134  	Ycr5
   135  	Ycr6
   136  	Ycr7
   137  	Ycr8
   138  	Ydr0
   139  	Ydr1
   140  	Ydr2
   141  	Ydr3
   142  	Ydr4
   143  	Ydr5
   144  	Ydr6
   145  	Ydr7
   146  	Ytr0
   147  	Ytr1
   148  	Ytr2
   149  	Ytr3
   150  	Ytr4
   151  	Ytr5
   152  	Ytr6
   153  	Ytr7
   154  	Ymr
   155  	Ymm
   156  	Yxr // XMM register
   157  	Yxm // XMM register or memory operand
   158  	Yyr // YMM register
   159  	Yym // YMM register or memory operand
   160  	Ytls
   161  	Ytextsize
   162  	Yindir
   163  	Ymax // one past the last class; ycover has Ymax*Ymax entries
   164  )
   165  
        // Encoding cases ("Ztypes"), stored in ytab.zcase. The switch on yt.zcase
        // in doasm implements each case; together with the opcode bytes at z the
        // case determines the bytes laid down for the instruction. The values are
        // consecutive (iota).
   166  const (
   167  	Zxxx = iota
   168  	Zlit
   169  	Zlitm_r
   170  	Z_rp
   171  	Zbr
   172  	Zcall
   173  	Zcallcon
   174  	Zcallduff
   175  	Zcallind
   176  	Zcallindreg
   177  	Zib_
   178  	Zib_rp
   179  	Zibo_m // opcode byte, then an asmando, then a single immediate byte
   180  	Zibo_m_xm
   181  	Zil_
   182  	Zil_rp
   183  	Ziq_rp
   184  	Zilo_m // like Zibo_m but with a long (32-bit) immediate
   185  	Zjmp
   186  	Zjmpcon
   187  	Zloop
   188  	Zo_iw
   189  	Zm_o
   190  	Zm_r
   191  	Zm2_r
   192  	Zm_r_xm
   193  	Zm_r_i_xm
   194  	Zm_r_xm_nr
   195  	Zr_m_xm_nr
   196  	Zibm_r /* mmx1,mmx2/mem64,imm8 */
   197  	Zibr_m
   198  	Zmb_r
   199  	Zaut_r
   200  	Zo_m
   201  	Zo_m64
   202  	Zpseudo
   203  	Zr_m
   204  	Zr_m_xm
   205  	Zrp_
   206  	Z_ib
   207  	Z_il
   208  	Zm_ibo
   209  	Zm_ilo
   210  	Zib_rr
   211  	Zil_rr
   212  	Zclr
   213  	Zbyte
   214  	Zvex_rm_v_r
   215  	Zvex_r_v_rm
   216  	Zvex_v_rm_r
   217  	Zvex_i_rm_r
   218  	Zvex_i_r_v
   219  	Zvex_i_rm_v_r
   220  	Zmax // one past the last encoding case
   221  )
   222  
        // Prefix codes stored in Optab.prefix; they control the prefix bytes
        // emitted for an instruction. Some values are the literal prefix byte,
        // others are symbolic markers, as noted on each line.
   223  const (
   224  	Px   = 0
   225  	Px1  = 1    // symbolic; exact value doesn't matter
   226  	P32  = 0x32 /* 32-bit only */
   227  	Pe   = 0x66 /* operand escape */
   228  	Pm   = 0x0f /* 2byte opcode escape */
   229  	Pq   = 0xff /* both escapes: 66 0f */
   230  	Pb   = 0xfe /* byte operands */
   231  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   232  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   233  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   234  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   235  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   236  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   237  	Pw   = 0x48 /* Rex.w */
   238  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   239  	Py   = 0x80 /* defaults to 64-bit mode */
   240  	Py1  = 0x81 // symbolic; exact value doesn't matter
   241  	Py3  = 0x83 // symbolic; exact value doesn't matter
   242  	Pvex = 0x84 // symbolic: exact value doesn't matter
   243  
        	// Bits of the REX prefix byte (Pw, 0x48, is 0x40 | Rxw).
   244  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   245  	Rxr = 1 << 2 /* extend modrm reg */
   246  	Rxx = 1 << 1 /* extend sib index */
   247  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   248  )
   249  
        // VEX prefix descriptors. In a Pvex optab entry the opcode array holds
        // (descriptor, opcode byte) pairs, and each descriptor is one of the
        // VEX_* combinations below, built by OR-ing one value from each field.
        // Several names share a value on purpose: vexLZ, vexLIG, and vex128 are
        // all 0, as are vexWIG and vexW0.
   250  const (
   251  	// Encoding for VEX prefix in tables.
   252  	// The P, L, and W fields are chosen to match
   253  	// their eventual locations in the VEX prefix bytes.
   254  
   255  	// P field - 2 bits
   256  	vex66 = 1 << 0
   257  	vexF3 = 2 << 0
   258  	vexF2 = 3 << 0
   259  	// L field - 1 bit
   260  	vexLZ  = 0 << 2
   261  	vexLIG = 0 << 2
   262  	vex128 = 0 << 2
   263  	vex256 = 1 << 2
   264  	// W field - 1 bit
   265  	vexWIG = 0 << 7
   266  	vexW0  = 0 << 7
   267  	vexW1  = 1 << 7
   268  	// M field - 5 bits, but mostly reserved; we can store up to 4
   269  	vex0F   = 1 << 3
   270  	vex0F38 = 2 << 3
   271  	vex0F3A = 3 << 3
   272  
   273  	// Combinations used in the manual.
   274  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   275  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   276  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   277  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   278  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   279  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   280  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   281  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   282  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   283  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   284  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   285  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   286  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   287  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   288  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   289  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   290  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   291  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   292  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   293  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   294  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   295  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   296  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   297  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   298  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   299  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   300  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   301  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   302  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   303  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   304  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   305  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   306  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   307  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   308  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   309  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   310  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   311  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   312  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   313  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   314  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   315  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   316  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   317  )
   318  
        // ycover is a Ymax-by-Ymax table flattened into one array: instinit sets
        // ycover[a*Ymax+b] = 1 when an operand of class a also counts as class b.
   319  var ycover [Ymax * Ymax]uint8
   320  
        // reg has one entry per register number below MAXREG.
        // NOTE(review): it is filled in elsewhere; presumably each entry is the
        // register's hardware encoding — confirm against instinit.
   321  var reg [MAXREG]int
   322  
        // regrex has MAXREG+1 entries, one more than reg.
        // NOTE(review): it is filled in elsewhere; presumably each entry holds the
        // REX bits (Rx* constants) a register requires — confirm against instinit.
   323  var regrex [MAXREG + 1]int
   324  
        // ynone is the table for instructions written with no operands; optab
        // uses it for AAAA, ACLC, ACLD, and similar.
   325  var ynone = []ytab{
   326  	{Ynone, Ynone, Ynone, Zlit, 1},
   327  }
   328  
        // ytext lists the forms of a Zpseudo instruction with a Ymb from operand
        // and a Ytextsize to operand, with or without a Yi32 from3 operand.
   329  var ytext = []ytab{
   330  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   331  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   332  }
   333  
        // ynop accepts either no operands or a single Yiauto, Yml, Yrf, or Yxr
        // operand in the from or the to position. Every row is Zpseudo.
   334  var ynop = []ytab{
   335  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   336  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   337  	{Ynone, Ynone, Yml, Zpseudo, 0},
   338  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   339  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   340  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   341  	{Yml, Ynone, Ynone, Zpseudo, 0},
   342  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   343  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   344  }
   345  
        // yfuncdata is a single Zpseudo form: Yi32 from operand, Ym to operand.
   346  var yfuncdata = []ytab{
   347  	{Yi32, Ynone, Ym, Zpseudo, 0},
   348  }
   349  
        // ypcdata is a single Zpseudo form with Yi32 operands on both sides.
   350  var ypcdata = []ytab{
   351  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   352  }
   353  
        // yxorb is the two-operand byte table used in optab by AADCB, AADDB, and
        // AANDB: a Yi32 immediate into Yal or Ymb, or Yrb and Ymb in either direction.
   354  var yxorb = []ytab{
   355  	{Yi32, Ynone, Yal, Zib_, 1},
   356  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   357  	{Yrb, Ynone, Ymb, Zr_m, 1},
   358  	{Ymb, Ynone, Yrb, Zm_r, 1},
   359  }
   360  
        // yaddl is the table shared in optab by the L, Q, and W forms of ADC, ADD,
        // and AND. The long comment above optab walks through how its five rows
        // select opcode bytes for AADDL.
   361  var yaddl = []ytab{
   362  	{Yi8, Ynone, Yml, Zibo_m, 2},
   363  	{Yi32, Ynone, Yax, Zil_, 1},
   364  	{Yi32, Ynone, Yml, Zilo_m, 2},
   365  	{Yrl, Ynone, Yml, Zr_m, 1},
   366  	{Yml, Ynone, Yrl, Zm_r, 1},
   367  }
   368  
        // yincl takes only a to operand: Yrl (encoded with Z_rp, 1 opcode byte)
        // or Yml (Zo_m, 2 opcode bytes).
   369  var yincl = []ytab{
   370  	{Ynone, Ynone, Yrl, Z_rp, 1},
   371  	{Ynone, Ynone, Yml, Zo_m, 2},
   372  }
   373  
        // yincq takes only a Yml to operand; unlike yincl it has no Z_rp row.
   374  var yincq = []ytab{
   375  	{Ynone, Ynone, Yml, Zo_m, 2},
   376  }
   377  
        // ycmpb is a byte table with the immediate on the to side: Yal or Ymb
        // against a Yi32 immediate, plus Ymb and Yrb in either direction.
   378  var ycmpb = []ytab{
   379  	{Yal, Ynone, Yi32, Z_ib, 1},
   380  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   381  	{Ymb, Ynone, Yrb, Zm_r, 1},
   382  	{Yrb, Ynone, Ymb, Zr_m, 1},
   383  }
   384  
        // ycmpl is the Yml/Yrl/Yax counterpart of ycmpb, with a leading Yi8 row
        // (Zm_ibo) ahead of the Yi32 rows.
   385  var ycmpl = []ytab{
   386  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   387  	{Yax, Ynone, Yi32, Z_il, 1},
   388  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   389  	{Yml, Ynone, Yrl, Zm_r, 1},
   390  	{Yrl, Ynone, Yml, Zr_m, 1},
   391  }
   392  
        // yshb pairs a Ymb to operand with a from operand of Yi1 ($1), Yi32, or Ycx.
   393  var yshb = []ytab{
   394  	{Yi1, Ynone, Ymb, Zo_m, 2},
   395  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   396  	{Ycx, Ynone, Ymb, Zo_m, 2},
   397  }
   398  
        // yshl pairs a Yml to operand with a from operand of Yi1 ($1), Yi32, Ycl,
        // or Ycx.
   399  var yshl = []ytab{
   400  	{Yi1, Ynone, Yml, Zo_m, 2},
   401  	{Yi32, Ynone, Yml, Zibo_m, 2},
   402  	{Ycl, Ynone, Yml, Zo_m, 2},
   403  	{Ycx, Ynone, Yml, Zo_m, 2},
   404  }
   405  
        // ytestl has the same rows as yaddl minus the leading Yi8 row.
   406  var ytestl = []ytab{
   407  	{Yi32, Ynone, Yax, Zil_, 1},
   408  	{Yi32, Ynone, Yml, Zilo_m, 2},
   409  	{Yrl, Ynone, Yml, Zr_m, 1},
   410  	{Yml, Ynone, Yrl, Zm_r, 1},
   411  }
   412  
        // ymovb covers Yrb/Ymb moves in both directions and a Yi32 immediate into
        // Yrb (Zib_rp) or Ymb (Zibo_m).
   413  var ymovb = []ytab{
   414  	{Yrb, Ynone, Ymb, Zr_m, 1},
   415  	{Ymb, Ynone, Yrb, Zm_r, 1},
   416  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   417  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   418  }
   419  
        // ybtl is used in optab by the BT, BTC, BTR, and BTS families: a Yi8 or
        // Yrl from operand against a Yml to operand.
   420  var ybtl = []ytab{
   421  	{Yi8, Ynone, Yml, Zibo_m, 2},
   422  	{Yrl, Ynone, Yml, Zr_m, 1},
   423  }
   424  
        // ymovw covers Yrl/Yml moves in both directions, immediates into Yrl or
        // Yml (with a dedicated Zclr row for $0 into Yrl), and a Yiauto from
        // operand into Yrl (Zaut_r).
   425  var ymovw = []ytab{
   426  	{Yrl, Ynone, Yml, Zr_m, 1},
   427  	{Yml, Ynone, Yrl, Zm_r, 1},
   428  	{Yi0, Ynone, Yrl, Zclr, 1},
   429  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   430  	{Yi32, Ynone, Yml, Zilo_m, 2},
   431  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   432  }
   433  
        // ymovl has the same rows as ymovw plus MMX (Ymr) and XMM (Yxr) MOVD rows
        // inserted ahead of the final Yiauto row.
   434  var ymovl = []ytab{
   435  	{Yrl, Ynone, Yml, Zr_m, 1},
   436  	{Yml, Ynone, Yrl, Zm_r, 1},
   437  	{Yi0, Ynone, Yrl, Zclr, 1},
   438  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   439  	{Yi32, Ynone, Yml, Zilo_m, 2},
   440  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   441  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   442  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   443  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   444  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   445  }
   446  
        // yret accepts no operands or a single Yi32 from operand; both rows use
        // Zo_iw.
   447  var yret = []ytab{
   448  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   449  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   450  }
   451  
        // ymovq is split into rows valid in 32-bit mode and rows valid only in
        // 64-bit mode. The trailing comment on each row names the prefix and
        // opcode byte that the row selects.
   452  var ymovq = []ytab{
   453  	// valid in 32-bit mode
   454  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   455  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   456  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   457  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   458  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   459  
   460  	// valid only in 64-bit mode, usually with 64-bit prefix
   461  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   462  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   463  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   464  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   465  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   466  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   467  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   468  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   469  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   470  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   471  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   472  }
   473  
        // ym_rl is a single form: Ym from operand into a Yrl to operand (Zm_r).
   474  var ym_rl = []ytab{
   475  	{Ym, Ynone, Yrl, Zm_r, 1},
   476  }
   477  
        // yrl_m is a single form: Yrl from operand, Ym to operand (Zr_m). optab
        // uses it for ABOUNDL and ABOUNDW.
   478  var yrl_m = []ytab{
   479  	{Yrl, Ynone, Ym, Zr_m, 1},
   480  }
   481  
        // ymb_rl is a single form: Ymb from operand into a Yrl to operand (Zmb_r).
   482  var ymb_rl = []ytab{
   483  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   484  }
   485  
        // yml_rl is a single form: Yml from operand into a Yrl to operand (Zm_r).
        // optab uses it for the BSF, BSR, and CMOV families.
   486  var yml_rl = []ytab{
   487  	{Yml, Ynone, Yrl, Zm_r, 1},
   488  }
   489  
        // yrl_ml is a single form: Yrl from operand, Yml to operand (Zr_m). optab
        // uses it for AARPL.
   490  var yrl_ml = []ytab{
   491  	{Yrl, Ynone, Yml, Zr_m, 1},
   492  }
   493  
        // yml_mb, despite its name, lists only byte classes: Yrb to Ymb (Zr_m)
        // and Ymb to Yrb (Zm_r).
   494  var yml_mb = []ytab{
   495  	{Yrb, Ynone, Ymb, Zr_m, 1},
   496  	{Ymb, Ynone, Yrb, Zm_r, 1},
   497  }
   498  
        // yrb_mb is a single form: Yrb from operand, Ymb to operand (Zr_m).
   499  var yrb_mb = []ytab{
   500  	{Yrb, Ynone, Ymb, Zr_m, 1},
   501  }
   502  
        // yxchg lists Yax paired with Yrl in either position (Z_rp, Zrp_) ahead
        // of the general Yrl/Yml forms (Zr_m, Zm_r).
   503  var yxchg = []ytab{
   504  	{Yax, Ynone, Yrl, Z_rp, 1},
   505  	{Yrl, Ynone, Yax, Zrp_, 1},
   506  	{Yrl, Ynone, Yml, Zr_m, 1},
   507  	{Yml, Ynone, Yrl, Zm_r, 1},
   508  }
   509  
        // ydivl takes a single Yml from operand and no to operand (Zm_o).
   510  var ydivl = []ytab{
   511  	{Yml, Ynone, Ynone, Zm_o, 2},
   512  }
   513  
        // ydivb takes a single Ymb from operand and no to operand (Zm_o).
   514  var ydivb = []ytab{
   515  	{Ymb, Ynone, Ynone, Zm_o, 2},
   516  }
   517  
        // yimul lists a one-operand form (Yml from operand only), immediate forms
        // into Yrl (Yi8 via Zib_rr, Yi32 via Zil_rr), and Yml into Yrl (Zm_r).
   518  var yimul = []ytab{
   519  	{Yml, Ynone, Ynone, Zm_o, 2},
   520  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   521  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   522  	{Yml, Ynone, Yrl, Zm_r, 2},
   523  }
   524  
        // yimul3 is a three-operand form: Yi8 from operand, Yml from3 operand,
        // Yrl to operand (Zibm_r).
   525  var yimul3 = []ytab{
   526  	{Yi8, Yml, Yrl, Zibm_r, 2},
   527  }
   528  
        // ybyte takes a single Yi64 from operand (Zbyte). optab uses it for ABYTE.
   529  var ybyte = []ytab{
   530  	{Yi64, Ynone, Ynone, Zbyte, 1},
   531  }
   532  
        // yin accepts a single Yi32 from operand (Zib_) or no operands (Zlit).
   533  var yin = []ytab{
   534  	{Yi32, Ynone, Ynone, Zib_, 1},
   535  	{Ynone, Ynone, Ynone, Zlit, 1},
   536  }
   537  
        // yint takes a single Yi32 from operand (Zib_).
   538  var yint = []ytab{
   539  	{Yi32, Ynone, Ynone, Zib_, 1},
   540  }
   541  
        // ypushl takes only a from operand: Yrl (Zrp_), Ym (Zm_o), Yi8 (Zib_), or
        // Yi32 (Zil_).
   542  var ypushl = []ytab{
   543  	{Yrl, Ynone, Ynone, Zrp_, 1},
   544  	{Ym, Ynone, Ynone, Zm_o, 2},
   545  	{Yi8, Ynone, Ynone, Zib_, 1},
   546  	{Yi32, Ynone, Ynone, Zil_, 1},
   547  }
   548  
        // ypopl takes only a to operand: Yrl (Z_rp) or Ym (Zo_m).
   549  var ypopl = []ytab{
   550  	{Ynone, Ynone, Yrl, Z_rp, 1},
   551  	{Ynone, Ynone, Ym, Zo_m, 2},
   552  }
   553  
        // ybswap takes only a Yrl to operand (Z_rp, 2 opcode bytes). optab uses it
        // for ABSWAPL and ABSWAPQ.
   554  var ybswap = []ytab{
   555  	{Ynone, Ynone, Yrl, Z_rp, 2},
   556  }
   557  
        // yscond takes only a Ymb to operand (Zo_m).
   558  var yscond = []ytab{
   559  	{Ynone, Ynone, Ymb, Zo_m, 2},
   560  }
   561  
        // yjcond takes a Ybr to operand with an optional Yi0 ($0) or Yi1 ($1) from
        // operand; every row uses Zbr.
        // NOTE(review): what the $0/$1 operand means is not visible here.
   562  var yjcond = []ytab{
   563  	{Ynone, Ynone, Ybr, Zbr, 0},
   564  	{Yi0, Ynone, Ybr, Zbr, 0},
   565  	{Yi1, Ynone, Ybr, Zbr, 1},
   566  }
   567  
        // yloop takes only a Ybr to operand (Zloop).
   568  var yloop = []ytab{
   569  	{Ynone, Ynone, Ybr, Zloop, 1},
   570  }
   571  
        // ycall is the table optab uses for obj.ACALL. The target is the to
        // operand: Yml or Yrx (Zcallindreg), Yindir (Zcallind), Ybr (Zcall), or
        // Yi32 (Zcallcon). The Yrx row also requires a Yrx from operand.
   572  var ycall = []ytab{
   573  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   574  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   575  	{Ynone, Ynone, Yindir, Zcallind, 2},
   576  	{Ynone, Ynone, Ybr, Zcall, 0},
   577  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   578  }
   579  
        // yduff takes only a Yi32 to operand (Zcallduff).
   580  var yduff = []ytab{
   581  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   582  }
   583  
        // yjmp takes only a to operand: Yml (Zo_m64), Ybr (Zjmp), or Yi32 (Zjmpcon).
   584  var yjmp = []ytab{
   585  	{Ynone, Ynone, Yml, Zo_m64, 2},
   586  	{Ynone, Ynone, Ybr, Zjmp, 0},
   587  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   588  }
   589  
        // yfmvd pairs Yf0 with Ym or Yrf in both directions: Zm_o when Yf0 is the
        // to operand, Zo_m when Yf0 is the from operand.
   590  var yfmvd = []ytab{
   591  	{Ym, Ynone, Yf0, Zm_o, 2},
   592  	{Yf0, Ynone, Ym, Zo_m, 2},
   593  	{Yrf, Ynone, Yf0, Zm_o, 2},
   594  	{Yf0, Ynone, Yrf, Zo_m, 2},
   595  }
   596  
        // yfmvdp takes a Yf0 from operand and a Ym or Yrf to operand (Zo_m).
   597  var yfmvdp = []ytab{
   598  	{Yf0, Ynone, Ym, Zo_m, 2},
   599  	{Yf0, Ynone, Yrf, Zo_m, 2},
   600  }
   601  
        // yfmvf pairs Ym and Yf0 in both directions (Zm_o, Zo_m).
   602  var yfmvf = []ytab{
   603  	{Ym, Ynone, Yf0, Zm_o, 2},
   604  	{Yf0, Ynone, Ym, Zo_m, 2},
   605  }
   606  
        // yfmvx is a single form: Ym from operand, Yf0 to operand (Zm_o).
   607  var yfmvx = []ytab{
   608  	{Ym, Ynone, Yf0, Zm_o, 2},
   609  }
   610  
        // yfmvp is a single form: Yf0 from operand, Ym to operand (Zo_m).
   611  var yfmvp = []ytab{
   612  	{Yf0, Ynone, Ym, Zo_m, 2},
   613  }
   614  
        // yfcmv is a single form: Yrf from operand, Yf0 to operand (Zm_o).
   615  var yfcmv = []ytab{
   616  	{Yrf, Ynone, Yf0, Zm_o, 2},
   617  }
   618  
        // yfadd accepts Ym or Yrf into Yf0 (Zm_o) and Yf0 into Yrf (Zo_m).
   619  var yfadd = []ytab{
   620  	{Ym, Ynone, Yf0, Zm_o, 2},
   621  	{Yrf, Ynone, Yf0, Zm_o, 2},
   622  	{Yf0, Ynone, Yrf, Zo_m, 2},
   623  }
   624  
        // yfxch accepts Yf0 and Yrf in either order (Zo_m, Zm_o).
   625  var yfxch = []ytab{
   626  	{Yf0, Ynone, Yrf, Zo_m, 2},
   627  	{Yrf, Ynone, Yf0, Zm_o, 2},
   628  }
   629  
        // ycompp is a single form: Yf0 from operand, Yrf to operand (Zo_m). The
        // row comment records that the operands are really f0,f1.
   630  var ycompp = []ytab{
   631  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   632  }
   633  
        // ystsw takes only a to operand: Ym (Zo_m) or Yax (Zlit).
   634  var ystsw = []ytab{
   635  	{Ynone, Ynone, Ym, Zo_m, 2},
   636  	{Ynone, Ynone, Yax, Zlit, 1},
   637  }
   638  
        // ysvrs takes a single Ym operand, either as the to operand (Zo_m) or as
        // the from operand (Zm_o).
   639  var ysvrs = []ytab{
   640  	{Ynone, Ynone, Ym, Zo_m, 2},
   641  	{Ym, Ynone, Ynone, Zm_o, 2},
   642  }
   643  
        // ymm has two Zm_r_xm forms: Ymm into Ymr (1 opcode byte) and Yxm into Yxr
        // (2 opcode bytes).
   644  var ymm = []ytab{
   645  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   646  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   647  }
   648  
        // yxm is a single form: Yxm from operand into a Yxr to operand (Zm_r_xm).
        // optab uses it for AADDPD, AADDPS, AADDSD, AADDSS, and the ANDP/ANDNP forms.
   649  var yxm = []ytab{
   650  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   651  }
   652  
        // yxm_q4 has the same operands as yxm (Yxm into Yxr) but is encoded with
        // Zm_r rather than Zm_r_xm.
   653  var yxm_q4 = []ytab{
   654  	{Yxm, Ynone, Yxr, Zm_r, 1},
   655  }
   656  
        // yxcvm1 takes a Yxm from operand and a Yxr or Ymr to operand (Zm_r_xm).
   657  var yxcvm1 = []ytab{
   658  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   659  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   660  }
   661  
        // yxcvm2 takes a Yxm or Ymm from operand and a Yxr to operand (Zm_r_xm).
   662  var yxcvm2 = []ytab{
   663  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   664  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   665  }
   666  
        // yxr is a single form with Yxr operands on both sides (Zm_r_xm).
   667  var yxr = []ytab{
   668  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   669  }
   670  
        // yxr_ml is a single form: Yxr from operand, Yml to operand (Zr_m_xm).
   671  var yxr_ml = []ytab{
   672  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   673  }
   674  
        // ymr is a single form with Ymr operands on both sides (Zm_r).
   675  var ymr = []ytab{
   676  	{Ymr, Ynone, Ymr, Zm_r, 1},
   677  }
   678  
        // ymr_ml is a single form: Ymr from operand, Yml to operand (Zr_m_xm).
   679  var ymr_ml = []ytab{
   680  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   681  }
   682  
        // yxcmpi is a three-operand form: Yxm from operand, Yxr from3 operand,
        // and a Yi8 immediate as the to operand (Zm_r_i_xm).
   683  var yxcmpi = []ytab{
   684  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   685  }
   686  
        // yxmov covers both directions between Yxm and Yxr: Yxm into Yxr (Zm_r_xm)
        // and Yxr into Yxm (Zr_m_xm).
   687  var yxmov = []ytab{
   688  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   689  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   690  }
   691  
        // yxcvfl is a single form: Yxm from operand, Yrl to operand (Zm_r_xm),
        // using 1 opcode byte.
   692  var yxcvfl = []ytab{
   693  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   694  }
   695  
        // yxcvlf is a single form: Yml from operand, Yxr to operand (Zm_r_xm),
        // using 1 opcode byte.
   696  var yxcvlf = []ytab{
   697  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   698  }
   699  
        // yxcvfq has the same operands as yxcvfl (Yxm into Yrl) but uses 2 opcode
        // bytes.
   700  var yxcvfq = []ytab{
   701  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   702  }
   703  
        // yxcvqf has the same operands as yxcvlf (Yml into Yxr) but uses 2 opcode
        // bytes.
   704  var yxcvqf = []ytab{
   705  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   706  }
   707  
        // yps lists, for both Ymr and Yxr to operands, a register-or-memory from
        // operand (Ymm or Yxm, Zm_r_xm) and a Yi8 immediate from operand (Zibo_m_xm).
   708  var yps = []ytab{
   709  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   710  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   711  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   712  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   713  }
   714  
        // yxrrl is a single form: Yxr from operand, Yrl to operand (Zm_r).
   715  var yxrrl = []ytab{
   716  	{Yxr, Ynone, Yrl, Zm_r, 1},
   717  }
   718  
        // ymrxr takes a Yxr to operand with a Ymr from operand (Zm_r) or a Yxm
        // from operand (Zm_r_xm).
   719  var ymrxr = []ytab{
   720  	{Ymr, Ynone, Yxr, Zm_r, 1},
   721  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   722  }
   723  
        // ymshuf is a three-operand form: Yi8 from operand, Ymm from3 operand,
        // Ymr to operand (Zibm_r).
   724  var ymshuf = []ytab{
   725  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   726  }
   727  
        // ymshufb is a single form: Yxm from operand, Yxr to operand (Zm2_r).
   728  var ymshufb = []ytab{
   729  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   730  }
   731  
        // yxshuf is a three-operand form: Yu8 from operand, Yxm from3 operand,
        // Yxr to operand (Zibm_r).
   732  var yxshuf = []ytab{
   733  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   734  }
   735  
        // yextrw is a three-operand form: Yu8 from operand, Yxr from3 operand,
        // Yrl to operand (Zibm_r).
   736  var yextrw = []ytab{
   737  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   738  }
   739  
        // yextr is a three-operand form: Yu8 from operand, Yxr from3 operand,
        // Ymm to operand (Zibr_m), using 3 opcode bytes.
   740  var yextr = []ytab{
   741  	{Yu8, Yxr, Ymm, Zibr_m, 3},
   742  }
   743  
        // yinsrw is a three-operand form: Yu8 from operand, Yml from3 operand,
        // Yxr to operand (Zibm_r).
   744  var yinsrw = []ytab{
   745  	{Yu8, Yml, Yxr, Zibm_r, 2},
   746  }
   747  
        // yinsr is a three-operand form: Yu8 from operand, Ymm from3 operand,
        // Yxr to operand (Zibm_r), using 3 opcode bytes.
   748  var yinsr = []ytab{
   749  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   750  }
   751  
        // ypsdq is a single form: Yi8 from operand, Yxr to operand (Zibo_m).
   752  var ypsdq = []ytab{
   753  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   754  }
   755  
        // ymskb takes a Yrl to operand with a Yxr from operand (2 opcode bytes) or
        // a Ymr from operand (1 opcode byte); both rows use Zm_r_xm.
   756  var ymskb = []ytab{
   757  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   758  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   759  }
   760  
        // ycrc32l is a single form: Yml from operand, Yrl to operand (Zlitm_r).
        // Its zoffset is 0.
   761  var ycrc32l = []ytab{
   762  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   763  }
   764  
        // yprefetch takes a single Ym from operand and no to operand (Zm_o).
   765  var yprefetch = []ytab{
   766  	{Ym, Ynone, Ynone, Zm_o, 2},
   767  }
   768  
        // yaes is a single form: Yxm from operand, Yxr to operand (Zlitm_r).
   769  var yaes = []ytab{
   770  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   771  }
   772  
        // yxbegin takes only a Ybr to operand (Zjmp).
   773  var yxbegin = []ytab{
   774  	{Ynone, Ynone, Ybr, Zjmp, 1},
   775  }
   776  
        // yxabort takes a single Yu8 from operand (Zib_).
   777  var yxabort = []ytab{
   778  	{Yu8, Ynone, Ynone, Zib_, 1},
   779  }
   780  
        // ylddqu is a single form: Ym from operand, Yxr to operand (Zm_r).
   781  var ylddqu = []ytab{
   782  	{Ym, Ynone, Yxr, Zm_r, 1},
   783  }
   784  
   785  // VEX instructions that come in two forms:
   786  //	VTHING xmm2/m128, xmmV, xmm1
   787  //	VTHING ymm2/m256, ymmV, ymm1
   788  // The opcode array in the corresponding Optab entry
   789  // should contain the (VEX prefixes, opcode byte) pair
   790  // for each of the two forms.
   791  // For example, the entries for VPXOR are:
   792  //
   793  //	VPXOR xmm2/m128, xmmV, xmm1
   794  //	VEX.NDS.128.66.0F.WIG EF /r
   795  //
   796  //	VPXOR ymm2/m256, ymmV, ymm1
   797  //	VEX.NDS.256.66.0F.WIG EF /r
   798  //
   799  // The NDS/NDD/DDS part can be dropped, producing this
   800  // Optab entry:
   801  //
   802  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
   803  //
        // Each row has zoffset 2, matching one (VEX prefix, opcode byte) pair; the
        // first row is the xmm form and the second the ymm form.
   804  var yvex_xy3 = []ytab{
   805  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   806  	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
   807  }
   808  
        // yvex_ri3 is a three-operand VEX form: Yi8 from operand, Ymb from3
        // operand, Yrl to operand (Zvex_i_rm_r).
   809  var yvex_ri3 = []ytab{
   810  	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
   811  }
   812  
        // yvex_xyi3 lists immediate VEX forms (Zvex_i_rm_r) for xmm (Yxm, Yxr) and
        // ymm (Yym, Yyr) operands, first with a Yu8 immediate and then with a Yi8
        // immediate.
   813  var yvex_xyi3 = []ytab{
   814  	{Yu8, Yxm, Yxr, Zvex_i_rm_r, 2},
   815  	{Yu8, Yym, Yyr, Zvex_i_rm_r, 2},
   816  	{Yi8, Yxm, Yxr, Zvex_i_rm_r, 2},
   817  	{Yi8, Yym, Yyr, Zvex_i_rm_r, 2},
   818  }
   819  
        // yvex_yyi4 is a single ymm-only form: Yym from operand, Yyr from3 and to
        // operands, encoded with Zvex_i_rm_v_r.
   820  var yvex_yyi4 = []ytab{ //TODO don't hide 4 op, some version have xmm version
   821  	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   822  }
   823  
        // yvex_xyi4 is like yvex_yyi4 but with a Yxm from operand: Yxm, Yyr, Yyr,
        // encoded with Zvex_i_rm_v_r.
   824  var yvex_xyi4 = []ytab{
   825  	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   826  }
   827  
        // yvex_shift lists, for xmm and ymm registers, a Yi8 immediate form
        // (Zvex_i_r_v, 3 opcode bytes) and a Yxm from-operand form (Zvex_rm_v_r,
        // 2 opcode bytes).
   828  var yvex_shift = []ytab{
   829  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   830  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   831  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   832  	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2},
   833  }
   834  
        // yvex_shift_dq has only the two Yi8 immediate rows of yvex_shift.
   835  var yvex_shift_dq = []ytab{
   836  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   837  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   838  }
   839  
        // yvex_r3 is a three-operand VEX form: Yml from operand, Yrl from3 and to
        // operands (Zvex_rm_v_r).
   840  var yvex_r3 = []ytab{
   841  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   842  }
   843  
        // yvex_vmr3 is a three-operand VEX form: Yrl from operand, Yml from3
        // operand, Yrl to operand (Zvex_v_rm_r).
   844  var yvex_vmr3 = []ytab{
   845  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   846  }
   847  
        // yvex_xy2 is the two-operand counterpart of yvex_xy3: Yxm into Yxr and
        // Yym into Yyr, with no from3 operand (Zvex_rm_v_r).
   848  var yvex_xy2 = []ytab{
   849  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   850  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   851  }
   852  
        // yvex_xyr2 takes a Yxr or Yyr from operand and a Yrl to operand
        // (Zvex_rm_v_r).
   853  var yvex_xyr2 = []ytab{
   854  	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
   855  	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
   856  }
   857  
        // yvex_vmovdqa covers both directions for xmm and ymm operands:
        // register-or-memory into register (Zvex_rm_v_r) and register into
        // register-or-memory (Zvex_r_v_rm).
   858  var yvex_vmovdqa = []ytab{
   859  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   860  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   861  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   862  	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
   863  }
   864  
        // yvex_vmovntdq takes a Yxr or Yyr from operand and a Ym to operand
        // (Zvex_r_v_rm).
   865  var yvex_vmovntdq = []ytab{
   866  	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
   867  	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
   868  }
   869  
        // yvex_vpbroadcast takes a Yxm from operand and a Yxr or Yyr to operand
        // (Zvex_rm_v_r).
   870  var yvex_vpbroadcast = []ytab{
   871  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   872  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   873  }
   874  
        // yvex_vpbroadcast_sd has only the Yxm-into-Yyr row of yvex_vpbroadcast.
   875  var yvex_vpbroadcast_sd = []ytab{
   876  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   877  }
   878  
        // ymmxmm0f38 has two Zlitm_r forms: Ymm into Ymr (3 opcode bytes) and Yxm
        // into Yxr (5 opcode bytes).
   879  var ymmxmm0f38 = []ytab{
   880  	{Ymm, Ynone, Ymr, Zlitm_r, 3},
   881  	{Yxm, Ynone, Yxr, Zlitm_r, 5},
   882  }
   883  
   884  /*
   885   * You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   886   * ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   887   * to find the entry with the given p.As and then looks through the ytable for
   888   * that instruction (the second field in the optab struct) for a line whose
   889   * from and to fields match the Ytypes of the p.From and p.To operands.  The
   890   * function oclass computes the specific Ytype of an operand and then the set
   891   * of more general Ytypes that it satisfies is implied by the ycover table, set
   892   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   893   * from the more general 8-bit constants, but instinit says
   894   *
   895   *        ycover[Yi0*Ymax+Ys32] = 1
   896   *        ycover[Yi1*Ymax+Ys32] = 1
   897   *        ycover[Yi8*Ymax+Ys32] = 1
   898   *
   899   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   900   * if that's what an instruction can handle.
   901   *
   902   * In parallel with the scan through the ytable for the appropriate line, there
   903   * is a z pointer that starts out pointing at the strange magic byte list in
   904   * the Optab struct.  With each step past a non-matching ytable line, z
   905   * advances by that line's zoffset entry.  When a matching line is found, that
   906   * z pointer has the extra data to use in laying down the instruction bytes.
   907   * The actual bytes laid down are a function of the line's zcase entry (that
   908   * is, the Ztype) and the z bytes.
   909   *
   910   * For example, let's look at AADDL.  The optab line says:
   911   *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   912   *
   913   * and yaddl says
   914   *        var yaddl = []ytab{
   915   *                {Yi8, Ynone, Yml, Zibo_m, 2},
   916   *                {Yi32, Ynone, Yax, Zil_, 1},
   917   *                {Yi32, Ynone, Yml, Zilo_m, 2},
   918   *                {Yrl, Ynone, Yml, Zr_m, 1},
   919   *                {Yml, Ynone, Yrl, Zm_r, 1},
   920   *        }
   921   *
   922   * so there are 5 possible types of ADDL instruction that can be laid down, and
   923   * possible states used to lay them down (Ztype and z pointer, assuming z
   924   * points at [23]uint8{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   925   *
   926   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   927   *        Yi32, Yax -> Zil_, z+2 (0x05)
   928   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   929   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   930   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   931   *
   932   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   933   * relatively straightforward as this program goes.
   934   *
   935   * The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   936   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   937   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   938   * Zilo_m is the same but a long (32-bit) immediate.
   939   */
   940  var optab =
   941  /*	as, ytab, andproto, opcode */
   942  []Optab{
   943  	{obj.AXXX, nil, 0, [23]uint8{}},
   944  	{AAAA, ynone, P32, [23]uint8{0x37}},
   945  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   946  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   947  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   948  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x12}},
   949  	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   950  	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   951  	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   952  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   953  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   954  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   955  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   956  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   957  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   958  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   959  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   960  	{AADJSP, nil, 0, [23]uint8{}},
   961  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   962  	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   963  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   964  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   965  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   966  	{AANDPS, yxm, Pm, [23]uint8{0x54}},
   967  	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   968  	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   969  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   970  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   971  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   972  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   973  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   974  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   975  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   976  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   977  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   978  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   979  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   980  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   981  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   982  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   983  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   984  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   985  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   986  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   987  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   988  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   989  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   990  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   991  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   992  	{ABYTE, ybyte, Px, [23]uint8{1}},
   993  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   994  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   995  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   996  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   997  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   998  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   999  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1000  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1001  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1002  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1003  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1004  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1005  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1006  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1007  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1008  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1009  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1010  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1011  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1012  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1013  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1014  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1015  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1016  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1017  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1018  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1019  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1020  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1021  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1022  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1023  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1024  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1025  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1026  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1027  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1028  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1029  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1030  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1031  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1032  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1033  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1034  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1035  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1036  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1037  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1038  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1039  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1040  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1041  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1042  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1043  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1044  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1045  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1046  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1047  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1048  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1049  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1050  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1051  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1052  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1053  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1054  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1055  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1056  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1057  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1058  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1059  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1060  	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
  1061  	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
  1062  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1063  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1064  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1065  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1066  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1067  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1068  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1069  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1070  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1071  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1072  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1073  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1074  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1075  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1076  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1077  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1078  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1079  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1080  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1081  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1082  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1083  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1084  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1085  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1086  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1087  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1088  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1089  	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
  1090  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1091  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1092  	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
  1093  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1094  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1095  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1096  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1097  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1098  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1099  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1100  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1101  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1102  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1103  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1104  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1105  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1106  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1107  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1108  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1109  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1110  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1111  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1112  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1113  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1115  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1116  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1117  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1118  	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
  1119  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1120  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1121  	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
  1122  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1123  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1124  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1125  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1126  	{AINT, yint, Px, [23]uint8{0xcd}},
  1127  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1128  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1129  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1130  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1131  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1132  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1133  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1134  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1135  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1136  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1137  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1138  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1139  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1140  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1141  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1142  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1143  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1144  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1145  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1146  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1147  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1148  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1149  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1150  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1151  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1152  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1153  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1154  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1155  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1156  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1157  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1158  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1159  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1160  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1161  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1162  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1163  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1164  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1165  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1166  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1167  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1168  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1169  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1170  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1171  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1172  	{ALONG, ybyte, Px, [23]uint8{4}},
  1173  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1174  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1175  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1176  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1177  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1178  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1179  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1180  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1181  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1182  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1183  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1184  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1185  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1186  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1187  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1188  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1189  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1190  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1191  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1192  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1193  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1194  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1195  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1196  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1197  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1198  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1199  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1200  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1201  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1202  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1203  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1204  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1205  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1206  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1207  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1208  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1209  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1210  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1211  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1212  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1213  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1214  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1215  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1216  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1217  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1218  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1219  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1220  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1221  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1222  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1223  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1224  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1225  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1226  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1227  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1228  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1229  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1230  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1231  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1232  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1233  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1234  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1235  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1236  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1237  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1238  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1239  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1240  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1241  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1242  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1243  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1244  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1245  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1246  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1247  	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1248  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1249  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1250  	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1251  	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1252  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1253  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1254  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1255  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1256  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1257  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1258  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1259  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1260  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1261  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1262  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1263  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1264  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1265  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1266  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1267  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1268  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1269  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1270  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1271  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1272  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1273  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1274  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1275  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1276  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1277  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1278  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1279  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1280  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1281  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1282  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1283  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1284  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1285  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1286  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1287  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1288  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1289  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1290  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1291  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1292  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1293  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1294  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1295  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1296  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1297  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1298  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1299  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1300  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1301  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1302  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1303  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1304  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1305  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1306  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1307  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1308  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1309  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1310  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1311  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1312  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1313  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1314  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1315  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1316  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1317  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1318  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1319  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1320  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1321  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1322  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1323  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1324  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1325  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1326  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1327  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1328  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1329  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1330  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1331  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1332  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1333  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1334  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1335  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1336  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1337  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1338  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1339  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1340  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1341  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1342  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1343  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1344  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1345  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1346  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1347  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1348  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1349  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1350  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1351  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1352  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1353  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1354  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1355  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1356  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1357  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1358  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1359  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1360  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1361  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1362  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1363  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1364  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1365  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1366  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1367  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1368  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1369  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1370  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1371  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1372  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1373  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1374  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1375  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1376  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1377  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1378  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1379  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1380  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1381  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1382  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1383  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1384  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1385  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1386  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1387  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1388  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1389  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1390  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1391  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1392  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1393  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1394  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1395  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1396  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1397  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1398  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1399  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1400  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1401  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1402  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1403  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1404  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1405  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1406  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1407  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1408  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1409  	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1410  	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1411  	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1412  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1413  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1414  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1415  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1416  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1417  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1418  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1419  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1420  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1421  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1422  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1423  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1424  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1425  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1426  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1427  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1428  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1429  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1430  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1431  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1432  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1433  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1434  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1435  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1436  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1437  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1438  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1439  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1440  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1441  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1442  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1443  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1444  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1445  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1446  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1447  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1448  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1449  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1450  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1451  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1452  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1453  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1454  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1455  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1456  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1457  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1458  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1459  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1460  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1461  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1462  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1463  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1464  	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1465  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1466  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1467  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1468  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1469  	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
  1470  	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
  1471  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1472  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1473  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1474  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1475  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1476  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1477  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1478  	{AWORD, ybyte, Px, [23]uint8{2}},
  1479  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1480  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1481  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1482  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1483  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1484  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1485  	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1486  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1487  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1488  	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1489  	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1490  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1491  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1492  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1493  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1494  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1495  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1496  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1497  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1498  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1499  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1500  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1501  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1502  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1503  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1504  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1505  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1506  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1507  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1508  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1509  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1510  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1511  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1512  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1513  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1514  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1515  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1516  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1517  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1518  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1519  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1520  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1521  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1522  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1523  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1524  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1525  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1526  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1527  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1528  	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
  1529  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1530  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1531  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1532  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1533  	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
  1534  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1535  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1536  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1537  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1538  	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
  1539  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1540  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1541  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1542  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1543  	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
  1544  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1545  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1546  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1547  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1548  	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
  1549  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1550  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1551  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1552  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1553  	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
  1554  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1555  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1556  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1557  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1558  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1559  	{AFFREE, nil, 0, [23]uint8{}},
  1560  	{AFLDCW, ysvrs, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1561  	{AFLDENV, ysvrs, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1562  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1563  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1564  	{AFSTCW, ysvrs, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1565  	{AFSTENV, ysvrs, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1566  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1567  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1568  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1569  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1570  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1571  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1572  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1573  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1574  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1575  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1576  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1577  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1578  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1579  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1580  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1581  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1582  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1583  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1584  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1585  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1586  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1587  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1588  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1589  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1590  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1591  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1592  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1593  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1594  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1595  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1596  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1597  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1598  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1599  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1600  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1601  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1602  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1603  	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
  1604  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1605  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1606  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1607  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1608  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1609  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1610  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1611  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1612  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1613  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1614  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1615  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1616  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1617  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1618  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1619  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1620  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1621  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1622  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1623  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1624  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1625  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1626  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1627  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1628  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1629  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1630  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1631  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1632  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1633  	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1634  	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
  1635  	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
  1636  	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1637  	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1638  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1639  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1640  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1641  	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
  1642  	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
  1643  	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
  1644  
  1645  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1646  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1647  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1648  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1649  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1650  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1651  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1652  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1653  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1654  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1655  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1656  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1657  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1658  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1659  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1660  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1661  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1662  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1663  
  1664  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1665  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1666  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1667  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1668  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1669  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1670  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1671  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1672  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1673  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1674  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
  1675  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70, VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
  1676  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
  1677  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
  1678  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
  1679  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
  1680  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
  1681  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
  1682  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
  1683  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
  1684  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
  1685  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
  1686  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
  1687  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
  1688  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
  1689  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
  1690  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
  1691  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
  1692  	{AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_256_66_0F38_W0, 0x19}},
  1693  	{AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x18, VEX_256_66_0F38_W0, 0x18}},
  1694  	{AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x12, VEX_256_F2_0F_WIG, 0x12}},
  1695  	{AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x16, VEX_256_F3_0F_WIG, 0x16}},
  1696  	{AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x12, VEX_256_F3_0F_WIG, 0x12}},
  1697  
  1698  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1699  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1700  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1701  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1702  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1703  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1704  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1705  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1706  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1707  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1708  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1709  	{obj.AEND, nil, 0, [23]uint8{}},
  1710  	{0, nil, 0, [23]uint8{}},
  1711  }
  1712  
// opindex maps an instruction opcode, masked with obj.AMask, to its entry
// in optab. It is filled in by instinit, which reports a "phase error" if
// two optab entries land in the same slot.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1714  
  1715  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1716  // This happens on systems like Solaris that call .so functions instead of system calls.
  1717  // It does not seem to be necessary for any other systems. This is probably working
  1718  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1719  // what that bug is. And this does fix it.
  1720  func isextern(s *obj.LSym) bool {
  1721  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1722  	return strings.HasPrefix(s.Name, "libc_")
  1723  }
  1724  
  1725  // single-instruction no-ops of various lengths.
  1726  // constructed by hand and disassembled with gdb to verify.
  1727  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1728  var nop = [][16]uint8{
  1729  	{0x90},
  1730  	{0x66, 0x90},
  1731  	{0x0F, 0x1F, 0x00},
  1732  	{0x0F, 0x1F, 0x40, 0x00},
  1733  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1734  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1735  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1736  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1737  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1738  }
  1739  
  1740  // Native Client rejects the repeated 0x66 prefix.
  1741  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1742  func fillnop(p []byte, n int) {
  1743  	var m int
  1744  
  1745  	for n > 0 {
  1746  		m = n
  1747  		if m > len(nop) {
  1748  			m = len(nop)
  1749  		}
  1750  		copy(p[:m], nop[m-1][:m])
  1751  		p = p[m:]
  1752  		n -= m
  1753  	}
  1754  }
  1755  
  1756  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1757  	s.Grow(int64(c) + int64(pad))
  1758  	fillnop(s.P[c:], int(pad))
  1759  	return c + pad
  1760  }
  1761  
  1762  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1763  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1764  		return l
  1765  	}
  1766  	return q
  1767  }
  1768  
  1769  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  1770  	if s.P != nil {
  1771  		return
  1772  	}
  1773  
  1774  	if ycover[0] == 0 {
  1775  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  1776  	}
  1777  
  1778  	var asmbuf AsmBuf
  1779  
  1780  	for p := s.Func.Text; p != nil; p = p.Link {
  1781  		if p.To.Type == obj.TYPE_BRANCH {
  1782  			if p.Pcond == nil {
  1783  				p.Pcond = p
  1784  			}
  1785  		}
  1786  		if p.As == AADJSP {
  1787  			p.To.Type = obj.TYPE_REG
  1788  			p.To.Reg = REG_SP
  1789  			v := int32(-p.From.Offset)
  1790  			p.From.Offset = int64(v)
  1791  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1792  			if v < 0 {
  1793  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1794  				v = -v
  1795  				p.From.Offset = int64(v)
  1796  			}
  1797  
  1798  			if v == 0 {
  1799  				p.As = obj.ANOP
  1800  			}
  1801  		}
  1802  	}
  1803  
  1804  	var q *obj.Prog
  1805  	var count int64 // rough count of number of instructions
  1806  	for p := s.Func.Text; p != nil; p = p.Link {
  1807  		count++
  1808  		p.Back = 2 // use short branches first time through
  1809  		q = p.Pcond
  1810  		if q != nil && (q.Back&2 != 0) {
  1811  			p.Back |= 1 // backward jump
  1812  			q.Back |= 4 // loop head
  1813  		}
  1814  
  1815  		if p.As == AADJSP {
  1816  			p.To.Type = obj.TYPE_REG
  1817  			p.To.Reg = REG_SP
  1818  			v := int32(-p.From.Offset)
  1819  			p.From.Offset = int64(v)
  1820  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1821  			if v < 0 {
  1822  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1823  				v = -v
  1824  				p.From.Offset = int64(v)
  1825  			}
  1826  
  1827  			if v == 0 {
  1828  				p.As = obj.ANOP
  1829  			}
  1830  		}
  1831  	}
  1832  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1833  
  1834  	n := 0
  1835  	var c int32
  1836  	errors := ctxt.Errors
  1837  	for {
  1838  		loop := int32(0)
  1839  		for i := range s.R {
  1840  			s.R[i] = obj.Reloc{}
  1841  		}
  1842  		s.R = s.R[:0]
  1843  		s.P = s.P[:0]
  1844  		c = 0
  1845  		for p := s.Func.Text; p != nil; p = p.Link {
  1846  			if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 {
  1847  
  1848  				// pad everything to avoid crossing 32-byte boundary
  1849  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1850  					c = naclpad(ctxt, s, c, -c&31)
  1851  				}
  1852  
  1853  				// pad call deferreturn to start at 32-byte boundary
  1854  				// so that subtracting 5 in jmpdefer will jump back
  1855  				// to that boundary and rerun the call.
  1856  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1857  					c = naclpad(ctxt, s, c, -c&31)
  1858  				}
  1859  
  1860  				// pad call to end at 32-byte boundary
  1861  				if p.As == obj.ACALL {
  1862  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1863  				}
  1864  
  1865  				// the linker treats REP and STOSQ as different instructions
  1866  				// but in fact the REP is a prefix on the STOSQ.
  1867  				// make sure REP has room for 2 more bytes, so that
  1868  				// padding will not be inserted before the next instruction.
  1869  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1870  					c = naclpad(ctxt, s, c, -c&31)
  1871  				}
  1872  
  1873  				// same for LOCK.
  1874  				// various instructions follow; the longest is 4 bytes.
  1875  				// give ourselves 8 bytes so as to avoid surprises.
  1876  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1877  					c = naclpad(ctxt, s, c, -c&31)
  1878  				}
  1879  			}
  1880  
  1881  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1882  				// pad with NOPs
  1883  				v := -c & (LoopAlign - 1)
  1884  
  1885  				if v <= MaxLoopPad {
  1886  					s.Grow(int64(c) + int64(v))
  1887  					fillnop(s.P[c:], int(v))
  1888  					c += v
  1889  				}
  1890  			}
  1891  
  1892  			p.Pc = int64(c)
  1893  
  1894  			// process forward jumps to p
  1895  			for q = p.Rel; q != nil; q = q.Forwd {
  1896  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1897  				if q.Back&2 != 0 { // short
  1898  					if v > 127 {
  1899  						loop++
  1900  						q.Back ^= 2
  1901  					}
  1902  
  1903  					if q.As == AJCXZL || q.As == AXBEGIN {
  1904  						s.P[q.Pc+2] = byte(v)
  1905  					} else {
  1906  						s.P[q.Pc+1] = byte(v)
  1907  					}
  1908  				} else {
  1909  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1910  				}
  1911  			}
  1912  
  1913  			p.Rel = nil
  1914  
  1915  			p.Pc = int64(c)
  1916  			asmbuf.asmins(ctxt, s, p)
  1917  			m := asmbuf.Len()
  1918  			if int(p.Isize) != m {
  1919  				p.Isize = uint8(m)
  1920  				loop++
  1921  			}
  1922  
  1923  			s.Grow(p.Pc + int64(m))
  1924  			copy(s.P[p.Pc:], asmbuf.Bytes())
  1925  			c += int32(m)
  1926  		}
  1927  
  1928  		n++
  1929  		if n > 20 {
  1930  			ctxt.Diag("span must be looping")
  1931  			log.Fatalf("loop")
  1932  		}
  1933  		if loop == 0 {
  1934  			break
  1935  		}
  1936  		if ctxt.Errors > errors {
  1937  			return
  1938  		}
  1939  	}
  1940  
  1941  	if ctxt.Headtype == objabi.Hnacl {
  1942  		c = naclpad(ctxt, s, c, -c&31)
  1943  	}
  1944  
  1945  	s.Size = int64(c)
  1946  
  1947  	if false { /* debug['a'] > 1 */
  1948  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1949  		var i int
  1950  		for i = 0; i < len(s.P); i++ {
  1951  			fmt.Printf(" %.2x", s.P[i])
  1952  			if i%16 == 15 {
  1953  				fmt.Printf("\n  %.6x", uint(i+1))
  1954  			}
  1955  		}
  1956  
  1957  		if i%16 != 0 {
  1958  			fmt.Printf("\n")
  1959  		}
  1960  
  1961  		for i := 0; i < len(s.R); i++ {
  1962  			r := &s.R[i]
  1963  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1964  		}
  1965  	}
  1966  }
  1967  
  1968  func instinit(ctxt *obj.Link) {
  1969  	if ycover[0] != 0 {
  1970  		// Already initialized; stop now.
  1971  		// This happens in the cmd/asm tests,
  1972  		// each of which re-initializes the arch.
  1973  		return
  1974  	}
  1975  
  1976  	switch ctxt.Headtype {
  1977  	case objabi.Hplan9:
  1978  		plan9privates = ctxt.Lookup("_privates")
  1979  	case objabi.Hnacl:
  1980  		deferreturn = ctxt.Lookup("runtime.deferreturn")
  1981  	}
  1982  
  1983  	for i := 1; optab[i].as != 0; i++ {
  1984  		c := optab[i].as
  1985  		if opindex[c&obj.AMask] != nil {
  1986  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  1987  		}
  1988  		opindex[c&obj.AMask] = &optab[i]
  1989  	}
  1990  
  1991  	for i := 0; i < Ymax; i++ {
  1992  		ycover[i*Ymax+i] = 1
  1993  	}
  1994  
  1995  	ycover[Yi0*Ymax+Yi8] = 1
  1996  	ycover[Yi1*Ymax+Yi8] = 1
  1997  	ycover[Yu7*Ymax+Yi8] = 1
  1998  
  1999  	ycover[Yi0*Ymax+Yu7] = 1
  2000  	ycover[Yi1*Ymax+Yu7] = 1
  2001  
  2002  	ycover[Yi0*Ymax+Yu8] = 1
  2003  	ycover[Yi1*Ymax+Yu8] = 1
  2004  	ycover[Yu7*Ymax+Yu8] = 1
  2005  
  2006  	ycover[Yi0*Ymax+Ys32] = 1
  2007  	ycover[Yi1*Ymax+Ys32] = 1
  2008  	ycover[Yu7*Ymax+Ys32] = 1
  2009  	ycover[Yu8*Ymax+Ys32] = 1
  2010  	ycover[Yi8*Ymax+Ys32] = 1
  2011  
  2012  	ycover[Yi0*Ymax+Yi32] = 1
  2013  	ycover[Yi1*Ymax+Yi32] = 1
  2014  	ycover[Yu7*Ymax+Yi32] = 1
  2015  	ycover[Yu8*Ymax+Yi32] = 1
  2016  	ycover[Yi8*Ymax+Yi32] = 1
  2017  	ycover[Ys32*Ymax+Yi32] = 1
  2018  
  2019  	ycover[Yi0*Ymax+Yi64] = 1
  2020  	ycover[Yi1*Ymax+Yi64] = 1
  2021  	ycover[Yu7*Ymax+Yi64] = 1
  2022  	ycover[Yu8*Ymax+Yi64] = 1
  2023  	ycover[Yi8*Ymax+Yi64] = 1
  2024  	ycover[Ys32*Ymax+Yi64] = 1
  2025  	ycover[Yi32*Ymax+Yi64] = 1
  2026  
  2027  	ycover[Yal*Ymax+Yrb] = 1
  2028  	ycover[Ycl*Ymax+Yrb] = 1
  2029  	ycover[Yax*Ymax+Yrb] = 1
  2030  	ycover[Ycx*Ymax+Yrb] = 1
  2031  	ycover[Yrx*Ymax+Yrb] = 1
  2032  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2033  
  2034  	ycover[Ycl*Ymax+Ycx] = 1
  2035  
  2036  	ycover[Yax*Ymax+Yrx] = 1
  2037  	ycover[Ycx*Ymax+Yrx] = 1
  2038  
  2039  	ycover[Yax*Ymax+Yrl] = 1
  2040  	ycover[Ycx*Ymax+Yrl] = 1
  2041  	ycover[Yrx*Ymax+Yrl] = 1
  2042  	ycover[Yrl32*Ymax+Yrl] = 1
  2043  
  2044  	ycover[Yf0*Ymax+Yrf] = 1
  2045  
  2046  	ycover[Yal*Ymax+Ymb] = 1
  2047  	ycover[Ycl*Ymax+Ymb] = 1
  2048  	ycover[Yax*Ymax+Ymb] = 1
  2049  	ycover[Ycx*Ymax+Ymb] = 1
  2050  	ycover[Yrx*Ymax+Ymb] = 1
  2051  	ycover[Yrb*Ymax+Ymb] = 1
  2052  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2053  	ycover[Ym*Ymax+Ymb] = 1
  2054  
  2055  	ycover[Yax*Ymax+Yml] = 1
  2056  	ycover[Ycx*Ymax+Yml] = 1
  2057  	ycover[Yrx*Ymax+Yml] = 1
  2058  	ycover[Yrl*Ymax+Yml] = 1
  2059  	ycover[Yrl32*Ymax+Yml] = 1
  2060  	ycover[Ym*Ymax+Yml] = 1
  2061  
  2062  	ycover[Yax*Ymax+Ymm] = 1
  2063  	ycover[Ycx*Ymax+Ymm] = 1
  2064  	ycover[Yrx*Ymax+Ymm] = 1
  2065  	ycover[Yrl*Ymax+Ymm] = 1
  2066  	ycover[Yrl32*Ymax+Ymm] = 1
  2067  	ycover[Ym*Ymax+Ymm] = 1
  2068  	ycover[Ymr*Ymax+Ymm] = 1
  2069  
  2070  	ycover[Ym*Ymax+Yxm] = 1
  2071  	ycover[Yxr*Ymax+Yxm] = 1
  2072  
  2073  	ycover[Ym*Ymax+Yym] = 1
  2074  	ycover[Yyr*Ymax+Yym] = 1
  2075  
  2076  	for i := 0; i < MAXREG; i++ {
  2077  		reg[i] = -1
  2078  		if i >= REG_AL && i <= REG_R15B {
  2079  			reg[i] = (i - REG_AL) & 7
  2080  			if i >= REG_SPB && i <= REG_DIB {
  2081  				regrex[i] = 0x40
  2082  			}
  2083  			if i >= REG_R8B && i <= REG_R15B {
  2084  				regrex[i] = Rxr | Rxx | Rxb
  2085  			}
  2086  		}
  2087  
  2088  		if i >= REG_AH && i <= REG_BH {
  2089  			reg[i] = 4 + ((i - REG_AH) & 7)
  2090  		}
  2091  		if i >= REG_AX && i <= REG_R15 {
  2092  			reg[i] = (i - REG_AX) & 7
  2093  			if i >= REG_R8 {
  2094  				regrex[i] = Rxr | Rxx | Rxb
  2095  			}
  2096  		}
  2097  
  2098  		if i >= REG_F0 && i <= REG_F0+7 {
  2099  			reg[i] = (i - REG_F0) & 7
  2100  		}
  2101  		if i >= REG_M0 && i <= REG_M0+7 {
  2102  			reg[i] = (i - REG_M0) & 7
  2103  		}
  2104  		if i >= REG_X0 && i <= REG_X0+15 {
  2105  			reg[i] = (i - REG_X0) & 7
  2106  			if i >= REG_X0+8 {
  2107  				regrex[i] = Rxr | Rxx | Rxb
  2108  			}
  2109  		}
  2110  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2111  			reg[i] = (i - REG_Y0) & 7
  2112  			if i >= REG_Y0+8 {
  2113  				regrex[i] = Rxr | Rxx | Rxb
  2114  			}
  2115  		}
  2116  
  2117  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2118  			regrex[i] = Rxr
  2119  		}
  2120  	}
  2121  }
  2122  
  2123  var isAndroid = (objabi.GOOS == "android")
  2124  
  2125  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2126  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2127  		return 0
  2128  	}
  2129  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2130  		switch a.Reg {
  2131  		case REG_CS:
  2132  			return 0x2e
  2133  
  2134  		case REG_DS:
  2135  			return 0x3e
  2136  
  2137  		case REG_ES:
  2138  			return 0x26
  2139  
  2140  		case REG_FS:
  2141  			return 0x64
  2142  
  2143  		case REG_GS:
  2144  			return 0x65
  2145  
  2146  		case REG_TLS:
  2147  			// NOTE: Systems listed here should be only systems that
  2148  			// support direct TLS references like 8(TLS) implemented as
  2149  			// direct references from FS or GS. Systems that require
  2150  			// the initial-exec model, where you load the TLS base into
  2151  			// a register and then index from that register, do not reach
  2152  			// this code and should not be listed.
  2153  			if ctxt.Arch.Family == sys.I386 {
  2154  				switch ctxt.Headtype {
  2155  				default:
  2156  					if isAndroid {
  2157  						return 0x65 // GS
  2158  					}
  2159  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2160  
  2161  				case objabi.Hdarwin,
  2162  					objabi.Hdragonfly,
  2163  					objabi.Hfreebsd,
  2164  					objabi.Hnetbsd,
  2165  					objabi.Hopenbsd:
  2166  					return 0x65 // GS
  2167  				}
  2168  			}
  2169  
  2170  			switch ctxt.Headtype {
  2171  			default:
  2172  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2173  
  2174  			case objabi.Hlinux:
  2175  				if isAndroid {
  2176  					return 0x64 // FS
  2177  				}
  2178  
  2179  				if ctxt.Flag_shared {
  2180  					log.Fatalf("unknown TLS base register for linux with -shared")
  2181  				} else {
  2182  					return 0x64 // FS
  2183  				}
  2184  
  2185  			case objabi.Hdragonfly,
  2186  				objabi.Hfreebsd,
  2187  				objabi.Hnetbsd,
  2188  				objabi.Hopenbsd,
  2189  				objabi.Hsolaris:
  2190  				return 0x64 // FS
  2191  
  2192  			case objabi.Hdarwin:
  2193  				return 0x65 // GS
  2194  			}
  2195  		}
  2196  	}
  2197  
  2198  	if ctxt.Arch.Family == sys.I386 {
  2199  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2200  			// When building for inclusion into a shared library, an instruction of the form
  2201  			//     MOVL 0(CX)(TLS*1), AX
  2202  			// becomes
  2203  			//     mov %gs:(%ecx), %eax
  2204  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2205  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2206  			// a shared library the instruction it becomes
  2207  			//     mov 0x0(%ecx), $eax
  2208  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2209  			if a.Offset != 0 {
  2210  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2211  			}
  2212  			return 0x65 // GS
  2213  		}
  2214  		return 0
  2215  	}
  2216  
  2217  	switch a.Index {
  2218  	case REG_CS:
  2219  		return 0x2e
  2220  
  2221  	case REG_DS:
  2222  		return 0x3e
  2223  
  2224  	case REG_ES:
  2225  		return 0x26
  2226  
  2227  	case REG_TLS:
  2228  		if ctxt.Flag_shared {
  2229  			// When building for inclusion into a shared library, an instruction of the form
  2230  			//     MOV 0(CX)(TLS*1), AX
  2231  			// becomes
  2232  			//     mov %fs:(%rcx), %rax
  2233  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2234  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2235  			// a shared library the instruction does not require a prefix.
  2236  			if a.Offset != 0 {
  2237  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2238  			}
  2239  			return 0x64
  2240  		}
  2241  
  2242  	case REG_FS:
  2243  		return 0x64
  2244  
  2245  	case REG_GS:
  2246  		return 0x65
  2247  	}
  2248  
  2249  	return 0
  2250  }
  2251  
  2252  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2253  	switch a.Type {
  2254  	case obj.TYPE_NONE:
  2255  		return Ynone
  2256  
  2257  	case obj.TYPE_BRANCH:
  2258  		return Ybr
  2259  
  2260  	case obj.TYPE_INDIR:
  2261  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2262  			return Yindir
  2263  		}
  2264  		return Yxxx
  2265  
  2266  	case obj.TYPE_MEM:
  2267  		if a.Index == REG_SP {
  2268  			// Can't use SP as the index register
  2269  			return Yxxx
  2270  		}
  2271  		if ctxt.Arch.Family == sys.AMD64 {
  2272  			switch a.Name {
  2273  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2274  				// Global variables can't use index registers and their
  2275  				// base register is %rip (%rip is encoded as REG_NONE).
  2276  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2277  					return Yxxx
  2278  				}
  2279  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2280  				// These names must have a base of SP.  The old compiler
  2281  				// uses 0 for the base register. SSA uses REG_SP.
  2282  				if a.Reg != REG_SP && a.Reg != 0 {
  2283  					return Yxxx
  2284  				}
  2285  			case obj.NAME_NONE:
  2286  				// everything is ok
  2287  			default:
  2288  				// unknown name
  2289  				return Yxxx
  2290  			}
  2291  		}
  2292  		return Ym
  2293  
  2294  	case obj.TYPE_ADDR:
  2295  		switch a.Name {
  2296  		case obj.NAME_GOTREF:
  2297  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2298  			return Yxxx
  2299  
  2300  		case obj.NAME_EXTERN,
  2301  			obj.NAME_STATIC:
  2302  			if a.Sym != nil && isextern(a.Sym) || (ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared) {
  2303  				return Yi32
  2304  			}
  2305  			return Yiauto // use pc-relative addressing
  2306  
  2307  		case obj.NAME_AUTO,
  2308  			obj.NAME_PARAM:
  2309  			return Yiauto
  2310  		}
  2311  
  2312  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2313  		// and got Yi32 in an earlier version of this code.
  2314  		// Keep doing that until we fix yduff etc.
  2315  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2316  			return Yi32
  2317  		}
  2318  
  2319  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2320  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2321  		}
  2322  		fallthrough
  2323  
  2324  		// fall through
  2325  
  2326  	case obj.TYPE_CONST:
  2327  		if a.Sym != nil {
  2328  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2329  		}
  2330  
  2331  		v := a.Offset
  2332  		if ctxt.Arch.Family == sys.I386 {
  2333  			v = int64(int32(v))
  2334  		}
  2335  		if v == 0 {
  2336  			if p.Mark&PRESERVEFLAGS != 0 {
  2337  				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
  2338  				return Yu7
  2339  			}
  2340  			return Yi0
  2341  		}
  2342  		if v == 1 {
  2343  			return Yi1
  2344  		}
  2345  		if v >= 0 && v <= 127 {
  2346  			return Yu7
  2347  		}
  2348  		if v >= 0 && v <= 255 {
  2349  			return Yu8
  2350  		}
  2351  		if v >= -128 && v <= 127 {
  2352  			return Yi8
  2353  		}
  2354  		if ctxt.Arch.Family == sys.I386 {
  2355  			return Yi32
  2356  		}
  2357  		l := int32(v)
  2358  		if int64(l) == v {
  2359  			return Ys32 /* can sign extend */
  2360  		}
  2361  		if v>>32 == 0 {
  2362  			return Yi32 /* unsigned */
  2363  		}
  2364  		return Yi64
  2365  
  2366  	case obj.TYPE_TEXTSIZE:
  2367  		return Ytextsize
  2368  	}
  2369  
  2370  	if a.Type != obj.TYPE_REG {
  2371  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2372  		return Yxxx
  2373  	}
  2374  
  2375  	switch a.Reg {
  2376  	case REG_AL:
  2377  		return Yal
  2378  
  2379  	case REG_AX:
  2380  		return Yax
  2381  
  2382  		/*
  2383  			case REG_SPB:
  2384  		*/
  2385  	case REG_BPB,
  2386  		REG_SIB,
  2387  		REG_DIB,
  2388  		REG_R8B,
  2389  		REG_R9B,
  2390  		REG_R10B,
  2391  		REG_R11B,
  2392  		REG_R12B,
  2393  		REG_R13B,
  2394  		REG_R14B,
  2395  		REG_R15B:
  2396  		if ctxt.Arch.Family == sys.I386 {
  2397  			return Yxxx
  2398  		}
  2399  		fallthrough
  2400  
  2401  	case REG_DL,
  2402  		REG_BL,
  2403  		REG_AH,
  2404  		REG_CH,
  2405  		REG_DH,
  2406  		REG_BH:
  2407  		return Yrb
  2408  
  2409  	case REG_CL:
  2410  		return Ycl
  2411  
  2412  	case REG_CX:
  2413  		return Ycx
  2414  
  2415  	case REG_DX, REG_BX:
  2416  		return Yrx
  2417  
  2418  	case REG_R8, /* not really Yrl */
  2419  		REG_R9,
  2420  		REG_R10,
  2421  		REG_R11,
  2422  		REG_R12,
  2423  		REG_R13,
  2424  		REG_R14,
  2425  		REG_R15:
  2426  		if ctxt.Arch.Family == sys.I386 {
  2427  			return Yxxx
  2428  		}
  2429  		fallthrough
  2430  
  2431  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2432  		if ctxt.Arch.Family == sys.I386 {
  2433  			return Yrl32
  2434  		}
  2435  		return Yrl
  2436  
  2437  	case REG_F0 + 0:
  2438  		return Yf0
  2439  
  2440  	case REG_F0 + 1,
  2441  		REG_F0 + 2,
  2442  		REG_F0 + 3,
  2443  		REG_F0 + 4,
  2444  		REG_F0 + 5,
  2445  		REG_F0 + 6,
  2446  		REG_F0 + 7:
  2447  		return Yrf
  2448  
  2449  	case REG_M0 + 0,
  2450  		REG_M0 + 1,
  2451  		REG_M0 + 2,
  2452  		REG_M0 + 3,
  2453  		REG_M0 + 4,
  2454  		REG_M0 + 5,
  2455  		REG_M0 + 6,
  2456  		REG_M0 + 7:
  2457  		return Ymr
  2458  
  2459  	case REG_X0 + 0,
  2460  		REG_X0 + 1,
  2461  		REG_X0 + 2,
  2462  		REG_X0 + 3,
  2463  		REG_X0 + 4,
  2464  		REG_X0 + 5,
  2465  		REG_X0 + 6,
  2466  		REG_X0 + 7,
  2467  		REG_X0 + 8,
  2468  		REG_X0 + 9,
  2469  		REG_X0 + 10,
  2470  		REG_X0 + 11,
  2471  		REG_X0 + 12,
  2472  		REG_X0 + 13,
  2473  		REG_X0 + 14,
  2474  		REG_X0 + 15:
  2475  		return Yxr
  2476  
  2477  	case REG_Y0 + 0,
  2478  		REG_Y0 + 1,
  2479  		REG_Y0 + 2,
  2480  		REG_Y0 + 3,
  2481  		REG_Y0 + 4,
  2482  		REG_Y0 + 5,
  2483  		REG_Y0 + 6,
  2484  		REG_Y0 + 7,
  2485  		REG_Y0 + 8,
  2486  		REG_Y0 + 9,
  2487  		REG_Y0 + 10,
  2488  		REG_Y0 + 11,
  2489  		REG_Y0 + 12,
  2490  		REG_Y0 + 13,
  2491  		REG_Y0 + 14,
  2492  		REG_Y0 + 15:
  2493  		return Yyr
  2494  
  2495  	case REG_CS:
  2496  		return Ycs
  2497  	case REG_SS:
  2498  		return Yss
  2499  	case REG_DS:
  2500  		return Yds
  2501  	case REG_ES:
  2502  		return Yes
  2503  	case REG_FS:
  2504  		return Yfs
  2505  	case REG_GS:
  2506  		return Ygs
  2507  	case REG_TLS:
  2508  		return Ytls
  2509  
  2510  	case REG_GDTR:
  2511  		return Ygdtr
  2512  	case REG_IDTR:
  2513  		return Yidtr
  2514  	case REG_LDTR:
  2515  		return Yldtr
  2516  	case REG_MSW:
  2517  		return Ymsw
  2518  	case REG_TASK:
  2519  		return Ytask
  2520  
  2521  	case REG_CR + 0:
  2522  		return Ycr0
  2523  	case REG_CR + 1:
  2524  		return Ycr1
  2525  	case REG_CR + 2:
  2526  		return Ycr2
  2527  	case REG_CR + 3:
  2528  		return Ycr3
  2529  	case REG_CR + 4:
  2530  		return Ycr4
  2531  	case REG_CR + 5:
  2532  		return Ycr5
  2533  	case REG_CR + 6:
  2534  		return Ycr6
  2535  	case REG_CR + 7:
  2536  		return Ycr7
  2537  	case REG_CR + 8:
  2538  		return Ycr8
  2539  
  2540  	case REG_DR + 0:
  2541  		return Ydr0
  2542  	case REG_DR + 1:
  2543  		return Ydr1
  2544  	case REG_DR + 2:
  2545  		return Ydr2
  2546  	case REG_DR + 3:
  2547  		return Ydr3
  2548  	case REG_DR + 4:
  2549  		return Ydr4
  2550  	case REG_DR + 5:
  2551  		return Ydr5
  2552  	case REG_DR + 6:
  2553  		return Ydr6
  2554  	case REG_DR + 7:
  2555  		return Ydr7
  2556  
  2557  	case REG_TR + 0:
  2558  		return Ytr0
  2559  	case REG_TR + 1:
  2560  		return Ytr1
  2561  	case REG_TR + 2:
  2562  		return Ytr2
  2563  	case REG_TR + 3:
  2564  		return Ytr3
  2565  	case REG_TR + 4:
  2566  		return Ytr4
  2567  	case REG_TR + 5:
  2568  		return Ytr5
  2569  	case REG_TR + 6:
  2570  		return Ytr6
  2571  	case REG_TR + 7:
  2572  		return Ytr7
  2573  	}
  2574  
  2575  	return Yxxx
  2576  }
  2577  
  2578  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  2579  // and hold assembly state.
  2580  type AsmBuf struct {
  2581  	buf     [100]byte
  2582  	off     int
  2583  	rexflag int
  2584  	vexflag int
  2585  	rep     int
  2586  	repn    int
  2587  	lock    bool
  2588  }
  2589  
  2590  // Put1 appends one byte to the end of the buffer.
  2591  func (a *AsmBuf) Put1(x byte) {
  2592  	a.buf[a.off] = x
  2593  	a.off++
  2594  }
  2595  
  2596  // Put2 appends two bytes to the end of the buffer.
  2597  func (a *AsmBuf) Put2(x, y byte) {
  2598  	a.buf[a.off+0] = x
  2599  	a.buf[a.off+1] = y
  2600  	a.off += 2
  2601  }
  2602  
  2603  // Put3 appends three bytes to the end of the buffer.
  2604  func (a *AsmBuf) Put3(x, y, z byte) {
  2605  	a.buf[a.off+0] = x
  2606  	a.buf[a.off+1] = y
  2607  	a.buf[a.off+2] = z
  2608  	a.off += 3
  2609  }
  2610  
  2611  // Put4 appends four bytes to the end of the buffer.
  2612  func (a *AsmBuf) Put4(x, y, z, w byte) {
  2613  	a.buf[a.off+0] = x
  2614  	a.buf[a.off+1] = y
  2615  	a.buf[a.off+2] = z
  2616  	a.buf[a.off+3] = w
  2617  	a.off += 4
  2618  }
  2619  
  2620  // PutInt16 writes v into the buffer using little-endian encoding.
  2621  func (a *AsmBuf) PutInt16(v int16) {
  2622  	a.buf[a.off+0] = byte(v)
  2623  	a.buf[a.off+1] = byte(v >> 8)
  2624  	a.off += 2
  2625  }
  2626  
  2627  // PutInt32 writes v into the buffer using little-endian encoding.
  2628  func (a *AsmBuf) PutInt32(v int32) {
  2629  	a.buf[a.off+0] = byte(v)
  2630  	a.buf[a.off+1] = byte(v >> 8)
  2631  	a.buf[a.off+2] = byte(v >> 16)
  2632  	a.buf[a.off+3] = byte(v >> 24)
  2633  	a.off += 4
  2634  }
  2635  
  2636  // PutInt64 writes v into the buffer using little-endian encoding.
  2637  func (a *AsmBuf) PutInt64(v int64) {
  2638  	a.buf[a.off+0] = byte(v)
  2639  	a.buf[a.off+1] = byte(v >> 8)
  2640  	a.buf[a.off+2] = byte(v >> 16)
  2641  	a.buf[a.off+3] = byte(v >> 24)
  2642  	a.buf[a.off+4] = byte(v >> 32)
  2643  	a.buf[a.off+5] = byte(v >> 40)
  2644  	a.buf[a.off+6] = byte(v >> 48)
  2645  	a.buf[a.off+7] = byte(v >> 56)
  2646  	a.off += 8
  2647  }
  2648  
  2649  // Put copies b into the buffer.
  2650  func (a *AsmBuf) Put(b []byte) {
  2651  	copy(a.buf[a.off:], b)
  2652  	a.off += len(b)
  2653  }
  2654  
  2655  // Insert inserts b at offset i.
  2656  func (a *AsmBuf) Insert(i int, b byte) {
  2657  	a.off++
  2658  	copy(a.buf[i+1:a.off], a.buf[i:a.off-1])
  2659  	a.buf[i] = b
  2660  }
  2661  
  2662  // Last returns the byte at the end of the buffer.
  2663  func (a *AsmBuf) Last() byte { return a.buf[a.off-1] }
  2664  
  2665  // Len returns the length of the buffer.
  2666  func (a *AsmBuf) Len() int { return a.off }
  2667  
  2668  // Bytes returns the contents of the buffer.
  2669  func (a *AsmBuf) Bytes() []byte { return a.buf[:a.off] }
  2670  
  2671  // Reset empties the buffer.
  2672  func (a *AsmBuf) Reset() { a.off = 0 }
  2673  
  2674  // At returns the byte at offset i.
  2675  func (a *AsmBuf) At(i int) byte { return a.buf[i] }
  2676  
  2677  func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2678  	var i int
  2679  
  2680  	switch index {
  2681  	default:
  2682  		goto bad
  2683  
  2684  	case REG_NONE:
  2685  		i = 4 << 3
  2686  		goto bas
  2687  
  2688  	case REG_R8,
  2689  		REG_R9,
  2690  		REG_R10,
  2691  		REG_R11,
  2692  		REG_R12,
  2693  		REG_R13,
  2694  		REG_R14,
  2695  		REG_R15:
  2696  		if ctxt.Arch.Family == sys.I386 {
  2697  			goto bad
  2698  		}
  2699  		fallthrough
  2700  
  2701  	case REG_AX,
  2702  		REG_CX,
  2703  		REG_DX,
  2704  		REG_BX,
  2705  		REG_BP,
  2706  		REG_SI,
  2707  		REG_DI:
  2708  		i = reg[index] << 3
  2709  	}
  2710  
  2711  	switch scale {
  2712  	default:
  2713  		goto bad
  2714  
  2715  	case 1:
  2716  		break
  2717  
  2718  	case 2:
  2719  		i |= 1 << 6
  2720  
  2721  	case 4:
  2722  		i |= 2 << 6
  2723  
  2724  	case 8:
  2725  		i |= 3 << 6
  2726  	}
  2727  
  2728  bas:
  2729  	switch base {
  2730  	default:
  2731  		goto bad
  2732  
  2733  	case REG_NONE: /* must be mod=00 */
  2734  		i |= 5
  2735  
  2736  	case REG_R8,
  2737  		REG_R9,
  2738  		REG_R10,
  2739  		REG_R11,
  2740  		REG_R12,
  2741  		REG_R13,
  2742  		REG_R14,
  2743  		REG_R15:
  2744  		if ctxt.Arch.Family == sys.I386 {
  2745  			goto bad
  2746  		}
  2747  		fallthrough
  2748  
  2749  	case REG_AX,
  2750  		REG_CX,
  2751  		REG_DX,
  2752  		REG_BX,
  2753  		REG_SP,
  2754  		REG_BP,
  2755  		REG_SI,
  2756  		REG_DI:
  2757  		i |= reg[base]
  2758  	}
  2759  
  2760  	asmbuf.Put1(byte(i))
  2761  	return
  2762  
  2763  bad:
  2764  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2765  	asmbuf.Put1(0)
  2766  	return
  2767  }
  2768  
  2769  func (asmbuf *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  2770  	var rel obj.Reloc
  2771  
  2772  	v := vaddr(ctxt, p, a, &rel)
  2773  	if rel.Siz != 0 {
  2774  		if rel.Siz != 4 {
  2775  			ctxt.Diag("bad reloc")
  2776  		}
  2777  		r := obj.Addrel(cursym)
  2778  		*r = rel
  2779  		r.Off = int32(p.Pc + int64(asmbuf.Len()))
  2780  	}
  2781  
  2782  	asmbuf.PutInt32(int32(v))
  2783  }
  2784  
  2785  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2786  	if r != nil {
  2787  		*r = obj.Reloc{}
  2788  	}
  2789  
  2790  	switch a.Name {
  2791  	case obj.NAME_STATIC,
  2792  		obj.NAME_GOTREF,
  2793  		obj.NAME_EXTERN:
  2794  		s := a.Sym
  2795  		if r == nil {
  2796  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2797  			log.Fatalf("reloc")
  2798  		}
  2799  
  2800  		if a.Name == obj.NAME_GOTREF {
  2801  			r.Siz = 4
  2802  			r.Type = objabi.R_GOTPCREL
  2803  		} else if isextern(s) || (ctxt.Arch.Family != sys.AMD64 && !ctxt.Flag_shared) {
  2804  			r.Siz = 4
  2805  			r.Type = objabi.R_ADDR
  2806  		} else {
  2807  			r.Siz = 4
  2808  			r.Type = objabi.R_PCREL
  2809  		}
  2810  
  2811  		r.Off = -1 // caller must fill in
  2812  		r.Sym = s
  2813  		r.Add = a.Offset
  2814  
  2815  		return 0
  2816  	}
  2817  
  2818  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2819  		if r == nil {
  2820  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2821  			log.Fatalf("reloc")
  2822  		}
  2823  
  2824  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  2825  			r.Type = objabi.R_TLS_LE
  2826  			r.Siz = 4
  2827  			r.Off = -1 // caller must fill in
  2828  			r.Add = a.Offset
  2829  		}
  2830  		return 0
  2831  	}
  2832  
  2833  	return a.Offset
  2834  }
  2835  
// asmandsz appends the operand-addressing bytes for a to the buffer.
//
// The first byte written has a 2-bit mode in bits 6-7, r in bits 3-5 and a
// register/memory selector in bits 0-2 (the x86 ModR/M layout). Depending on
// the operand it may be followed by a scale/index/base byte (see asmidx) and
// by an 8-bit or 32-bit displacement.
//
// Parameters:
//   - ctxt, p: used for diagnostics and passed on to vaddr.
//   - cursym: symbol that receives a relocation when one is needed for the
//     displacement.
//   - a: operand to encode; only TYPE_REG and TYPE_MEM are accepted.
//   - r: value placed in bits 3-5 of the first byte.
//   - rex: REX bits from the caller; only 0x40 and Rxr are kept.
//   - m64: not referenced in the body.
//
// As a side effect, REX bits required by the registers involved are ORed into
// asmbuf.rexflag. Operands that cannot be encoded are reported with
// ctxt.Diag; the error path only reports and does not append placeholder
// bytes.
func (asmbuf *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Keep only the 0x40 bit and Rxr from the caller-supplied REX bits.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && asmbuf.rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement, truncated to 32 bits.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here; the two checks below only add
		// a more specific diagnostic before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mode 3 with reg[a.Reg] in the low bits.
		// A nonzero offset on a register operand is rejected.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		asmbuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		asmbuf.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register (REG_TLS as index is handled
	// further down): always selector 4 followed by a scale/index/base byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			// On AMD64 an indexed symbol reference is only accepted when
			// isextern reports true for the symbol.
			if !isextern(a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			// The displacement (and possibly a relocation) comes from vaddr.
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			// Stack-relative names are addressed off SP.
			base = REG_SP
		}

		asmbuf.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mode 0, then the 32-bit displacement.
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Mode 0, no displacement. BP and R13 are excluded and fall through
		// to the displacement forms below (presumably because their base
		// encoding in mode 0 is the "no base" form used by asmidx).
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Mode 1: displacement fits in a signed byte and needs no relocation.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			asmbuf.Put1(byte(v))
			return
		}

		// Mode 2: 32-bit displacement.
		asmbuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// From here on the operand has no index register, or its index is REG_TLS.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	asmbuf.rexflag |= regrex[base]&Rxb | rex
	// No base register, a segment register (REG_CS..REG_GS), or REG_TLS as
	// base: mode 0 followed by a 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// The short form (selector 5, no scale/index/base byte) is used for
		// symbol references with no base whose symbol is nil or not isextern,
		// and unconditionally when not assembling for AMD64.
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			asmbuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		asmbuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base are always followed by a scale/index/base byte with
	// no index register.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			asmbuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			asmbuf.Put1(byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Plain base register, encoded directly in the low bits of the first byte.
	if REG_AX <= base && base <= REG_R15 {
		// REG_TLS as index with ctxt.Flag_shared unset: move the displacement
		// into the addend of an R_TLS_LE relocation and emit zero in its place.
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// Mode 0 (no displacement); BP and R13 are excluded as above.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		// Mode 1 (8-bit displacement).
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		// Mode 2 (32-bit displacement).
		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement, first recording the pending relocation
	// (if any) at the offset where those four bytes will land.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// This r is a new local that shadows the r parameter.
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(asmbuf.Len()))
	}

	asmbuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  3036  
  3037  func (asmbuf *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3038  	asmbuf.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3039  }
  3040  
  3041  func (asmbuf *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3042  	asmbuf.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3043  }
  3044  
  3045  func bytereg(a *obj.Addr, t *uint8) {
  3046  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3047  		a.Reg += REG_AL - REG_AX
  3048  		*t = 0
  3049  	}
  3050  }
  3051  
  3052  func unbytereg(a *obj.Addr, t *uint8) {
  3053  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3054  		a.Reg += REG_AX - REG_AL
  3055  		*t = 0
  3056  	}
  3057  }
  3058  
const (
	// E is the byte value 0xff. In ymovtab it appears in the op array right
	// after the last opcode byte of the push/pop entries, so it presumably
	// acts as an end-of-opcode marker there — confirm against the code that
	// consumes ymovtab.
	E = 0xff
)
  3062  
// ymovtab is a table of special-case encodings, grouped below by purpose:
// pushes and pops of segment registers, moves to and from segment, control,
// debug and test registers, descriptor-table and task registers, double
// shifts, and loads of the TLS base.
//
// Each entry is a Movtab literal. Judging from the values used here the
// fields are, in order: the instruction, the operand classes of the from,
// from3 and to operands, a small integer code selecting how the entry is
// encoded, and up to four opcode-related bytes — TODO confirm against the
// Movtab declaration. The final all-zero entry presumably terminates the
// table.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3202  
  3203  func isax(a *obj.Addr) bool {
  3204  	switch a.Reg {
  3205  	case REG_AX, REG_AL, REG_AH:
  3206  		return true
  3207  	}
  3208  
  3209  	if a.Index == REG_AX {
  3210  		return true
  3211  	}
  3212  	return false
  3213  }
  3214  
  3215  func subreg(p *obj.Prog, from int, to int) {
  3216  	if false { /* debug['Q'] */
  3217  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3218  	}
  3219  
  3220  	if int(p.From.Reg) == from {
  3221  		p.From.Reg = int16(to)
  3222  		p.Ft = 0
  3223  	}
  3224  
  3225  	if int(p.To.Reg) == from {
  3226  		p.To.Reg = int16(to)
  3227  		p.Tt = 0
  3228  	}
  3229  
  3230  	if int(p.From.Index) == from {
  3231  		p.From.Index = int16(to)
  3232  		p.Ft = 0
  3233  	}
  3234  
  3235  	if int(p.To.Index) == from {
  3236  		p.To.Index = int16(to)
  3237  		p.Tt = 0
  3238  	}
  3239  
  3240  	if false { /* debug['Q'] */
  3241  		fmt.Printf("%v\n", p)
  3242  	}
  3243  }
  3244  
  3245  func (asmbuf *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3246  	switch op {
  3247  	case Pm, Pe, Pf2, Pf3:
  3248  		if osize != 1 {
  3249  			if op != Pm {
  3250  				asmbuf.Put1(byte(op))
  3251  			}
  3252  			asmbuf.Put1(Pm)
  3253  			z++
  3254  			op = int(o.op[z])
  3255  			break
  3256  		}
  3257  		fallthrough
  3258  
  3259  	default:
  3260  		if asmbuf.Len() == 0 || asmbuf.Last() != Pm {
  3261  			asmbuf.Put1(Pm)
  3262  		}
  3263  	}
  3264  
  3265  	asmbuf.Put1(byte(op))
  3266  	return z
  3267  }
  3268  
// bpduff1 holds the pre-encoded machine code for the two instructions named
// in the trailing comments: BP is stored at -16(SP) and then pointed at that
// slot. Presumably emitted before a Duff's-device call to keep the frame
// pointer chain intact — confirm at the use site.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
  3273  
// bpduff2 holds the pre-encoded machine code for the instruction named in
// the trailing comment, which reloads BP from the location it points to.
// Presumably the counterpart of bpduff1, emitted after the call — confirm
// at the use site.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
  3277  
  3278  // Emit VEX prefix and opcode byte.
  3279  // The three addresses are the r/m, vvvv, and reg fields.
  3280  // The reg and rm arguments appear in the same order as the
  3281  // arguments to asmand, which typically follows the call to asmvex.
  3282  // The final two arguments are the VEX prefix (see encoding above)
  3283  // and the opcode byte.
  3284  // For details about vex prefix see:
  3285  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3286  func (asmbuf *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3287  	asmbuf.vexflag = 1
  3288  	rexR := 0
  3289  	if r != nil {
  3290  		rexR = regrex[r.Reg] & Rxr
  3291  	}
  3292  	rexB := 0
  3293  	rexX := 0
  3294  	if rm != nil {
  3295  		rexB = regrex[rm.Reg] & Rxb
  3296  		rexX = regrex[rm.Index] & Rxx
  3297  	}
  3298  	vexM := (vex >> 3) & 0xF
  3299  	vexWLP := vex & 0x87
  3300  	vexV := byte(0)
  3301  	if v != nil {
  3302  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3303  	}
  3304  	vexV ^= 0xF
  3305  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3306  		// Can use 2-byte encoding.
  3307  		asmbuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3308  	} else {
  3309  		// Must use 3-byte encoding.
  3310  		asmbuf.Put3(0xc4,
  3311  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3312  			vexV<<3|vexWLP,
  3313  		)
  3314  	}
  3315  	asmbuf.Put1(opcode)
  3316  }
  3317  
  3318  func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  3319  	o := opindex[p.As&obj.AMask]
  3320  
  3321  	if o == nil {
  3322  		ctxt.Diag("asmins: missing op %v", p)
  3323  		return
  3324  	}
  3325  
  3326  	pre := prefixof(ctxt, p, &p.From)
  3327  	if pre != 0 {
  3328  		asmbuf.Put1(byte(pre))
  3329  	}
  3330  	pre = prefixof(ctxt, p, &p.To)
  3331  	if pre != 0 {
  3332  		asmbuf.Put1(byte(pre))
  3333  	}
  3334  
  3335  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3336  	// which encodes as SHRQ $32(DX*0), AX.
  3337  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3338  	// Change encoding generated by assemblers and compilers and remove.
  3339  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3340  		p.From3 = new(obj.Addr)
  3341  		p.From3.Type = obj.TYPE_REG
  3342  		p.From3.Reg = p.From.Index
  3343  		p.From.Index = 0
  3344  	}
  3345  
  3346  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3347  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3348  	switch p.As {
  3349  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3350  		if p.From3Type() == obj.TYPE_NONE {
  3351  			p.From3 = new(obj.Addr)
  3352  			*p.From3 = p.From
  3353  			p.From = obj.Addr{}
  3354  			p.From.Type = obj.TYPE_CONST
  3355  			p.From.Offset = p.To.Offset
  3356  			p.To.Offset = 0
  3357  		}
  3358  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3359  		if p.From3Type() == obj.TYPE_NONE {
  3360  			p.From3 = new(obj.Addr)
  3361  			*p.From3 = p.To
  3362  			p.To = obj.Addr{}
  3363  			p.To.Type = obj.TYPE_CONST
  3364  			p.To.Offset = p.From3.Offset
  3365  			p.From3.Offset = 0
  3366  		}
  3367  	}
  3368  
  3369  	if p.Ft == 0 {
  3370  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3371  	}
  3372  	if p.Tt == 0 {
  3373  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3374  	}
  3375  
  3376  	ft := int(p.Ft) * Ymax
  3377  	f3t := Ynone * Ymax
  3378  	if p.From3 != nil {
  3379  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3380  	}
  3381  	tt := int(p.Tt) * Ymax
  3382  
  3383  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3384  	z := 0
  3385  	var a *obj.Addr
  3386  	var l int
  3387  	var op int
  3388  	var q *obj.Prog
  3389  	var r *obj.Reloc
  3390  	var rel obj.Reloc
  3391  	var v int64
  3392  	for i := range o.ytab {
  3393  		yt := &o.ytab[i]
  3394  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3395  			switch o.prefix {
  3396  			case Px1: /* first option valid only in 32-bit mode */
  3397  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  3398  					z += int(yt.zoffset) + xo
  3399  					continue
  3400  				}
  3401  			case Pq: /* 16 bit escape and opcode escape */
  3402  				asmbuf.Put2(Pe, Pm)
  3403  
  3404  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3405  				asmbuf.rexflag |= Pw
  3406  				asmbuf.Put2(Pe, Pm)
  3407  
  3408  			case Pq4: /*  66 0F 38 */
  3409  				asmbuf.Put3(0x66, 0x0F, 0x38)
  3410  
  3411  			case Pf2, /* xmm opcode escape */
  3412  				Pf3:
  3413  				asmbuf.Put2(o.prefix, Pm)
  3414  
  3415  			case Pef3:
  3416  				asmbuf.Put3(Pe, Pf3, Pm)
  3417  
  3418  			case Pfw: /* xmm opcode escape + REX.W */
  3419  				asmbuf.rexflag |= Pw
  3420  				asmbuf.Put2(Pf3, Pm)
  3421  
  3422  			case Pm: /* opcode escape */
  3423  				asmbuf.Put1(Pm)
  3424  
  3425  			case Pe: /* 16 bit escape */
  3426  				asmbuf.Put1(Pe)
  3427  
  3428  			case Pw: /* 64-bit escape */
  3429  				if ctxt.Arch.Family != sys.AMD64 {
  3430  					ctxt.Diag("asmins: illegal 64: %v", p)
  3431  				}
  3432  				asmbuf.rexflag |= Pw
  3433  
  3434  			case Pw8: /* 64-bit escape if z >= 8 */
  3435  				if z >= 8 {
  3436  					if ctxt.Arch.Family != sys.AMD64 {
  3437  						ctxt.Diag("asmins: illegal 64: %v", p)
  3438  					}
  3439  					asmbuf.rexflag |= Pw
  3440  				}
  3441  
  3442  			case Pb: /* botch */
  3443  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3444  					goto bad
  3445  				}
  3446  				// NOTE(rsc): This is probably safe to do always,
  3447  				// but when enabled it chooses different encodings
  3448  				// than the old cmd/internal/obj/i386 code did,
  3449  				// which breaks our "same bits out" checks.
  3450  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3451  				// in the original obj/i386, and it would encode
  3452  				// (using a valid, shorter form) as 3c 00 if we enabled
  3453  				// the call to bytereg here.
  3454  				if ctxt.Arch.Family == sys.AMD64 {
  3455  					bytereg(&p.From, &p.Ft)
  3456  					bytereg(&p.To, &p.Tt)
  3457  				}
  3458  
  3459  			case P32: /* 32 bit but illegal if 64-bit mode */
  3460  				if ctxt.Arch.Family == sys.AMD64 {
  3461  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3462  				}
  3463  
  3464  			case Py: /* 64-bit only, no prefix */
  3465  				if ctxt.Arch.Family != sys.AMD64 {
  3466  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3467  				}
  3468  
  3469  			case Py1: /* 64-bit only if z < 1, no prefix */
  3470  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  3471  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3472  				}
  3473  
  3474  			case Py3: /* 64-bit only if z < 3, no prefix */
  3475  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  3476  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3477  				}
  3478  			}
  3479  
  3480  			if z >= len(o.op) {
  3481  				log.Fatalf("asmins bad table %v", p)
  3482  			}
  3483  			op = int(o.op[z])
  3484  			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
  3485  			if op == 0x0f && o.prefix != Pvex {
  3486  				asmbuf.Put1(byte(op))
  3487  				z++
  3488  				op = int(o.op[z])
  3489  			}
  3490  
  3491  			switch yt.zcase {
  3492  			default:
  3493  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3494  				return
  3495  
  3496  			case Zpseudo:
  3497  				break
  3498  
  3499  			case Zlit:
  3500  				for ; ; z++ {
  3501  					op = int(o.op[z])
  3502  					if op == 0 {
  3503  						break
  3504  					}
  3505  					asmbuf.Put1(byte(op))
  3506  				}
  3507  
  3508  			case Zlitm_r:
  3509  				for ; ; z++ {
  3510  					op = int(o.op[z])
  3511  					if op == 0 {
  3512  						break
  3513  					}
  3514  					asmbuf.Put1(byte(op))
  3515  				}
  3516  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3517  
  3518  			case Zmb_r:
  3519  				bytereg(&p.From, &p.Ft)
  3520  				fallthrough
  3521  
  3522  			case Zm_r:
  3523  				asmbuf.Put1(byte(op))
  3524  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3525  
  3526  			case Zm2_r:
  3527  				asmbuf.Put2(byte(op), o.op[z+1])
  3528  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3529  
  3530  			case Zm_r_xm:
  3531  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3532  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3533  
  3534  			case Zm_r_xm_nr:
  3535  				asmbuf.rexflag = 0
  3536  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3537  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3538  
  3539  			case Zm_r_i_xm:
  3540  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3541  				asmbuf.asmand(ctxt, cursym, p, &p.From, p.From3)
  3542  				asmbuf.Put1(byte(p.To.Offset))
  3543  
  3544  			case Zibm_r, Zibr_m:
  3545  				for {
  3546  					tmp1 := z
  3547  					z++
  3548  					op = int(o.op[tmp1])
  3549  					if op == 0 {
  3550  						break
  3551  					}
  3552  					asmbuf.Put1(byte(op))
  3553  				}
  3554  				if yt.zcase == Zibr_m {
  3555  					asmbuf.asmand(ctxt, cursym, p, &p.To, p.From3)
  3556  				} else {
  3557  					asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3558  				}
  3559  				asmbuf.Put1(byte(p.From.Offset))
  3560  
  3561  			case Zaut_r:
  3562  				asmbuf.Put1(0x8d) // leal
  3563  				if p.From.Type != obj.TYPE_ADDR {
  3564  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3565  				}
  3566  				p.From.Type = obj.TYPE_MEM
  3567  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3568  				p.From.Type = obj.TYPE_ADDR
  3569  
  3570  			case Zm_o:
  3571  				asmbuf.Put1(byte(op))
  3572  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3573  
  3574  			case Zr_m:
  3575  				asmbuf.Put1(byte(op))
  3576  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3577  
  3578  			case Zvex_rm_v_r:
  3579  				asmbuf.asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3580  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3581  
  3582  			case Zvex_i_r_v:
  3583  				asmbuf.asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
  3584  				regnum := byte(0x7)
  3585  				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
  3586  					regnum &= byte(p.From3.Reg - REG_X0)
  3587  				} else {
  3588  					regnum &= byte(p.From3.Reg - REG_Y0)
  3589  				}
  3590  				asmbuf.Put1(byte(o.op[z+2]) | regnum)
  3591  				asmbuf.Put1(byte(p.From.Offset))
  3592  
  3593  			case Zvex_i_rm_v_r:
  3594  				asmbuf.asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3595  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3596  				asmbuf.Put1(byte(p.From3.Offset))
  3597  
  3598  			case Zvex_i_rm_r:
  3599  				asmbuf.asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
  3600  				asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3601  				asmbuf.Put1(byte(p.From.Offset))
  3602  
  3603  			case Zvex_v_rm_r:
  3604  				asmbuf.asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3605  				asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3606  
  3607  			case Zvex_r_v_rm:
  3608  				asmbuf.asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3609  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3610  
  3611  			case Zr_m_xm:
  3612  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3613  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3614  
  3615  			case Zr_m_xm_nr:
  3616  				asmbuf.rexflag = 0
  3617  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3618  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3619  
  3620  			case Zo_m:
  3621  				asmbuf.Put1(byte(op))
  3622  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3623  
  3624  			case Zcallindreg:
  3625  				r = obj.Addrel(cursym)
  3626  				r.Off = int32(p.Pc)
  3627  				r.Type = objabi.R_CALLIND
  3628  				r.Siz = 0
  3629  				fallthrough
  3630  
  3631  			case Zo_m64:
  3632  				asmbuf.Put1(byte(op))
  3633  				asmbuf.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  3634  
  3635  			case Zm_ibo:
  3636  				asmbuf.Put1(byte(op))
  3637  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3638  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3639  
  3640  			case Zibo_m:
  3641  				asmbuf.Put1(byte(op))
  3642  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3643  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3644  
  3645  			case Zibo_m_xm:
  3646  				z = asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3647  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3648  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3649  
  3650  			case Z_ib, Zib_:
  3651  				if yt.zcase == Zib_ {
  3652  					a = &p.From
  3653  				} else {
  3654  					a = &p.To
  3655  				}
  3656  				asmbuf.Put1(byte(op))
  3657  				if p.As == AXABORT {
  3658  					asmbuf.Put1(o.op[z+1])
  3659  				}
  3660  				asmbuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3661  
  3662  			case Zib_rp:
  3663  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3664  				asmbuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3665  
  3666  			case Zil_rp:
  3667  				asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3668  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3669  				if o.prefix == Pe {
  3670  					v = vaddr(ctxt, p, &p.From, nil)
  3671  					asmbuf.PutInt16(int16(v))
  3672  				} else {
  3673  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3674  				}
  3675  
  3676  			case Zo_iw:
  3677  				asmbuf.Put1(byte(op))
  3678  				if p.From.Type != obj.TYPE_NONE {
  3679  					v = vaddr(ctxt, p, &p.From, nil)
  3680  					asmbuf.PutInt16(int16(v))
  3681  				}
  3682  
  3683  			case Ziq_rp:
  3684  				v = vaddr(ctxt, p, &p.From, &rel)
  3685  				l = int(v >> 32)
  3686  				if l == 0 && rel.Siz != 8 {
  3687  					//p->mark |= 0100;
  3688  					//print("zero: %llux %v\n", v, p);
  3689  					asmbuf.rexflag &^= (0x40 | Rxw)
  3690  
  3691  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3692  					asmbuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3693  					if rel.Type != 0 {
  3694  						r = obj.Addrel(cursym)
  3695  						*r = rel
  3696  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3697  					}
  3698  
  3699  					asmbuf.PutInt32(int32(v))
  3700  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3701  
  3702  					//p->mark |= 0100;
  3703  					//print("sign: %llux %v\n", v, p);
  3704  					asmbuf.Put1(0xc7)
  3705  					asmbuf.asmando(ctxt, cursym, p, &p.To, 0)
  3706  
  3707  					asmbuf.PutInt32(int32(v)) // need all 8
  3708  				} else {
  3709  					//print("all: %llux %v\n", v, p);
  3710  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3711  					asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3712  					if rel.Type != 0 {
  3713  						r = obj.Addrel(cursym)
  3714  						*r = rel
  3715  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3716  					}
  3717  
  3718  					asmbuf.PutInt64(v)
  3719  				}
  3720  
  3721  			case Zib_rr:
  3722  				asmbuf.Put1(byte(op))
  3723  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3724  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3725  
  3726  			case Z_il, Zil_:
  3727  				if yt.zcase == Zil_ {
  3728  					a = &p.From
  3729  				} else {
  3730  					a = &p.To
  3731  				}
  3732  				asmbuf.Put1(byte(op))
  3733  				if o.prefix == Pe {
  3734  					v = vaddr(ctxt, p, a, nil)
  3735  					asmbuf.PutInt16(int16(v))
  3736  				} else {
  3737  					asmbuf.relput4(ctxt, cursym, p, a)
  3738  				}
  3739  
  3740  			case Zm_ilo, Zilo_m:
  3741  				asmbuf.Put1(byte(op))
  3742  				if yt.zcase == Zilo_m {
  3743  					a = &p.From
  3744  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3745  				} else {
  3746  					a = &p.To
  3747  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3748  				}
  3749  
  3750  				if o.prefix == Pe {
  3751  					v = vaddr(ctxt, p, a, nil)
  3752  					asmbuf.PutInt16(int16(v))
  3753  				} else {
  3754  					asmbuf.relput4(ctxt, cursym, p, a)
  3755  				}
  3756  
  3757  			case Zil_rr:
  3758  				asmbuf.Put1(byte(op))
  3759  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3760  				if o.prefix == Pe {
  3761  					v = vaddr(ctxt, p, &p.From, nil)
  3762  					asmbuf.PutInt16(int16(v))
  3763  				} else {
  3764  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3765  				}
  3766  
  3767  			case Z_rp:
  3768  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3769  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3770  
  3771  			case Zrp_:
  3772  				asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3773  				asmbuf.Put1(byte(op + reg[p.From.Reg]))
  3774  
  3775  			case Zclr:
  3776  				asmbuf.rexflag &^= Pw
  3777  				asmbuf.Put1(byte(op))
  3778  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3779  
  3780  			case Zcallcon, Zjmpcon:
  3781  				if yt.zcase == Zcallcon {
  3782  					asmbuf.Put1(byte(op))
  3783  				} else {
  3784  					asmbuf.Put1(o.op[z+1])
  3785  				}
  3786  				r = obj.Addrel(cursym)
  3787  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3788  				r.Type = objabi.R_PCREL
  3789  				r.Siz = 4
  3790  				r.Add = p.To.Offset
  3791  				asmbuf.PutInt32(0)
  3792  
  3793  			case Zcallind:
  3794  				asmbuf.Put2(byte(op), o.op[z+1])
  3795  				r = obj.Addrel(cursym)
  3796  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3797  				if ctxt.Arch.Family == sys.AMD64 {
  3798  					r.Type = objabi.R_PCREL
  3799  				} else {
  3800  					r.Type = objabi.R_ADDR
  3801  				}
  3802  				r.Siz = 4
  3803  				r.Add = p.To.Offset
  3804  				r.Sym = p.To.Sym
  3805  				asmbuf.PutInt32(0)
  3806  
  3807  			case Zcall, Zcallduff:
  3808  				if p.To.Sym == nil {
  3809  					ctxt.Diag("call without target")
  3810  					log.Fatalf("bad code")
  3811  				}
  3812  
  3813  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3814  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3815  				}
  3816  
  3817  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3818  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3819  					// (the call jumps into the middle of the function).
  3820  					// This makes it possible to see call sites for duffcopy/duffzero in
  3821  					// BP-based profiling tools like Linux perf (which is the
  3822  					// whole point of obj.Framepointer_enabled).
  3823  					// MOVQ BP, -16(SP)
  3824  					// LEAQ -16(SP), BP
  3825  					asmbuf.Put(bpduff1)
  3826  				}
  3827  				asmbuf.Put1(byte(op))
  3828  				r = obj.Addrel(cursym)
  3829  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3830  				r.Sym = p.To.Sym
  3831  				r.Add = p.To.Offset
  3832  				r.Type = objabi.R_CALL
  3833  				r.Siz = 4
  3834  				asmbuf.PutInt32(0)
  3835  
  3836  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3837  					// Pop BP pushed above.
  3838  					// MOVQ 0(BP), BP
  3839  					asmbuf.Put(bpduff2)
  3840  				}
  3841  
  3842  			// TODO: jump across functions needs reloc
  3843  			case Zbr, Zjmp, Zloop:
  3844  				if p.As == AXBEGIN {
  3845  					asmbuf.Put1(byte(op))
  3846  				}
  3847  				if p.To.Sym != nil {
  3848  					if yt.zcase != Zjmp {
  3849  						ctxt.Diag("branch to ATEXT")
  3850  						log.Fatalf("bad code")
  3851  					}
  3852  
  3853  					asmbuf.Put1(o.op[z+1])
  3854  					r = obj.Addrel(cursym)
  3855  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3856  					r.Sym = p.To.Sym
  3857  					r.Type = objabi.R_PCREL
  3858  					r.Siz = 4
  3859  					asmbuf.PutInt32(0)
  3860  					break
  3861  				}
  3862  
  3863  				// Assumes q is in this function.
  3864  				// TODO: Check in input, preserve in brchain.
  3865  
  3866  				// Fill in backward jump now.
  3867  				q = p.Pcond
  3868  
  3869  				if q == nil {
  3870  					ctxt.Diag("jmp/branch/loop without target")
  3871  					log.Fatalf("bad code")
  3872  				}
  3873  
  3874  				if p.Back&1 != 0 {
  3875  					v = q.Pc - (p.Pc + 2)
  3876  					if v >= -128 && p.As != AXBEGIN {
  3877  						if p.As == AJCXZL {
  3878  							asmbuf.Put1(0x67)
  3879  						}
  3880  						asmbuf.Put2(byte(op), byte(v))
  3881  					} else if yt.zcase == Zloop {
  3882  						ctxt.Diag("loop too far: %v", p)
  3883  					} else {
  3884  						v -= 5 - 2
  3885  						if p.As == AXBEGIN {
  3886  							v--
  3887  						}
  3888  						if yt.zcase == Zbr {
  3889  							asmbuf.Put1(0x0f)
  3890  							v--
  3891  						}
  3892  
  3893  						asmbuf.Put1(o.op[z+1])
  3894  						asmbuf.PutInt32(int32(v))
  3895  					}
  3896  
  3897  					break
  3898  				}
  3899  
  3900  				// Annotate target; will fill in later.
  3901  				p.Forwd = q.Rel
  3902  
  3903  				q.Rel = p
  3904  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3905  					if p.As == AJCXZL {
  3906  						asmbuf.Put1(0x67)
  3907  					}
  3908  					asmbuf.Put2(byte(op), 0)
  3909  				} else if yt.zcase == Zloop {
  3910  					ctxt.Diag("loop too far: %v", p)
  3911  				} else {
  3912  					if yt.zcase == Zbr {
  3913  						asmbuf.Put1(0x0f)
  3914  					}
  3915  					asmbuf.Put1(o.op[z+1])
  3916  					asmbuf.PutInt32(0)
  3917  				}
  3918  
  3919  				break
  3920  
  3921  			/*
  3922  				v = q->pc - p->pc - 2;
  3923  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3924  					*ctxt->andptr++ = op;
  3925  					*ctxt->andptr++ = v;
  3926  				} else {
  3927  					v -= 5-2;
  3928  					if(yt.zcase == Zbr) {
  3929  						*ctxt->andptr++ = 0x0f;
  3930  						v--;
  3931  					}
  3932  					*ctxt->andptr++ = o->op[z+1];
  3933  					*ctxt->andptr++ = v;
  3934  					*ctxt->andptr++ = v>>8;
  3935  					*ctxt->andptr++ = v>>16;
  3936  					*ctxt->andptr++ = v>>24;
  3937  				}
  3938  			*/
  3939  
  3940  			case Zbyte:
  3941  				v = vaddr(ctxt, p, &p.From, &rel)
  3942  				if rel.Siz != 0 {
  3943  					rel.Siz = uint8(op)
  3944  					r = obj.Addrel(cursym)
  3945  					*r = rel
  3946  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3947  				}
  3948  
  3949  				asmbuf.Put1(byte(v))
  3950  				if op > 1 {
  3951  					asmbuf.Put1(byte(v >> 8))
  3952  					if op > 2 {
  3953  						asmbuf.PutInt16(int16(v >> 16))
  3954  						if op > 4 {
  3955  							asmbuf.PutInt32(int32(v >> 32))
  3956  						}
  3957  					}
  3958  				}
  3959  			}
  3960  
  3961  			return
  3962  		}
  3963  		z += int(yt.zoffset) + xo
  3964  	}
  3965  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3966  		var pp obj.Prog
  3967  		var t []byte
  3968  		if p.As == mo[0].as {
  3969  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3970  				t = mo[0].op[:]
  3971  				switch mo[0].code {
  3972  				default:
  3973  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3974  
  3975  				case 0: /* lit */
  3976  					for z = 0; t[z] != E; z++ {
  3977  						asmbuf.Put1(t[z])
  3978  					}
  3979  
  3980  				case 1: /* r,m */
  3981  					asmbuf.Put1(t[0])
  3982  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  3983  
  3984  				case 2: /* m,r */
  3985  					asmbuf.Put1(t[0])
  3986  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  3987  
  3988  				case 3: /* r,m - 2op */
  3989  					asmbuf.Put2(t[0], t[1])
  3990  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  3991  					asmbuf.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3992  
  3993  				case 4: /* m,r - 2op */
  3994  					asmbuf.Put2(t[0], t[1])
  3995  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  3996  					asmbuf.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3997  
  3998  				case 5: /* load full pointer, trash heap */
  3999  					if t[0] != 0 {
  4000  						asmbuf.Put1(t[0])
  4001  					}
  4002  					switch p.To.Index {
  4003  					default:
  4004  						goto bad
  4005  
  4006  					case REG_DS:
  4007  						asmbuf.Put1(0xc5)
  4008  
  4009  					case REG_SS:
  4010  						asmbuf.Put2(0x0f, 0xb2)
  4011  
  4012  					case REG_ES:
  4013  						asmbuf.Put1(0xc4)
  4014  
  4015  					case REG_FS:
  4016  						asmbuf.Put2(0x0f, 0xb4)
  4017  
  4018  					case REG_GS:
  4019  						asmbuf.Put2(0x0f, 0xb5)
  4020  					}
  4021  
  4022  					asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  4023  
  4024  				case 6: /* double shift */
  4025  					if t[0] == Pw {
  4026  						if ctxt.Arch.Family != sys.AMD64 {
  4027  							ctxt.Diag("asmins: illegal 64: %v", p)
  4028  						}
  4029  						asmbuf.rexflag |= Pw
  4030  						t = t[1:]
  4031  					} else if t[0] == Pe {
  4032  						asmbuf.Put1(Pe)
  4033  						t = t[1:]
  4034  					}
  4035  
  4036  					switch p.From.Type {
  4037  					default:
  4038  						goto bad
  4039  
  4040  					case obj.TYPE_CONST:
  4041  						asmbuf.Put2(0x0f, t[0])
  4042  						asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4043  						asmbuf.Put1(byte(p.From.Offset))
  4044  
  4045  					case obj.TYPE_REG:
  4046  						switch p.From.Reg {
  4047  						default:
  4048  							goto bad
  4049  
  4050  						case REG_CL, REG_CX:
  4051  							asmbuf.Put2(0x0f, t[1])
  4052  							asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4053  						}
  4054  					}
  4055  
  4056  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4057  				// where you load the TLS base register into a register and then index off that
  4058  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4059  				// are handled in prefixof above and should not be listed here.
  4060  				case 7: /* mov tls, r */
  4061  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4062  						ctxt.Diag("invalid load of TLS: %v", p)
  4063  					}
  4064  
  4065  					if ctxt.Arch.Family == sys.I386 {
  4066  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4067  						// where you load the TLS base register into a register and then index off that
  4068  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4069  						// are handled in prefixof above and should not be listed here.
  4070  						switch ctxt.Headtype {
  4071  						default:
  4072  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4073  
  4074  						case objabi.Hlinux,
  4075  							objabi.Hnacl:
  4076  							if ctxt.Flag_shared {
  4077  								// Note that this is not generating the same insns as the other cases.
  4078  								//     MOV TLS, dst
  4079  								// becomes
  4080  								//     call __x86.get_pc_thunk.dst
  4081  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4082  								// which is encoded as
  4083  								//     call __x86.get_pc_thunk.dst
  4084  								//     movq 0(dst), dst
  4085  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4086  								// is g, which we can't check here, but will when we assemble the second
  4087  								// instruction.
  4088  								dst := p.To.Reg
  4089  								asmbuf.Put1(0xe8)
  4090  								r = obj.Addrel(cursym)
  4091  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4092  								r.Type = objabi.R_CALL
  4093  								r.Siz = 4
  4094  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  4095  								asmbuf.PutInt32(0)
  4096  
  4097  								asmbuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4098  								r = obj.Addrel(cursym)
  4099  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4100  								r.Type = objabi.R_TLS_IE
  4101  								r.Siz = 4
  4102  								r.Add = 2
  4103  								asmbuf.PutInt32(0)
  4104  							} else {
  4105  								// ELF TLS base is 0(GS).
  4106  								pp.From = p.From
  4107  
  4108  								pp.From.Type = obj.TYPE_MEM
  4109  								pp.From.Reg = REG_GS
  4110  								pp.From.Offset = 0
  4111  								pp.From.Index = REG_NONE
  4112  								pp.From.Scale = 0
  4113  								asmbuf.Put2(0x65, // GS
  4114  									0x8B)
  4115  								asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4116  							}
  4117  						case objabi.Hplan9:
  4118  							pp.From = obj.Addr{}
  4119  							pp.From.Type = obj.TYPE_MEM
  4120  							pp.From.Name = obj.NAME_EXTERN
  4121  							pp.From.Sym = plan9privates
  4122  							pp.From.Offset = 0
  4123  							pp.From.Index = REG_NONE
  4124  							asmbuf.Put1(0x8B)
  4125  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4126  
  4127  						case objabi.Hwindows:
  4128  							// Windows TLS base is always 0x14(FS).
  4129  							pp.From = p.From
  4130  
  4131  							pp.From.Type = obj.TYPE_MEM
  4132  							pp.From.Reg = REG_FS
  4133  							pp.From.Offset = 0x14
  4134  							pp.From.Index = REG_NONE
  4135  							pp.From.Scale = 0
  4136  							asmbuf.Put2(0x64, // FS
  4137  								0x8B)
  4138  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4139  						}
  4140  						break
  4141  					}
  4142  
  4143  					switch ctxt.Headtype {
  4144  					default:
  4145  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4146  
  4147  					case objabi.Hlinux:
  4148  						if !ctxt.Flag_shared {
  4149  							log.Fatalf("unknown TLS base location for linux without -shared")
  4150  						}
  4151  						// Note that this is not generating the same insn as the other cases.
  4152  						//     MOV TLS, R_to
  4153  						// becomes
  4154  						//     movq g@gottpoff(%rip), R_to
  4155  						// which is encoded as
  4156  						//     movq 0(%rip), R_to
  4157  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4158  						// is g, which we can't check here, but will when we assemble the second
  4159  						// instruction.
  4160  						asmbuf.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4161  
  4162  						asmbuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4163  						r = obj.Addrel(cursym)
  4164  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4165  						r.Type = objabi.R_TLS_IE
  4166  						r.Siz = 4
  4167  						r.Add = -4
  4168  						asmbuf.PutInt32(0)
  4169  
  4170  					case objabi.Hplan9:
  4171  						pp.From = obj.Addr{}
  4172  						pp.From.Type = obj.TYPE_MEM
  4173  						pp.From.Name = obj.NAME_EXTERN
  4174  						pp.From.Sym = plan9privates
  4175  						pp.From.Offset = 0
  4176  						pp.From.Index = REG_NONE
  4177  						asmbuf.rexflag |= Pw
  4178  						asmbuf.Put1(0x8B)
  4179  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4180  
  4181  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4182  						// TLS base is 0(FS).
  4183  						pp.From = p.From
  4184  
  4185  						pp.From.Type = obj.TYPE_MEM
  4186  						pp.From.Name = obj.NAME_NONE
  4187  						pp.From.Reg = REG_NONE
  4188  						pp.From.Offset = 0
  4189  						pp.From.Index = REG_NONE
  4190  						pp.From.Scale = 0
  4191  						asmbuf.rexflag |= Pw
  4192  						asmbuf.Put2(0x64, // FS
  4193  							0x8B)
  4194  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4195  
  4196  					case objabi.Hwindows:
  4197  						// Windows TLS base is always 0x28(GS).
  4198  						pp.From = p.From
  4199  
  4200  						pp.From.Type = obj.TYPE_MEM
  4201  						pp.From.Name = obj.NAME_NONE
  4202  						pp.From.Reg = REG_GS
  4203  						pp.From.Offset = 0x28
  4204  						pp.From.Index = REG_NONE
  4205  						pp.From.Scale = 0
  4206  						asmbuf.rexflag |= Pw
  4207  						asmbuf.Put2(0x65, // GS
  4208  							0x8B)
  4209  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4210  					}
  4211  				}
  4212  				return
  4213  			}
  4214  		}
  4215  	}
  4216  	goto bad
  4217  
  4218  bad:
  4219  	if ctxt.Arch.Family != sys.AMD64 {
  4220  		/*
  4221  		 * here, the assembly has failed.
  4222  		 * if its a byte instruction that has
  4223  		 * unaddressable registers, try to
  4224  		 * exchange registers and reissue the
  4225  		 * instruction with the operands renamed.
  4226  		 */
  4227  		pp := *p
  4228  
  4229  		unbytereg(&pp.From, &pp.Ft)
  4230  		unbytereg(&pp.To, &pp.Tt)
  4231  
  4232  		z := int(p.From.Reg)
  4233  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4234  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4235  			// For now, different to keep bit-for-bit compatibility.
  4236  			if ctxt.Arch.Family == sys.I386 {
  4237  				breg := byteswapreg(ctxt, &p.To)
  4238  				if breg != REG_AX {
  4239  					asmbuf.Put1(0x87) // xchg lhs,bx
  4240  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4241  					subreg(&pp, z, breg)
  4242  					asmbuf.doasm(ctxt, cursym, &pp)
  4243  					asmbuf.Put1(0x87) // xchg lhs,bx
  4244  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4245  				} else {
  4246  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4247  					subreg(&pp, z, REG_AX)
  4248  					asmbuf.doasm(ctxt, cursym, &pp)
  4249  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4250  				}
  4251  				return
  4252  			}
  4253  
  4254  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4255  				// We certainly don't want to exchange
  4256  				// with AX if the op is MUL or DIV.
  4257  				asmbuf.Put1(0x87) // xchg lhs,bx
  4258  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4259  				subreg(&pp, z, REG_BX)
  4260  				asmbuf.doasm(ctxt, cursym, &pp)
  4261  				asmbuf.Put1(0x87) // xchg lhs,bx
  4262  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4263  			} else {
  4264  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4265  				subreg(&pp, z, REG_AX)
  4266  				asmbuf.doasm(ctxt, cursym, &pp)
  4267  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4268  			}
  4269  			return
  4270  		}
  4271  
  4272  		z = int(p.To.Reg)
  4273  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4274  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4275  			// For now, different to keep bit-for-bit compatibility.
  4276  			if ctxt.Arch.Family == sys.I386 {
  4277  				breg := byteswapreg(ctxt, &p.From)
  4278  				if breg != REG_AX {
  4279  					asmbuf.Put1(0x87) //xchg rhs,bx
  4280  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4281  					subreg(&pp, z, breg)
  4282  					asmbuf.doasm(ctxt, cursym, &pp)
  4283  					asmbuf.Put1(0x87) // xchg rhs,bx
  4284  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4285  				} else {
  4286  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4287  					subreg(&pp, z, REG_AX)
  4288  					asmbuf.doasm(ctxt, cursym, &pp)
  4289  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4290  				}
  4291  				return
  4292  			}
  4293  
  4294  			if isax(&p.From) {
  4295  				asmbuf.Put1(0x87) // xchg rhs,bx
  4296  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4297  				subreg(&pp, z, REG_BX)
  4298  				asmbuf.doasm(ctxt, cursym, &pp)
  4299  				asmbuf.Put1(0x87) // xchg rhs,bx
  4300  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4301  			} else {
  4302  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4303  				subreg(&pp, z, REG_AX)
  4304  				asmbuf.doasm(ctxt, cursym, &pp)
  4305  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4306  			}
  4307  			return
  4308  		}
  4309  	}
  4310  
  4311  	ctxt.Diag("invalid instruction: %v", p)
  4312  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4313  	return
  4314  }
  4315  
  4316  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4317  // which is not referenced in a.
  4318  // If a is empty, it returns BX to account for MULB-like instructions
  4319  // that might use DX and AX.
  4320  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4321  	cana, canb, canc, cand := true, true, true, true
  4322  	if a.Type == obj.TYPE_NONE {
  4323  		cana, cand = false, false
  4324  	}
  4325  
  4326  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4327  		switch a.Reg {
  4328  		case REG_NONE:
  4329  			cana, cand = false, false
  4330  		case REG_AX, REG_AL, REG_AH:
  4331  			cana = false
  4332  		case REG_BX, REG_BL, REG_BH:
  4333  			canb = false
  4334  		case REG_CX, REG_CL, REG_CH:
  4335  			canc = false
  4336  		case REG_DX, REG_DL, REG_DH:
  4337  			cand = false
  4338  		}
  4339  	}
  4340  
  4341  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4342  		switch a.Index {
  4343  		case REG_AX:
  4344  			cana = false
  4345  		case REG_BX:
  4346  			canb = false
  4347  		case REG_CX:
  4348  			canc = false
  4349  		case REG_DX:
  4350  			cand = false
  4351  		}
  4352  	}
  4353  
  4354  	switch {
  4355  	case cana:
  4356  		return REG_AX
  4357  	case canb:
  4358  		return REG_BX
  4359  	case canc:
  4360  		return REG_CX
  4361  	case cand:
  4362  		return REG_DX
  4363  	default:
  4364  		ctxt.Diag("impossible byte register")
  4365  		log.Fatalf("bad code")
  4366  		return 0
  4367  	}
  4368  }
  4369  
  4370  func isbadbyte(a *obj.Addr) bool {
  4371  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4372  }
  4373  
  4374  var naclret = []uint8{
  4375  	0x5e, // POPL SI
  4376  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4377  	0x83,
  4378  	0xe6,
  4379  	0xe0, // ANDL $~31, SI
  4380  	0x4c,
  4381  	0x01,
  4382  	0xfe, // ADDQ R15, SI
  4383  	0xff,
  4384  	0xe6, // JMP SI
  4385  }
  4386  
  4387  var naclret8 = []uint8{
  4388  	0x5d, // POPL BP
  4389  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4390  	0x83,
  4391  	0xe5,
  4392  	0xe0, // ANDL $~31, BP
  4393  	0xff,
  4394  	0xe5, // JMP BP
  4395  }
  4396  
  4397  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4398  
  4399  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4400  
  4401  var naclmovs = []uint8{
  4402  	0x89,
  4403  	0xf6, // MOVL SI, SI
  4404  	0x49,
  4405  	0x8d,
  4406  	0x34,
  4407  	0x37, // LEAQ (R15)(SI*1), SI
  4408  	0x89,
  4409  	0xff, // MOVL DI, DI
  4410  	0x49,
  4411  	0x8d,
  4412  	0x3c,
  4413  	0x3f, // LEAQ (R15)(DI*1), DI
  4414  }
  4415  
  4416  var naclstos = []uint8{
  4417  	0x89,
  4418  	0xff, // MOVL DI, DI
  4419  	0x49,
  4420  	0x8d,
  4421  	0x3c,
  4422  	0x3f, // LEAQ (R15)(DI*1), DI
  4423  }
  4424  
  4425  func (asmbuf *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
  4426  	if reg >= REG_R8 {
  4427  		asmbuf.Put1(0x45)
  4428  	}
  4429  	reg = (reg - REG_AX) & 7
  4430  	asmbuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4431  }
  4432  
  4433  func (asmbuf *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4434  	asmbuf.Reset()
  4435  
  4436  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 {
  4437  		switch p.As {
  4438  		case obj.ARET:
  4439  			asmbuf.Put(naclret8)
  4440  			return
  4441  
  4442  		case obj.ACALL,
  4443  			obj.AJMP:
  4444  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4445  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4446  			}
  4447  
  4448  		case AINT:
  4449  			asmbuf.Put1(0xf4)
  4450  			return
  4451  		}
  4452  	}
  4453  
  4454  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
  4455  		if p.As == AREP {
  4456  			asmbuf.rep++
  4457  			return
  4458  		}
  4459  
  4460  		if p.As == AREPN {
  4461  			asmbuf.repn++
  4462  			return
  4463  		}
  4464  
  4465  		if p.As == ALOCK {
  4466  			asmbuf.lock = true
  4467  			return
  4468  		}
  4469  
  4470  		if p.As != ALEAQ && p.As != ALEAL {
  4471  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4472  				asmbuf.nacltrunc(ctxt, int(p.From.Index))
  4473  			}
  4474  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4475  				asmbuf.nacltrunc(ctxt, int(p.To.Index))
  4476  			}
  4477  		}
  4478  
  4479  		switch p.As {
  4480  		case obj.ARET:
  4481  			asmbuf.Put(naclret)
  4482  			return
  4483  
  4484  		case obj.ACALL,
  4485  			obj.AJMP:
  4486  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4487  				// ANDL $~31, reg
  4488  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4489  				// ADDQ R15, reg
  4490  				asmbuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4491  			}
  4492  
  4493  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4494  				// ANDL $~31, reg
  4495  				asmbuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4496  				// ADDQ R15, reg
  4497  				asmbuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4498  			}
  4499  
  4500  		case AINT:
  4501  			asmbuf.Put1(0xf4)
  4502  			return
  4503  
  4504  		case ASCASB,
  4505  			ASCASW,
  4506  			ASCASL,
  4507  			ASCASQ,
  4508  			ASTOSB,
  4509  			ASTOSW,
  4510  			ASTOSL,
  4511  			ASTOSQ:
  4512  			asmbuf.Put(naclstos)
  4513  
  4514  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4515  			asmbuf.Put(naclmovs)
  4516  		}
  4517  
  4518  		if asmbuf.rep != 0 {
  4519  			asmbuf.Put1(0xf3)
  4520  			asmbuf.rep = 0
  4521  		}
  4522  
  4523  		if asmbuf.repn != 0 {
  4524  			asmbuf.Put1(0xf2)
  4525  			asmbuf.repn = 0
  4526  		}
  4527  
  4528  		if asmbuf.lock {
  4529  			asmbuf.Put1(0xf0)
  4530  			asmbuf.lock = false
  4531  		}
  4532  	}
  4533  
  4534  	asmbuf.rexflag = 0
  4535  	asmbuf.vexflag = 0
  4536  	mark := asmbuf.Len()
  4537  	asmbuf.doasm(ctxt, cursym, p)
  4538  	if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4539  		/*
  4540  		 * as befits the whole approach of the architecture,
  4541  		 * the rex prefix must appear before the first opcode byte
  4542  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4543  		 * before the 0f opcode escape!), or it might be ignored.
  4544  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4545  		 */
  4546  		if ctxt.Arch.Family != sys.AMD64 {
  4547  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  4548  		}
  4549  		n := asmbuf.Len()
  4550  		var np int
  4551  		for np = mark; np < n; np++ {
  4552  			c := asmbuf.At(np)
  4553  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4554  				break
  4555  			}
  4556  		}
  4557  		asmbuf.Insert(np, byte(0x40|asmbuf.rexflag))
  4558  	}
  4559  
  4560  	n := asmbuf.Len()
  4561  	for i := len(cursym.R) - 1; i >= 0; i-- {
  4562  		r := &cursym.R[i]
  4563  		if int64(r.Off) < p.Pc {
  4564  			break
  4565  		}
  4566  		if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4567  			r.Off++
  4568  		}
  4569  		if r.Type == objabi.R_PCREL {
  4570  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4571  				// PC-relative addressing is relative to the end of the instruction,
  4572  				// but the relocations applied by the linker are relative to the end
  4573  				// of the relocation. Because immediate instruction
  4574  				// arguments can follow the PC-relative memory reference in the
  4575  				// instruction encoding, the two may not coincide. In this case,
  4576  				// adjust addend so that linker can keep relocating relative to the
  4577  				// end of the relocation.
  4578  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4579  			} else if ctxt.Arch.Family == sys.I386 {
  4580  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4581  				// assumes that the previous instruction loaded the PC of the end
  4582  				// of that instruction into CX, so the adjustment is relative to
  4583  				// that.
  4584  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4585  			}
  4586  		}
  4587  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  4588  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4589  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4590  		}
  4591  
  4592  	}
  4593  
  4594  	if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4595  		switch p.To.Reg {
  4596  		case REG_SP:
  4597  			asmbuf.Put(naclspfix)
  4598  		case REG_BP:
  4599  			asmbuf.Put(naclbpfix)
  4600  		}
  4601  	}
  4602  }