github.com/Filosottile/go@v0.0.0-20170906193555-dbed9972d994/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
    43  var (
    44  	plan9privates *obj.LSym
    45  	deferreturn   *obj.LSym
    46  )
    47  
    48  // Instruction layout.
    49  
    50  const (
    51  	// Loop alignment constants:
    52  	// want to align loop entry to LoopAlign-byte boundary,
    53  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    54  	// We define a loop entry as the target of a backward jump.
    55  	//
    56  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    57  	// and it aligns all jump targets, not just backward jump targets.
    58  	//
    59  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    60  	// is very slight but negative, so the alignment is disabled by
    61  	// setting MaxLoopPad = 0. The code is here for reference and
    62  	// for future experiments.
    63  	//
    64  	LoopAlign  = 16
    65  	MaxLoopPad = 0
    66  )
    67  
// Optab is one entry of the optab instruction table. It ties an instruction
// to the operand forms it accepts and to the bytes used to encode them.
// Entries are written as unkeyed literals, so the field order matters.
type Optab struct {
	as     obj.As    // instruction this entry describes
	ytab   []ytab    // operand forms accepted by the instruction
	prefix uint8     // P* constant controlling the prefix bytes to emit
	op     [23]uint8 // opcode bytes, handed out to the ytab rows in order
}
    74  
// ytab is one row of an operand-form table. The first three fields are the
// operand classes (Y* constants) a row accepts; the last two say how a
// matching instruction is encoded. Rows are written as unkeyed literals, so
// the field order matters.
type ytab struct {
	from    uint8 // Ytype required of the source operand
	from3   uint8 // Ytype required of the middle operand, Ynone if there is none
	to      uint8 // Ytype required of the destination operand
	zcase   uint8 // Ztype selecting how the instruction bytes are laid down
	zoffset uint8 // number of Optab.op bytes this row accounts for
}
    82  
// Movtab is a table entry made of an instruction, three uint8 operand
// fields, a code byte and up to four opcode bytes.
// NOTE(review): ft, f3t and tt presumably hold the from, from3 and to operand
// classes by analogy with ytab; confirm against the code that reads Movtab.
type Movtab struct {
	as   obj.As
	ft   uint8
	f3t  uint8
	tt   uint8
	code uint8
	op   [4]uint8
}
    91  
// Operand classes ("Ytypes"). oclass computes the most specific class of an
// operand, and the ycover table records which more general classes that
// class also satisfies. ytab rows name the class each operand must satisfy.
//
// The values are assigned by iota and ycover is indexed with them
// (ycover[specific*Ymax+general]), so the order of this list must not change
// and Ymax must stay last.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr
	Yxm
	Yyr
	Yym
	Ytls
	Ytextsize
	Yindir
	Ymax
)
   165  
// Encoding cases ("Ztypes"), stored in ytab.zcase. The switch on zcase in
// doasm turns a Ztype plus the current opcode bytes into the bytes of the
// instruction. For example, Zibo_m is an opcode byte, then an encoded
// addressing mode, then a single immediate byte; Zilo_m is the same with a
// 32-bit immediate.
//
// The values are assigned by iota; Zmax must stay last.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	Zvex_rm_v_r
	Zvex_r_v_rm
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	Zmax
)
   222  
   223  const (
   224  	Px   = 0
   225  	Px1  = 1    // symbolic; exact value doesn't matter
   226  	P32  = 0x32 /* 32-bit only */
   227  	Pe   = 0x66 /* operand escape */
   228  	Pm   = 0x0f /* 2byte opcode escape */
   229  	Pq   = 0xff /* both escapes: 66 0f */
   230  	Pb   = 0xfe /* byte operands */
   231  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   232  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   233  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   234  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   235  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   236  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   237  	Pw   = 0x48 /* Rex.w */
   238  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   239  	Py   = 0x80 /* defaults to 64-bit mode */
   240  	Py1  = 0x81 // symbolic; exact value doesn't matter
   241  	Py3  = 0x83 // symbolic; exact value doesn't matter
   242  	Pvex = 0x84 // symbolic: exact value doesn't matter
   243  
   244  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   245  	Rxr = 1 << 2 /* extend modrm reg */
   246  	Rxx = 1 << 1 /* extend sib index */
   247  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   248  )
   249  
   250  const (
   251  	// Encoding for VEX prefix in tables.
   252  	// The P, L, and W fields are chosen to match
   253  	// their eventual locations in the VEX prefix bytes.
   254  
   255  	// P field - 2 bits
   256  	vex66 = 1 << 0
   257  	vexF3 = 2 << 0
   258  	vexF2 = 3 << 0
   259  	// L field - 1 bit
   260  	vexLZ  = 0 << 2
   261  	vexLIG = 0 << 2
   262  	vex128 = 0 << 2
   263  	vex256 = 1 << 2
   264  	// W field - 1 bit
   265  	vexWIG = 0 << 7
   266  	vexW0  = 0 << 7
   267  	vexW1  = 1 << 7
   268  	// M field - 5 bits, but mostly reserved; we can store up to 4
   269  	vex0F   = 1 << 3
   270  	vex0F38 = 2 << 3
   271  	vex0F3A = 3 << 3
   272  
   273  	// Combinations used in the manual.
   274  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   275  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   276  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   277  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   278  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   279  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   280  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   281  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   282  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   283  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   284  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   285  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   286  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   287  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   288  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   289  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   290  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   291  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   292  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   293  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   294  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   295  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   296  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   297  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   298  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   299  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   300  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   301  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   302  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   303  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   304  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   305  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   306  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   307  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   308  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   309  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   310  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   311  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   312  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   313  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   314  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   315  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   316  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   317  )
   318  
   319  var ycover [Ymax * Ymax]uint8
   320  
   321  var reg [MAXREG]int
   322  
   323  var regrex [MAXREG + 1]int
   324  
// ynone matches an instruction that has no operands. In this section it is
// used by AAA, AAD, AAM, AAS, CDQ, CLC, CLD, CLI, CLTS and CMC.
var ynone = []ytab{
	{Ynone, Ynone, Ynone, Zlit, 1},
}

// ytext matches a Ymb source and a Ytextsize destination, with or without a
// Yi32 middle operand. Both rows are Zpseudo.
var ytext = []ytab{
	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
}

// ynop matches either no operands or exactly one operand of class Yiauto,
// Yml, Yrf or Yxr, given as the destination or as the source. Every row is
// Zpseudo.
var ynop = []ytab{
	{Ynone, Ynone, Ynone, Zpseudo, 0},
	{Ynone, Ynone, Yiauto, Zpseudo, 0},
	{Ynone, Ynone, Yml, Zpseudo, 0},
	{Ynone, Ynone, Yrf, Zpseudo, 0},
	{Ynone, Ynone, Yxr, Zpseudo, 0},
	{Yiauto, Ynone, Ynone, Zpseudo, 0},
	{Yml, Ynone, Ynone, Zpseudo, 0},
	{Yrf, Ynone, Ynone, Zpseudo, 0},
	{Yxr, Ynone, Ynone, Zpseudo, 1},
}

// yfuncdata matches a Yi32 source and a Ym destination; Zpseudo.
var yfuncdata = []ytab{
	{Yi32, Ynone, Ym, Zpseudo, 0},
}

// ypcdata matches a Yi32 source and a Yi32 destination; Zpseudo.
var ypcdata = []ytab{
	{Yi32, Ynone, Yi32, Zpseudo, 0},
}
   353  
// yxorb is the byte-sized two-operand form: immediate to Yal, immediate to
// Ymb, Yrb to Ymb, and Ymb to Yrb. ADCB, ADDB and ANDB use it in this
// section; the rows take 1, 2, 1 and 1 of their five opcode bytes.
var yxorb = []ytab{
	{Yi32, Ynone, Yal, Zib_, 1},
	{Yi32, Ynone, Ymb, Zibo_m, 2},
	{Yrb, Ynone, Ymb, Zr_m, 1},
	{Ymb, Ynone, Yrb, Zm_r, 1},
}

// yaddl is the two-operand form shared by the L, Q and W variants of ADC,
// ADD and AND in this section. The Yi8 row comes first so that it is tried
// before the Yi32 rows. The rows take 2, 1, 2, 1 and 1 of the seven opcode
// bytes.
var yaddl = []ytab{
	{Yi8, Ynone, Yml, Zibo_m, 2},
	{Yi32, Ynone, Yax, Zil_, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
}

// yincl matches a single destination operand: Yrl (Z_rp, one opcode byte) or
// Yml (Zo_m, two opcode bytes).
var yincl = []ytab{
	{Ynone, Ynone, Yrl, Z_rp, 1},
	{Ynone, Ynone, Yml, Zo_m, 2},
}

// yincq matches a single Yml destination operand; it has only the Zo_m row
// of yincl.
var yincq = []ytab{
	{Ynone, Ynone, Yml, Zo_m, 2},
}

// ycmpb mirrors yxorb with the operands swapped: the immediate rows carry
// the immediate in the destination position.
var ycmpb = []ytab{
	{Yal, Ynone, Yi32, Z_ib, 1},
	{Ymb, Ynone, Yi32, Zm_ibo, 2},
	{Ymb, Ynone, Yrb, Zm_r, 1},
	{Yrb, Ynone, Ymb, Zr_m, 1},
}

// ycmpl mirrors yaddl with the operands swapped: the immediate rows carry
// the immediate in the destination position.
var ycmpl = []ytab{
	{Yml, Ynone, Yi8, Zm_ibo, 2},
	{Yax, Ynone, Yi32, Z_il, 1},
	{Yml, Ynone, Yi32, Zm_ilo, 2},
	{Yml, Ynone, Yrl, Zm_r, 1},
	{Yrl, Ynone, Yml, Zr_m, 1},
}

// yshb matches a Ymb destination whose source is the constant 1 (Yi1), an
// immediate (Yi32), or Ycx. Each row takes two opcode bytes.
var yshb = []ytab{
	{Yi1, Ynone, Ymb, Zo_m, 2},
	{Yi32, Ynone, Ymb, Zibo_m, 2},
	{Ycx, Ynone, Ymb, Zo_m, 2},
}

// yshl matches a Yml destination whose source is the constant 1 (Yi1), an
// immediate (Yi32), Ycl, or Ycx. Each row takes two opcode bytes.
var yshl = []ytab{
	{Yi1, Ynone, Yml, Zo_m, 2},
	{Yi32, Ynone, Yml, Zibo_m, 2},
	{Ycl, Ynone, Yml, Zo_m, 2},
	{Ycx, Ynone, Yml, Zo_m, 2},
}

// ytestl has the same rows as yaddl except that there is no Yi8 row.
var ytestl = []ytab{
	{Yi32, Ynone, Yax, Zil_, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
}
   412  
// ymovb matches byte-sized moves: Yrb to Ymb, Ymb to Yrb, immediate to Yrb,
// and immediate to Ymb.
var ymovb = []ytab{
	{Yrb, Ynone, Ymb, Zr_m, 1},
	{Ymb, Ynone, Yrb, Zm_r, 1},
	{Yi32, Ynone, Yrb, Zib_rp, 1},
	{Yi32, Ynone, Ymb, Zibo_m, 2},
}

// ybtl matches a Yml destination with a Yi8 or Yrl source. The BT, BTC, BTR
// and BTS entries in this section use it; the Yi8 row takes two opcode bytes
// and the Yrl row takes one.
var ybtl = []ytab{
	{Yi8, Ynone, Yml, Zibo_m, 2},
	{Yrl, Ynone, Yml, Zr_m, 1},
}

// ymovw matches Yrl/Yml moves in both directions, immediates to Yrl or Yml
// (with a dedicated Zclr row for the constant 0 into Yrl), and a Yiauto
// source into Yrl.
var ymovw = []ytab{
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
	{Yi0, Ynone, Yrl, Zclr, 1},
	{Yi32, Ynone, Yrl, Zil_rp, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yiauto, Ynone, Yrl, Zaut_r, 2},
}

// ymovl has the rows of ymovw plus four MOVD rows that move between Yml and
// an MMX (Ymr) or XMM (Yxr) register, inserted before the Yiauto row.
var ymovl = []ytab{
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
	{Yi0, Ynone, Yrl, Zclr, 1},
	{Yi32, Ynone, Yrl, Zil_rp, 1},
	{Yi32, Ynone, Yml, Zilo_m, 2},
	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
	{Yiauto, Ynone, Yrl, Zaut_r, 2},
}

// yret matches either no operands or a single Yi32 source operand. Both rows
// are Zo_iw.
var yret = []ytab{
	{Ynone, Ynone, Ynone, Zo_iw, 1},
	{Yi32, Ynone, Ynone, Zo_iw, 1},
}
   451  
// ymovq lists the operand forms of MOVQ. The first group of rows is valid in
// 32-bit mode; the second group is valid only in 64-bit mode. The comment on
// each row names the prefix and opcode byte that the row is paired with.
var ymovq = []ytab{
	// valid in 32-bit mode
	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64

	// valid only in 64-bit mode, usually with 64-bit prefix
	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
}
   473  
// ym_rl matches a Ym source and a Yrl destination.
var ym_rl = []ytab{
	{Ym, Ynone, Yrl, Zm_r, 1},
}

// yrl_m matches a Yrl source and a Ym destination. BOUNDL and BOUNDW use it
// in this section.
var yrl_m = []ytab{
	{Yrl, Ynone, Ym, Zr_m, 1},
}

// ymb_rl matches a Ymb source and a Yrl destination, encoded with Zmb_r.
var ymb_rl = []ytab{
	{Ymb, Ynone, Yrl, Zmb_r, 1},
}

// yml_rl matches a Yml source and a Yrl destination. The BSF, BSR and CMOV
// entries in this section use it.
var yml_rl = []ytab{
	{Yml, Ynone, Yrl, Zm_r, 1},
}

// yrl_ml matches a Yrl source and a Yml destination. ARPL uses it in this
// section.
var yrl_ml = []ytab{
	{Yrl, Ynone, Yml, Zr_m, 1},
}

// yml_mb matches byte operands in both directions: Yrb to Ymb and Ymb to Yrb.
var yml_mb = []ytab{
	{Yrb, Ynone, Ymb, Zr_m, 1},
	{Ymb, Ynone, Yrb, Zm_r, 1},
}

// yrb_mb matches a Yrb source and a Ymb destination only.
var yrb_mb = []ytab{
	{Yrb, Ynone, Ymb, Zr_m, 1},
}

// yxchg matches Yax paired with Yrl in either order (Z_rp and Zrp_), then
// Yrl/Yml in either order (Zr_m and Zm_r).
var yxchg = []ytab{
	{Yax, Ynone, Yrl, Z_rp, 1},
	{Yrl, Ynone, Yax, Zrp_, 1},
	{Yrl, Ynone, Yml, Zr_m, 1},
	{Yml, Ynone, Yrl, Zm_r, 1},
}

// ydivl matches a single Yml source operand.
var ydivl = []ytab{
	{Yml, Ynone, Ynone, Zm_o, 2},
}

// ydivb matches a single Ymb source operand.
var ydivb = []ytab{
	{Ymb, Ynone, Ynone, Zm_o, 2},
}

// yimul matches a single Yml source operand, or a Yrl destination whose
// source is a Yi8 immediate, a Yi32 immediate, or Yml.
var yimul = []ytab{
	{Yml, Ynone, Ynone, Zm_o, 2},
	{Yi8, Ynone, Yrl, Zib_rr, 1},
	{Yi32, Ynone, Yrl, Zil_rr, 1},
	{Yml, Ynone, Yrl, Zm_r, 2},
}

// yimul3 is the three-operand form: Yi8 source, Yml middle operand, Yrl
// destination.
var yimul3 = []ytab{
	{Yi8, Yml, Yrl, Zibm_r, 2},
}
   528  
// ybyte matches a single Yi64 source operand, encoded with Zbyte. ABYTE uses
// it in this section.
var ybyte = []ytab{
	{Yi64, Ynone, Ynone, Zbyte, 1},
}

// yin matches a single Yi32 source operand (Zib_) or no operands (Zlit).
var yin = []ytab{
	{Yi32, Ynone, Ynone, Zib_, 1},
	{Ynone, Ynone, Ynone, Zlit, 1},
}

// yint matches a single Yi32 source operand, encoded with Zib_.
var yint = []ytab{
	{Yi32, Ynone, Ynone, Zib_, 1},
}

// ypushl matches a single source operand of class Yrl, Ym, Yi8 or Yi32.
var ypushl = []ytab{
	{Yrl, Ynone, Ynone, Zrp_, 1},
	{Ym, Ynone, Ynone, Zm_o, 2},
	{Yi8, Ynone, Ynone, Zib_, 1},
	{Yi32, Ynone, Ynone, Zil_, 1},
}

// ypopl matches a single destination operand of class Yrl or Ym.
var ypopl = []ytab{
	{Ynone, Ynone, Yrl, Z_rp, 1},
	{Ynone, Ynone, Ym, Zo_m, 2},
}

// ybswap matches a single Yrl destination operand. BSWAPL and BSWAPQ use it
// in this section; the row takes both of their opcode bytes.
var ybswap = []ytab{
	{Ynone, Ynone, Yrl, Z_rp, 2},
}

// yscond matches a single Ymb destination operand.
var yscond = []ytab{
	{Ynone, Ynone, Ymb, Zo_m, 2},
}

// yjcond matches a Ybr destination with no source, or with the constant 0
// (Yi0) or 1 (Yi1) as the source. All rows are Zbr.
var yjcond = []ytab{
	{Ynone, Ynone, Ybr, Zbr, 0},
	{Yi0, Ynone, Ybr, Zbr, 0},
	{Yi1, Ynone, Ybr, Zbr, 1},
}

// yloop matches a single Ybr destination operand, encoded with Zloop.
var yloop = []ytab{
	{Ynone, Ynone, Ybr, Zloop, 1},
}

// ycall lists the operand forms of CALL. A row with zoffset 0 does not move
// the opcode-byte position, so it shares its bytes with the row after it:
// the first two rows share two bytes, the Yindir row has two of its own, and
// the last two rows share the final byte.
var ycall = []ytab{
	{Ynone, Ynone, Yml, Zcallindreg, 0},
	{Yrx, Ynone, Yrx, Zcallindreg, 2},
	{Ynone, Ynone, Yindir, Zcallind, 2},
	{Ynone, Ynone, Ybr, Zcall, 0},
	{Ynone, Ynone, Yi32, Zcallcon, 1},
}

// yduff matches a single Yi32 destination operand, encoded with Zcallduff.
var yduff = []ytab{
	{Ynone, Ynone, Yi32, Zcallduff, 1},
}

// yjmp matches a single destination operand: Yml (Zo_m64), Ybr (Zjmp) or
// Yi32 (Zjmpcon). The Ybr row has zoffset 0, so it shares its opcode byte
// with the Yi32 row.
var yjmp = []ytab{
	{Ynone, Ynone, Yml, Zo_m64, 2},
	{Ynone, Ynone, Ybr, Zjmp, 0},
	{Ynone, Ynone, Yi32, Zjmpcon, 1},
}
   589  
// yfmvd matches moves between Yf0 and either Ym or Yrf, in both directions.
var yfmvd = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Ym, Zo_m, 2},
	{Yrf, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Yrf, Zo_m, 2},
}

// yfmvdp matches a Yf0 source with a Ym or Yrf destination.
var yfmvdp = []ytab{
	{Yf0, Ynone, Ym, Zo_m, 2},
	{Yf0, Ynone, Yrf, Zo_m, 2},
}

// yfmvf matches moves between Ym and Yf0 in both directions.
var yfmvf = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Ym, Zo_m, 2},
}

// yfmvx matches a Ym source and a Yf0 destination only.
var yfmvx = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
}

// yfmvp matches a Yf0 source and a Ym destination only.
var yfmvp = []ytab{
	{Yf0, Ynone, Ym, Zo_m, 2},
}

// yfcmv matches a Yrf source and a Yf0 destination.
var yfcmv = []ytab{
	{Yrf, Ynone, Yf0, Zm_o, 2},
}

// yfadd matches a Yf0 destination with a Ym or Yrf source, or a Yrf
// destination with a Yf0 source.
var yfadd = []ytab{
	{Ym, Ynone, Yf0, Zm_o, 2},
	{Yrf, Ynone, Yf0, Zm_o, 2},
	{Yf0, Ynone, Yrf, Zo_m, 2},
}

// yfxch matches Yf0 and Yrf in either order.
var yfxch = []ytab{
	{Yf0, Ynone, Yrf, Zo_m, 2},
	{Yrf, Ynone, Yf0, Zm_o, 2},
}

// ycompp matches a Yf0 source and a Yrf destination; see the note on the row.
var ycompp = []ytab{
	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
}

// ystsw matches a single destination operand: Ym (Zo_m) or Yax (Zlit).
var ystsw = []ytab{
	{Ynone, Ynone, Ym, Zo_m, 2},
	{Ynone, Ynone, Yax, Zlit, 1},
}

// ysvrs matches a single Ym operand, given either as the destination or as
// the source.
var ysvrs = []ytab{
	{Ynone, Ynone, Ym, Zo_m, 2},
	{Ym, Ynone, Ynone, Zm_o, 2},
}
   643  
// ymm matches an MMX form (Ymm to Ymr) and an XMM form (Yxm to Yxr).
var ymm = []ytab{
	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
}

// yxm matches a Yxm source and a Yxr destination. ADDPD, ADDPS, ADDSD,
// ADDSS, ANDNPD, ANDNPS, ANDPD and ANDPS use it in this section.
var yxm = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
}

// yxm_q4 takes the same operands as yxm but is encoded with Zm_r instead of
// Zm_r_xm.
var yxm_q4 = []ytab{
	{Yxm, Ynone, Yxr, Zm_r, 1},
}

// yxcvm1 matches a Yxm source with a Yxr or Ymr destination.
var yxcvm1 = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
}

// yxcvm2 matches a Yxr destination with a Yxm or Ymm source.
var yxcvm2 = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
}

// yxr matches Yxr for both the source and the destination.
var yxr = []ytab{
	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
}

// yxr_ml matches a Yxr source and a Yml destination.
var yxr_ml = []ytab{
	{Yxr, Ynone, Yml, Zr_m_xm, 1},
}

// ymr matches Ymr for both the source and the destination.
var ymr = []ytab{
	{Ymr, Ynone, Ymr, Zm_r, 1},
}

// ymr_ml matches a Ymr source and a Yml destination.
var ymr_ml = []ytab{
	{Ymr, Ynone, Yml, Zr_m_xm, 1},
}

// yxcmpi is a three-operand form: Yxm source, Yxr middle operand and a Yi8
// immediate in the destination position.
var yxcmpi = []ytab{
	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
}

// yxmov matches Yxm to Yxr and Yxr to Yxm.
var yxmov = []ytab{
	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
}

// yxcvfl matches a Yxm source and a Yrl destination; one opcode byte.
var yxcvfl = []ytab{
	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
}

// yxcvlf matches a Yml source and a Yxr destination; one opcode byte.
var yxcvlf = []ytab{
	{Yml, Ynone, Yxr, Zm_r_xm, 1},
}

// yxcvfq takes the same operands as yxcvfl but accounts for two opcode bytes.
var yxcvfq = []ytab{
	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
}

// yxcvqf takes the same operands as yxcvlf but accounts for two opcode bytes.
var yxcvqf = []ytab{
	{Yml, Ynone, Yxr, Zm_r_xm, 2},
}

// yps matches an MMX destination (Ymr) or an XMM destination (Yxr), each
// with either a register/memory source or a Yi8 immediate source.
var yps = []ytab{
	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
}

// yxrrl matches a Yxr source and a Yrl destination.
var yxrrl = []ytab{
	{Yxr, Ynone, Yrl, Zm_r, 1},
}

// ymrxr matches a Yxr destination with a Ymr or Yxm source.
var ymrxr = []ytab{
	{Ymr, Ynone, Yxr, Zm_r, 1},
	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
}
   723  
// ymshuf is a three-operand form: Yi8 immediate, Ymm middle operand, Ymr
// destination.
var ymshuf = []ytab{
	{Yi8, Ymm, Ymr, Zibm_r, 2},
}

// ymshufb matches a Yxm source and a Yxr destination, encoded with Zm2_r.
var ymshufb = []ytab{
	{Yxm, Ynone, Yxr, Zm2_r, 2},
}

// yxshuf is a three-operand form: Yu8 immediate, Yxm middle operand, Yxr
// destination.
var yxshuf = []ytab{
	{Yu8, Yxm, Yxr, Zibm_r, 2},
}

// yextrw is a three-operand form: Yu8 immediate, Yxr middle operand, Yrl
// destination.
var yextrw = []ytab{
	{Yu8, Yxr, Yrl, Zibm_r, 2},
}

// yextr is a three-operand form: Yu8 immediate, Yxr middle operand, Ymm
// destination, encoded with Zibr_m.
var yextr = []ytab{
	{Yu8, Yxr, Ymm, Zibr_m, 3},
}

// yinsrw is a three-operand form: Yu8 immediate, Yml middle operand, Yxr
// destination.
var yinsrw = []ytab{
	{Yu8, Yml, Yxr, Zibm_r, 2},
}

// yinsr is a three-operand form: Yu8 immediate, Ymm middle operand, Yxr
// destination.
var yinsr = []ytab{
	{Yu8, Ymm, Yxr, Zibm_r, 3},
}

// ypsdq matches a Yi8 immediate source and a Yxr destination.
var ypsdq = []ytab{
	{Yi8, Ynone, Yxr, Zibo_m, 2},
}

// ymskb matches a Yrl destination with a Yxr or Ymr source.
var ymskb = []ytab{
	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
}

// ycrc32l matches a Yml source and a Yrl destination, encoded with Zlitm_r.
var ycrc32l = []ytab{
	{Yml, Ynone, Yrl, Zlitm_r, 0},
}

// yprefetch matches a single Ym source operand.
var yprefetch = []ytab{
	{Ym, Ynone, Ynone, Zm_o, 2},
}

// yaes matches a Yxm source and a Yxr destination, encoded with Zlitm_r.
var yaes = []ytab{
	{Yxm, Ynone, Yxr, Zlitm_r, 2},
}

// yxbegin matches a single Ybr destination operand, encoded with Zjmp.
var yxbegin = []ytab{
	{Ynone, Ynone, Ybr, Zjmp, 1},
}

// yxabort matches a single Yu8 immediate source operand.
var yxabort = []ytab{
	{Yu8, Ynone, Ynone, Zib_, 1},
}

// ylddqu matches a Ym source and a Yxr destination.
var ylddqu = []ytab{
	{Ym, Ynone, Yxr, Zm_r, 1},
}
   784  
// VEX instructions that come in two forms:
//	VTHING xmm2/m128, xmmV, xmm1
//	VTHING ymm2/m256, ymmV, ymm1
// The opcode array in the corresponding Optab entry
// should contain the (VEX prefixes, opcode byte) pair
// for each of the two forms.
// For example, the entries for VPXOR are:
//
//	VPXOR xmm2/m128, xmmV, xmm1
//	VEX.NDS.128.66.0F.WIG EF /r
//
//	VPXOR ymm2/m256, ymmV, ymm1
//	VEX.NDS.256.66.0F.WIG EF /r
//
// The NDS/NDD/DDS part can be dropped, producing this
// Optab entry:
//
//	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
//
var yvex_xy3 = []ytab{
	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
}

// yvex_ri3 is a three-operand form: Yi8 immediate, Ymb middle operand, Yrl
// destination.
var yvex_ri3 = []ytab{
	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
}

// yvex_xyi3 matches an immediate with an xmm or ymm operand pair. The xmm
// and ymm rows appear once for a Yu8 immediate and once for a Yi8 immediate.
var yvex_xyi3 = []ytab{
	{Yu8, Yxm, Yxr, Zvex_i_rm_r, 2},
	{Yu8, Yym, Yyr, Zvex_i_rm_r, 2},
	{Yi8, Yxm, Yxr, Zvex_i_rm_r, 2},
	{Yi8, Yym, Yyr, Zvex_i_rm_r, 2},
}

// yvex_yyi4 lists Yym, Yyr, Yyr in its three operand columns and is encoded
// with the four-part case Zvex_i_rm_v_r.
var yvex_yyi4 = []ytab{ // TODO: don't hide the 4-operand form; some versions also have an xmm version
	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
}

// yvex_xyi4 is like yvex_yyi4 but with a Yxm source.
var yvex_xyi4 = []ytab{
	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
}

// yvex_shift matches xmm and ymm register pairs whose source is either a Yi8
// immediate (Zvex_i_r_v, three opcode bytes) or a Yxm operand (Zvex_rm_v_r,
// two opcode bytes).
var yvex_shift = []ytab{
	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2},
}

// yvex_shift_dq has only the two immediate rows of yvex_shift.
var yvex_shift_dq = []ytab{
	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
}

// yvex_r3 is a three-operand form: Yml source, Yrl middle operand, Yrl
// destination.
var yvex_r3 = []ytab{
	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
}

// yvex_vmr3 is a three-operand form: Yrl source, Yml middle operand, Yrl
// destination, encoded with Zvex_v_rm_r.
var yvex_vmr3 = []ytab{
	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
}

// yvex_xy2 is the two-operand counterpart of yvex_xy3: an xmm form and a ymm
// form with no middle operand.
var yvex_xy2 = []ytab{
	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
}

// yvex_xyr2 matches a Yrl destination with a Yxr or Yyr source.
var yvex_xyr2 = []ytab{
	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
}

// yvex_vmovdqa matches moves in both directions for xmm (Yxm/Yxr) and for
// ymm (Yym/Yyr). Rows whose destination is the register use Zvex_rm_v_r; the
// reverse rows use Zvex_r_v_rm.
var yvex_vmovdqa = []ytab{
	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
}

// yvex_vmovntdq matches a Ym destination with a Yxr or Yyr source.
var yvex_vmovntdq = []ytab{
	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
}

// yvex_vpbroadcast matches a Yxm source with a Yxr or Yyr destination.
var yvex_vpbroadcast = []ytab{
	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
}

// yvex_vpbroadcast_sd has only the Yyr-destination row of yvex_vpbroadcast.
var yvex_vpbroadcast_sd = []ytab{
	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
}

// ymmxmm0f38 matches an MMX form (Ymm to Ymr, three opcode bytes) and an XMM
// form (Yxm to Yxr, five opcode bytes), both encoded with Zlitm_r.
var ymmxmm0f38 = []ytab{
	{Ymm, Ynone, Ymr, Zlitm_r, 3},
	{Yxm, Ynone, Yxr, Zlitm_r, 5},
}
   883  
   884  /*
   885   * You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   886   * ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   887   * to find the entry with the given p.As and then looks through the ytable for
 * that instruction (the second field in the optab struct) for a line whose
 * first three values (from, from3, to) match the Ytypes of the operands
 * (from3 is Ynone when the instruction has no middle operand).  The
   890   * function oclass computes the specific Ytype of an operand and then the set
   891   * of more general Ytypes that it satisfies is implied by the ycover table, set
   892   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   893   * from the more general 8-bit constants, but instinit says
   894   *
   895   *        ycover[Yi0*Ymax+Ys32] = 1
   896   *        ycover[Yi1*Ymax+Ys32] = 1
   897   *        ycover[Yi8*Ymax+Ys32] = 1
   898   *
   899   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   900   * if that's what an instruction can handle.
   901   *
   902   * In parallel with the scan through the ytable for the appropriate line, there
   903   * is a z pointer that starts out pointing at the strange magic byte list in
 * the Optab struct.  With each step past a non-matching ytable line, z
 * advances by the 5th entry in the line (the zoffset).  When a matching line
 * is found, that z pointer has the extra data to use in laying down the
 * instruction bytes.  The actual bytes laid down are a function of the 4th
 * entry in the line (that is, the Ztype) and the z bytes.
   909   *
   910   * For example, let's look at AADDL.  The optab line says:
   911   *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   912   *
   913   * and yaddl says
   914   *        var yaddl = []ytab{
   915   *                {Yi8, Ynone, Yml, Zibo_m, 2},
   916   *                {Yi32, Ynone, Yax, Zil_, 1},
   917   *                {Yi32, Ynone, Yml, Zilo_m, 2},
   918   *                {Yrl, Ynone, Yml, Zr_m, 1},
   919   *                {Yml, Ynone, Yrl, Zm_r, 1},
   920   *        }
   921   *
   922   * so there are 5 possible types of ADDL instruction that can be laid down, and
   923   * possible states used to lay them down (Ztype and z pointer, assuming z
   924   * points at [23]uint8{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   925   *
   926   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   927   *        Yi32, Yax -> Zil_, z+2 (0x05)
   928   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   929   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   930   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   931   *
   932   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   933   * relatively straightforward as this program goes.
   934   *
   935   * The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   936   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   937   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   938   * Zilo_m is the same but a long (32-bit) immediate.
   939   */
   940  var optab =
   941  /*	as, ytab, andproto, opcode */
   942  []Optab{
   943  	{obj.AXXX, nil, 0, [23]uint8{}},
   944  	{AAAA, ynone, P32, [23]uint8{0x37}},
   945  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   946  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   947  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   948  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x12}},
   949  	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   950  	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   951  	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   952  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   953  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   954  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   955  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   956  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   957  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   958  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   959  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   960  	{AADJSP, nil, 0, [23]uint8{}},
   961  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   962  	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   963  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   964  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   965  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   966  	{AANDPS, yxm, Pm, [23]uint8{0x54}},
   967  	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   968  	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   969  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   970  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   971  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   972  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   973  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   974  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   975  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   976  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   977  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   978  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   979  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   980  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   981  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   982  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   983  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   984  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   985  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   986  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   987  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   988  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   989  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   990  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   991  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   992  	{ABYTE, ybyte, Px, [23]uint8{1}},
   993  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   994  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   995  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   996  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   997  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   998  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   999  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1000  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1001  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1002  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1003  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1004  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1005  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1006  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1007  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1008  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1009  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1010  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1011  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1012  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1013  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1014  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1015  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1016  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1017  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1018  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1019  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1020  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1021  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1022  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1023  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1024  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1025  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1026  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1027  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1028  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1029  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1030  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1031  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1032  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1033  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1034  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1035  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1036  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1037  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1038  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1039  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1040  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1041  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1042  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1043  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1044  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1045  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1046  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1047  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1048  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1049  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1050  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1051  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1052  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1053  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1054  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1055  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1056  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1057  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1058  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1059  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1060  	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
  1061  	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
  1062  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1063  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1064  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1065  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1066  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1067  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1068  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1069  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1070  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1071  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1072  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1073  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1074  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1075  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1076  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1077  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1078  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1079  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1080  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1081  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1082  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1083  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1084  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1085  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1086  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1087  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1088  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1089  	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
  1090  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1091  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1092  	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
  1093  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1094  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1095  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1096  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1097  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1098  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1099  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1100  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1101  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1102  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1103  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1104  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1105  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1106  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1107  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1108  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1109  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1110  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1111  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1112  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1113  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1115  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1116  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1117  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1118  	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
  1119  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1120  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1121  	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
  1122  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1123  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1124  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1125  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1126  	{AINT, yint, Px, [23]uint8{0xcd}},
  1127  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1128  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1129  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1130  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1131  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1132  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1133  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1134  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1135  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1136  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1137  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1138  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1139  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1140  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1141  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1142  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1143  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1144  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1145  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1146  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1147  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1148  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1149  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1150  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1151  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1152  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1153  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1154  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1155  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1156  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1157  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1158  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1159  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1160  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1161  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1162  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1163  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1164  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1165  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1166  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1167  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1168  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1169  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1170  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1171  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1172  	{ALONG, ybyte, Px, [23]uint8{4}},
  1173  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1174  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1175  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1176  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1177  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1178  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1179  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1180  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1181  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1182  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1183  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1184  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1185  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1186  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1187  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1188  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1189  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1190  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1191  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1192  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1193  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1194  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1195  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1196  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1197  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1198  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1199  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1200  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1201  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1202  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1203  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1204  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1205  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1206  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1207  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1208  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1209  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1210  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1211  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1212  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1213  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1214  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1215  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1216  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1217  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1218  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1219  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1220  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1221  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1222  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1223  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1224  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1225  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1226  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1227  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1228  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1229  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1230  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1231  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1232  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1233  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1234  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1235  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1236  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1237  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1238  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1239  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1240  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1241  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1242  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1243  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1244  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1245  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1246  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1247  	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1248  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1249  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1250  	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1251  	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1252  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1253  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1254  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1255  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1256  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1257  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1258  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1259  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1260  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1261  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1262  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1263  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1264  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1265  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1266  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1267  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1268  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1269  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1270  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1271  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1272  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1273  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1274  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1275  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1276  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1277  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1278  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1279  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1280  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1281  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1282  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1283  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1284  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1285  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1286  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1287  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1288  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1289  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1290  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1291  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1292  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1293  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1294  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1295  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1296  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1297  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1298  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1299  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1300  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1301  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1302  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1303  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1304  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1305  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1306  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1307  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1308  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1309  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1310  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1311  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1312  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1313  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1314  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1315  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1316  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1317  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1318  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1319  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1320  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1321  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1322  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1323  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1324  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1325  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1326  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1327  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1328  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1329  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1330  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1331  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1332  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1333  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1334  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1335  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1336  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1337  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1338  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1339  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1340  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1341  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1342  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1343  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1344  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1345  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1346  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1347  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1348  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1349  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1350  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1351  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1352  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1353  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1354  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1355  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1356  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1357  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1358  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1359  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1360  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1361  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1362  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1363  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1364  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1365  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1366  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1367  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1368  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1369  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1370  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1371  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1372  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1373  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1374  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1375  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1376  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1377  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1378  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1379  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1380  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1381  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1382  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1383  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1384  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1385  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1386  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1387  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1388  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1389  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1390  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1391  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1392  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1393  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1394  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1395  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1396  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1397  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1398  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1399  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1400  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1401  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1402  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1403  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1404  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1405  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1406  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1407  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1408  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1409  	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1410  	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1411  	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1412  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1413  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1414  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1415  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1416  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1417  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1418  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1419  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1420  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1421  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1422  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1423  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1424  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1425  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1426  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1427  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1428  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1429  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1430  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1431  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1432  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1433  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1434  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1435  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1436  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1437  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1438  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1439  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1440  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1441  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1442  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1443  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1444  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1445  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1446  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1447  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1448  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1449  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1450  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1451  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1452  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1453  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1454  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1455  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1456  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1457  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1458  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1459  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1460  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1461  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1462  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1463  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1464  	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1465  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1466  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1467  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1468  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1469  	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
  1470  	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
  1471  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1472  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1473  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1474  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1475  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1476  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1477  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1478  	{AWORD, ybyte, Px, [23]uint8{2}},
  1479  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1480  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1481  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1482  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1483  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1484  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1485  	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1486  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1487  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1488  	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1489  	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1490  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1491  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1492  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1493  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1494  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1495  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1496  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1497  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1498  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1499  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1500  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1501  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1502  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1503  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1504  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1505  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1506  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1507  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1508  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1509  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1510  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1511  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1512  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1513  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1514  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1515  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1516  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1517  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1518  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1519  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1520  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1521  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1522  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1523  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1524  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1525  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1526  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1527  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1528  	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
  1529  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1530  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1531  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1532  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1533  	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
  1534  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1535  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1536  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1537  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1538  	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
  1539  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1540  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1541  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1542  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1543  	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
  1544  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1545  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1546  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1547  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1548  	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
  1549  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1550  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1551  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1552  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1553  	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
  1554  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1555  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1556  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1557  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1558  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1559  	{AFFREE, nil, 0, [23]uint8{}},
  1560  	{AFLDCW, ysvrs, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1561  	{AFLDENV, ysvrs, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1562  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1563  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1564  	{AFSTCW, ysvrs, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1565  	{AFSTENV, ysvrs, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1566  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1567  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1568  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1569  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1570  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1571  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1572  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1573  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1574  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1575  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1576  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1577  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1578  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1579  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1580  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1581  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1582  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1583  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1584  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1585  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1586  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1587  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1588  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1589  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1590  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1591  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1592  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1593  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1594  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1595  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1596  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1597  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1598  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1599  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1600  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1601  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1602  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1603  	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
  1604  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1605  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1606  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1607  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1608  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1609  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1610  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1611  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1612  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1613  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1614  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1615  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1616  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1617  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1618  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1619  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1620  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1621  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1622  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1623  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1624  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1625  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1626  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1627  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1628  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1629  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1630  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1631  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1632  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1633  	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1634  	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
  1635  	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
  1636  	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1637  	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1638  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1639  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1640  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1641  	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
  1642  	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
  1643  	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
  1644  
  1645  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1646  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1647  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1648  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1649  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1650  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1651  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1652  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1653  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1654  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1655  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1656  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1657  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1658  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1659  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1660  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1661  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1662  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1663  
  1664  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1665  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1666  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1667  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1668  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1669  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1670  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1671  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1672  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1673  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1674  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
  1675  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70, VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
  1676  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
  1677  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
  1678  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
  1679  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
  1680  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
  1681  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
  1682  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
  1683  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
  1684  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
  1685  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
  1686  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
  1687  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
  1688  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
  1689  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
  1690  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
  1691  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
  1692  	{AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_256_66_0F38_W0, 0x19}},
  1693  	{AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x18, VEX_256_66_0F38_W0, 0x18}},
  1694  	{AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x12, VEX_256_F2_0F_WIG, 0x12}},
  1695  	{AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x16, VEX_256_F3_0F_WIG, 0x16}},
  1696  	{AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x12, VEX_256_F3_0F_WIG, 0x12}},
  1697  
  1698  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1699  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1700  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1701  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1702  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1703  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1704  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1705  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1706  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1707  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1708  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1709  	{obj.AEND, nil, 0, [23]uint8{}},
  1710  	{0, nil, 0, [23]uint8{}},
  1711  }
  1712  
// opindex maps an opcode (masked with obj.AMask) to its row in optab.
// It is populated by instinit; slots for opcodes without an optab row
// are left nil.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1714  
  1715  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1716  // This happens on systems like Solaris that call .so functions instead of system calls.
  1717  // It does not seem to be necessary for any other systems. This is probably working
  1718  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1719  // what that bug is. And this does fix it.
  1720  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1721  	if ctxt.Headtype == objabi.Hsolaris {
  1722  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1723  		return strings.HasPrefix(s.Name, "libc_")
  1724  	}
  1725  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1726  }
  1727  
  1728  // single-instruction no-ops of various lengths.
  1729  // constructed by hand and disassembled with gdb to verify.
  1730  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1731  var nop = [][16]uint8{
  1732  	{0x90},
  1733  	{0x66, 0x90},
  1734  	{0x0F, 0x1F, 0x00},
  1735  	{0x0F, 0x1F, 0x40, 0x00},
  1736  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1737  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1738  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1739  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1740  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1741  }
  1742  
  1743  // Native Client rejects the repeated 0x66 prefix.
  1744  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1745  func fillnop(p []byte, n int) {
  1746  	var m int
  1747  
  1748  	for n > 0 {
  1749  		m = n
  1750  		if m > len(nop) {
  1751  			m = len(nop)
  1752  		}
  1753  		copy(p[:m], nop[m-1][:m])
  1754  		p = p[m:]
  1755  		n -= m
  1756  	}
  1757  }
  1758  
  1759  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1760  	s.Grow(int64(c) + int64(pad))
  1761  	fillnop(s.P[c:], int(pad))
  1762  	return c + pad
  1763  }
  1764  
  1765  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1766  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1767  		return l
  1768  	}
  1769  	return q
  1770  }
  1771  
  1772  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  1773  	if s.P != nil {
  1774  		return
  1775  	}
  1776  
  1777  	if ycover[0] == 0 {
  1778  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  1779  	}
  1780  
  1781  	var asmbuf AsmBuf
  1782  
  1783  	for p := s.Func.Text; p != nil; p = p.Link {
  1784  		if p.To.Type == obj.TYPE_BRANCH {
  1785  			if p.Pcond == nil {
  1786  				p.Pcond = p
  1787  			}
  1788  		}
  1789  		if p.As == AADJSP {
  1790  			p.To.Type = obj.TYPE_REG
  1791  			p.To.Reg = REG_SP
  1792  			v := int32(-p.From.Offset)
  1793  			p.From.Offset = int64(v)
  1794  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1795  			if v < 0 {
  1796  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1797  				v = -v
  1798  				p.From.Offset = int64(v)
  1799  			}
  1800  
  1801  			if v == 0 {
  1802  				p.As = obj.ANOP
  1803  			}
  1804  		}
  1805  	}
  1806  
  1807  	var q *obj.Prog
  1808  	var count int64 // rough count of number of instructions
  1809  	for p := s.Func.Text; p != nil; p = p.Link {
  1810  		count++
  1811  		p.Back = 2 // use short branches first time through
  1812  		q = p.Pcond
  1813  		if q != nil && (q.Back&2 != 0) {
  1814  			p.Back |= 1 // backward jump
  1815  			q.Back |= 4 // loop head
  1816  		}
  1817  
  1818  		if p.As == AADJSP {
  1819  			p.To.Type = obj.TYPE_REG
  1820  			p.To.Reg = REG_SP
  1821  			v := int32(-p.From.Offset)
  1822  			p.From.Offset = int64(v)
  1823  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1824  			if v < 0 {
  1825  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1826  				v = -v
  1827  				p.From.Offset = int64(v)
  1828  			}
  1829  
  1830  			if v == 0 {
  1831  				p.As = obj.ANOP
  1832  			}
  1833  		}
  1834  	}
  1835  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1836  
  1837  	n := 0
  1838  	var c int32
  1839  	errors := ctxt.Errors
  1840  	for {
  1841  		loop := int32(0)
  1842  		for i := range s.R {
  1843  			s.R[i] = obj.Reloc{}
  1844  		}
  1845  		s.R = s.R[:0]
  1846  		s.P = s.P[:0]
  1847  		c = 0
  1848  		for p := s.Func.Text; p != nil; p = p.Link {
  1849  			if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 {
  1850  
  1851  				// pad everything to avoid crossing 32-byte boundary
  1852  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1853  					c = naclpad(ctxt, s, c, -c&31)
  1854  				}
  1855  
  1856  				// pad call deferreturn to start at 32-byte boundary
  1857  				// so that subtracting 5 in jmpdefer will jump back
  1858  				// to that boundary and rerun the call.
  1859  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1860  					c = naclpad(ctxt, s, c, -c&31)
  1861  				}
  1862  
  1863  				// pad call to end at 32-byte boundary
  1864  				if p.As == obj.ACALL {
  1865  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1866  				}
  1867  
  1868  				// the linker treats REP and STOSQ as different instructions
  1869  				// but in fact the REP is a prefix on the STOSQ.
  1870  				// make sure REP has room for 2 more bytes, so that
  1871  				// padding will not be inserted before the next instruction.
  1872  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1873  					c = naclpad(ctxt, s, c, -c&31)
  1874  				}
  1875  
  1876  				// same for LOCK.
  1877  				// various instructions follow; the longest is 4 bytes.
  1878  				// give ourselves 8 bytes so as to avoid surprises.
  1879  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1880  					c = naclpad(ctxt, s, c, -c&31)
  1881  				}
  1882  			}
  1883  
  1884  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1885  				// pad with NOPs
  1886  				v := -c & (LoopAlign - 1)
  1887  
  1888  				if v <= MaxLoopPad {
  1889  					s.Grow(int64(c) + int64(v))
  1890  					fillnop(s.P[c:], int(v))
  1891  					c += v
  1892  				}
  1893  			}
  1894  
  1895  			p.Pc = int64(c)
  1896  
  1897  			// process forward jumps to p
  1898  			for q = p.Rel; q != nil; q = q.Forwd {
  1899  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1900  				if q.Back&2 != 0 { // short
  1901  					if v > 127 {
  1902  						loop++
  1903  						q.Back ^= 2
  1904  					}
  1905  
  1906  					if q.As == AJCXZL || q.As == AXBEGIN {
  1907  						s.P[q.Pc+2] = byte(v)
  1908  					} else {
  1909  						s.P[q.Pc+1] = byte(v)
  1910  					}
  1911  				} else {
  1912  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1913  				}
  1914  			}
  1915  
  1916  			p.Rel = nil
  1917  
  1918  			p.Pc = int64(c)
  1919  			asmbuf.asmins(ctxt, s, p)
  1920  			m := asmbuf.Len()
  1921  			if int(p.Isize) != m {
  1922  				p.Isize = uint8(m)
  1923  				loop++
  1924  			}
  1925  
  1926  			s.Grow(p.Pc + int64(m))
  1927  			copy(s.P[p.Pc:], asmbuf.Bytes())
  1928  			c += int32(m)
  1929  		}
  1930  
  1931  		n++
  1932  		if n > 20 {
  1933  			ctxt.Diag("span must be looping")
  1934  			log.Fatalf("loop")
  1935  		}
  1936  		if loop == 0 {
  1937  			break
  1938  		}
  1939  		if ctxt.Errors > errors {
  1940  			return
  1941  		}
  1942  	}
  1943  
  1944  	if ctxt.Headtype == objabi.Hnacl {
  1945  		c = naclpad(ctxt, s, c, -c&31)
  1946  	}
  1947  
  1948  	s.Size = int64(c)
  1949  
  1950  	if false { /* debug['a'] > 1 */
  1951  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1952  		var i int
  1953  		for i = 0; i < len(s.P); i++ {
  1954  			fmt.Printf(" %.2x", s.P[i])
  1955  			if i%16 == 15 {
  1956  				fmt.Printf("\n  %.6x", uint(i+1))
  1957  			}
  1958  		}
  1959  
  1960  		if i%16 != 0 {
  1961  			fmt.Printf("\n")
  1962  		}
  1963  
  1964  		for i := 0; i < len(s.R); i++ {
  1965  			r := &s.R[i]
  1966  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1967  		}
  1968  	}
  1969  }
  1970  
  1971  func instinit(ctxt *obj.Link) {
  1972  	if ycover[0] != 0 {
  1973  		// Already initialized; stop now.
  1974  		// This happens in the cmd/asm tests,
  1975  		// each of which re-initializes the arch.
  1976  		return
  1977  	}
  1978  
  1979  	switch ctxt.Headtype {
  1980  	case objabi.Hplan9:
  1981  		plan9privates = ctxt.Lookup("_privates")
  1982  	case objabi.Hnacl:
  1983  		deferreturn = ctxt.Lookup("runtime.deferreturn")
  1984  	}
  1985  
  1986  	for i := 1; optab[i].as != 0; i++ {
  1987  		c := optab[i].as
  1988  		if opindex[c&obj.AMask] != nil {
  1989  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  1990  		}
  1991  		opindex[c&obj.AMask] = &optab[i]
  1992  	}
  1993  
  1994  	for i := 0; i < Ymax; i++ {
  1995  		ycover[i*Ymax+i] = 1
  1996  	}
  1997  
  1998  	ycover[Yi0*Ymax+Yi8] = 1
  1999  	ycover[Yi1*Ymax+Yi8] = 1
  2000  	ycover[Yu7*Ymax+Yi8] = 1
  2001  
  2002  	ycover[Yi0*Ymax+Yu7] = 1
  2003  	ycover[Yi1*Ymax+Yu7] = 1
  2004  
  2005  	ycover[Yi0*Ymax+Yu8] = 1
  2006  	ycover[Yi1*Ymax+Yu8] = 1
  2007  	ycover[Yu7*Ymax+Yu8] = 1
  2008  
  2009  	ycover[Yi0*Ymax+Ys32] = 1
  2010  	ycover[Yi1*Ymax+Ys32] = 1
  2011  	ycover[Yu7*Ymax+Ys32] = 1
  2012  	ycover[Yu8*Ymax+Ys32] = 1
  2013  	ycover[Yi8*Ymax+Ys32] = 1
  2014  
  2015  	ycover[Yi0*Ymax+Yi32] = 1
  2016  	ycover[Yi1*Ymax+Yi32] = 1
  2017  	ycover[Yu7*Ymax+Yi32] = 1
  2018  	ycover[Yu8*Ymax+Yi32] = 1
  2019  	ycover[Yi8*Ymax+Yi32] = 1
  2020  	ycover[Ys32*Ymax+Yi32] = 1
  2021  
  2022  	ycover[Yi0*Ymax+Yi64] = 1
  2023  	ycover[Yi1*Ymax+Yi64] = 1
  2024  	ycover[Yu7*Ymax+Yi64] = 1
  2025  	ycover[Yu8*Ymax+Yi64] = 1
  2026  	ycover[Yi8*Ymax+Yi64] = 1
  2027  	ycover[Ys32*Ymax+Yi64] = 1
  2028  	ycover[Yi32*Ymax+Yi64] = 1
  2029  
  2030  	ycover[Yal*Ymax+Yrb] = 1
  2031  	ycover[Ycl*Ymax+Yrb] = 1
  2032  	ycover[Yax*Ymax+Yrb] = 1
  2033  	ycover[Ycx*Ymax+Yrb] = 1
  2034  	ycover[Yrx*Ymax+Yrb] = 1
  2035  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2036  
  2037  	ycover[Ycl*Ymax+Ycx] = 1
  2038  
  2039  	ycover[Yax*Ymax+Yrx] = 1
  2040  	ycover[Ycx*Ymax+Yrx] = 1
  2041  
  2042  	ycover[Yax*Ymax+Yrl] = 1
  2043  	ycover[Ycx*Ymax+Yrl] = 1
  2044  	ycover[Yrx*Ymax+Yrl] = 1
  2045  	ycover[Yrl32*Ymax+Yrl] = 1
  2046  
  2047  	ycover[Yf0*Ymax+Yrf] = 1
  2048  
  2049  	ycover[Yal*Ymax+Ymb] = 1
  2050  	ycover[Ycl*Ymax+Ymb] = 1
  2051  	ycover[Yax*Ymax+Ymb] = 1
  2052  	ycover[Ycx*Ymax+Ymb] = 1
  2053  	ycover[Yrx*Ymax+Ymb] = 1
  2054  	ycover[Yrb*Ymax+Ymb] = 1
  2055  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2056  	ycover[Ym*Ymax+Ymb] = 1
  2057  
  2058  	ycover[Yax*Ymax+Yml] = 1
  2059  	ycover[Ycx*Ymax+Yml] = 1
  2060  	ycover[Yrx*Ymax+Yml] = 1
  2061  	ycover[Yrl*Ymax+Yml] = 1
  2062  	ycover[Yrl32*Ymax+Yml] = 1
  2063  	ycover[Ym*Ymax+Yml] = 1
  2064  
  2065  	ycover[Yax*Ymax+Ymm] = 1
  2066  	ycover[Ycx*Ymax+Ymm] = 1
  2067  	ycover[Yrx*Ymax+Ymm] = 1
  2068  	ycover[Yrl*Ymax+Ymm] = 1
  2069  	ycover[Yrl32*Ymax+Ymm] = 1
  2070  	ycover[Ym*Ymax+Ymm] = 1
  2071  	ycover[Ymr*Ymax+Ymm] = 1
  2072  
  2073  	ycover[Ym*Ymax+Yxm] = 1
  2074  	ycover[Yxr*Ymax+Yxm] = 1
  2075  
  2076  	ycover[Ym*Ymax+Yym] = 1
  2077  	ycover[Yyr*Ymax+Yym] = 1
  2078  
  2079  	for i := 0; i < MAXREG; i++ {
  2080  		reg[i] = -1
  2081  		if i >= REG_AL && i <= REG_R15B {
  2082  			reg[i] = (i - REG_AL) & 7
  2083  			if i >= REG_SPB && i <= REG_DIB {
  2084  				regrex[i] = 0x40
  2085  			}
  2086  			if i >= REG_R8B && i <= REG_R15B {
  2087  				regrex[i] = Rxr | Rxx | Rxb
  2088  			}
  2089  		}
  2090  
  2091  		if i >= REG_AH && i <= REG_BH {
  2092  			reg[i] = 4 + ((i - REG_AH) & 7)
  2093  		}
  2094  		if i >= REG_AX && i <= REG_R15 {
  2095  			reg[i] = (i - REG_AX) & 7
  2096  			if i >= REG_R8 {
  2097  				regrex[i] = Rxr | Rxx | Rxb
  2098  			}
  2099  		}
  2100  
  2101  		if i >= REG_F0 && i <= REG_F0+7 {
  2102  			reg[i] = (i - REG_F0) & 7
  2103  		}
  2104  		if i >= REG_M0 && i <= REG_M0+7 {
  2105  			reg[i] = (i - REG_M0) & 7
  2106  		}
  2107  		if i >= REG_X0 && i <= REG_X0+15 {
  2108  			reg[i] = (i - REG_X0) & 7
  2109  			if i >= REG_X0+8 {
  2110  				regrex[i] = Rxr | Rxx | Rxb
  2111  			}
  2112  		}
  2113  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2114  			reg[i] = (i - REG_Y0) & 7
  2115  			if i >= REG_Y0+8 {
  2116  				regrex[i] = Rxr | Rxx | Rxb
  2117  			}
  2118  		}
  2119  
  2120  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2121  			regrex[i] = Rxr
  2122  		}
  2123  	}
  2124  }
  2125  
// isAndroid records whether objabi.GOOS is "android". prefixof consults
// it when choosing the segment register used for TLS references.
var isAndroid = (objabi.GOOS == "android")
  2127  
  2128  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2129  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2130  		return 0
  2131  	}
  2132  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2133  		switch a.Reg {
  2134  		case REG_CS:
  2135  			return 0x2e
  2136  
  2137  		case REG_DS:
  2138  			return 0x3e
  2139  
  2140  		case REG_ES:
  2141  			return 0x26
  2142  
  2143  		case REG_FS:
  2144  			return 0x64
  2145  
  2146  		case REG_GS:
  2147  			return 0x65
  2148  
  2149  		case REG_TLS:
  2150  			// NOTE: Systems listed here should be only systems that
  2151  			// support direct TLS references like 8(TLS) implemented as
  2152  			// direct references from FS or GS. Systems that require
  2153  			// the initial-exec model, where you load the TLS base into
  2154  			// a register and then index from that register, do not reach
  2155  			// this code and should not be listed.
  2156  			if ctxt.Arch.Family == sys.I386 {
  2157  				switch ctxt.Headtype {
  2158  				default:
  2159  					if isAndroid {
  2160  						return 0x65 // GS
  2161  					}
  2162  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2163  
  2164  				case objabi.Hdarwin,
  2165  					objabi.Hdragonfly,
  2166  					objabi.Hfreebsd,
  2167  					objabi.Hnetbsd,
  2168  					objabi.Hopenbsd:
  2169  					return 0x65 // GS
  2170  				}
  2171  			}
  2172  
  2173  			switch ctxt.Headtype {
  2174  			default:
  2175  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2176  
  2177  			case objabi.Hlinux:
  2178  				if isAndroid {
  2179  					return 0x64 // FS
  2180  				}
  2181  
  2182  				if ctxt.Flag_shared {
  2183  					log.Fatalf("unknown TLS base register for linux with -shared")
  2184  				} else {
  2185  					return 0x64 // FS
  2186  				}
  2187  
  2188  			case objabi.Hdragonfly,
  2189  				objabi.Hfreebsd,
  2190  				objabi.Hnetbsd,
  2191  				objabi.Hopenbsd,
  2192  				objabi.Hsolaris:
  2193  				return 0x64 // FS
  2194  
  2195  			case objabi.Hdarwin:
  2196  				return 0x65 // GS
  2197  			}
  2198  		}
  2199  	}
  2200  
  2201  	if ctxt.Arch.Family == sys.I386 {
  2202  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2203  			// When building for inclusion into a shared library, an instruction of the form
  2204  			//     MOVL 0(CX)(TLS*1), AX
  2205  			// becomes
  2206  			//     mov %gs:(%ecx), %eax
  2207  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2208  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2209  			// a shared library the instruction it becomes
  2210  			//     mov 0x0(%ecx), $eax
  2211  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2212  			if a.Offset != 0 {
  2213  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2214  			}
  2215  			return 0x65 // GS
  2216  		}
  2217  		return 0
  2218  	}
  2219  
  2220  	switch a.Index {
  2221  	case REG_CS:
  2222  		return 0x2e
  2223  
  2224  	case REG_DS:
  2225  		return 0x3e
  2226  
  2227  	case REG_ES:
  2228  		return 0x26
  2229  
  2230  	case REG_TLS:
  2231  		if ctxt.Flag_shared {
  2232  			// When building for inclusion into a shared library, an instruction of the form
  2233  			//     MOV 0(CX)(TLS*1), AX
  2234  			// becomes
  2235  			//     mov %fs:(%rcx), %rax
  2236  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2237  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2238  			// a shared library the instruction does not require a prefix.
  2239  			if a.Offset != 0 {
  2240  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2241  			}
  2242  			return 0x64
  2243  		}
  2244  
  2245  	case REG_FS:
  2246  		return 0x64
  2247  
  2248  	case REG_GS:
  2249  		return 0x65
  2250  	}
  2251  
  2252  	return 0
  2253  }
  2254  
  2255  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2256  	switch a.Type {
  2257  	case obj.TYPE_NONE:
  2258  		return Ynone
  2259  
  2260  	case obj.TYPE_BRANCH:
  2261  		return Ybr
  2262  
  2263  	case obj.TYPE_INDIR:
  2264  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2265  			return Yindir
  2266  		}
  2267  		return Yxxx
  2268  
  2269  	case obj.TYPE_MEM:
  2270  		if a.Index == REG_SP {
  2271  			// Can't use SP as the index register
  2272  			return Yxxx
  2273  		}
  2274  		if ctxt.Arch.Family == sys.AMD64 {
  2275  			// Offset must fit in a 32-bit signed field (or fit in a 32-bit unsigned field
  2276  			// where the sign extension doesn't matter).
  2277  			// Note: The latter happens only in assembly, for example crypto/sha1/sha1block_amd64.s.
  2278  			if !(a.Offset == int64(int32(a.Offset)) ||
  2279  				a.Offset == int64(uint32(a.Offset)) && p.As == ALEAL) {
  2280  				return Yxxx
  2281  			}
  2282  			switch a.Name {
  2283  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2284  				// Global variables can't use index registers and their
  2285  				// base register is %rip (%rip is encoded as REG_NONE).
  2286  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2287  					return Yxxx
  2288  				}
  2289  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2290  				// These names must have a base of SP.  The old compiler
  2291  				// uses 0 for the base register. SSA uses REG_SP.
  2292  				if a.Reg != REG_SP && a.Reg != 0 {
  2293  					return Yxxx
  2294  				}
  2295  			case obj.NAME_NONE:
  2296  				// everything is ok
  2297  			default:
  2298  				// unknown name
  2299  				return Yxxx
  2300  			}
  2301  		}
  2302  		return Ym
  2303  
  2304  	case obj.TYPE_ADDR:
  2305  		switch a.Name {
  2306  		case obj.NAME_GOTREF:
  2307  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2308  			return Yxxx
  2309  
  2310  		case obj.NAME_EXTERN,
  2311  			obj.NAME_STATIC:
  2312  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2313  				return Yi32
  2314  			}
  2315  			return Yiauto // use pc-relative addressing
  2316  
  2317  		case obj.NAME_AUTO,
  2318  			obj.NAME_PARAM:
  2319  			return Yiauto
  2320  		}
  2321  
  2322  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2323  		// and got Yi32 in an earlier version of this code.
  2324  		// Keep doing that until we fix yduff etc.
  2325  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2326  			return Yi32
  2327  		}
  2328  
  2329  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2330  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2331  		}
  2332  		fallthrough
  2333  
  2334  		// fall through
  2335  
  2336  	case obj.TYPE_CONST:
  2337  		if a.Sym != nil {
  2338  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2339  		}
  2340  
  2341  		v := a.Offset
  2342  		if ctxt.Arch.Family == sys.I386 {
  2343  			v = int64(int32(v))
  2344  		}
  2345  		if v == 0 {
  2346  			if p.Mark&PRESERVEFLAGS != 0 {
  2347  				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
  2348  				return Yu7
  2349  			}
  2350  			return Yi0
  2351  		}
  2352  		if v == 1 {
  2353  			return Yi1
  2354  		}
  2355  		if v >= 0 && v <= 127 {
  2356  			return Yu7
  2357  		}
  2358  		if v >= 0 && v <= 255 {
  2359  			return Yu8
  2360  		}
  2361  		if v >= -128 && v <= 127 {
  2362  			return Yi8
  2363  		}
  2364  		if ctxt.Arch.Family == sys.I386 {
  2365  			return Yi32
  2366  		}
  2367  		l := int32(v)
  2368  		if int64(l) == v {
  2369  			return Ys32 /* can sign extend */
  2370  		}
  2371  		if v>>32 == 0 {
  2372  			return Yi32 /* unsigned */
  2373  		}
  2374  		return Yi64
  2375  
  2376  	case obj.TYPE_TEXTSIZE:
  2377  		return Ytextsize
  2378  	}
  2379  
  2380  	if a.Type != obj.TYPE_REG {
  2381  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2382  		return Yxxx
  2383  	}
  2384  
  2385  	switch a.Reg {
  2386  	case REG_AL:
  2387  		return Yal
  2388  
  2389  	case REG_AX:
  2390  		return Yax
  2391  
  2392  		/*
  2393  			case REG_SPB:
  2394  		*/
  2395  	case REG_BPB,
  2396  		REG_SIB,
  2397  		REG_DIB,
  2398  		REG_R8B,
  2399  		REG_R9B,
  2400  		REG_R10B,
  2401  		REG_R11B,
  2402  		REG_R12B,
  2403  		REG_R13B,
  2404  		REG_R14B,
  2405  		REG_R15B:
  2406  		if ctxt.Arch.Family == sys.I386 {
  2407  			return Yxxx
  2408  		}
  2409  		fallthrough
  2410  
  2411  	case REG_DL,
  2412  		REG_BL,
  2413  		REG_AH,
  2414  		REG_CH,
  2415  		REG_DH,
  2416  		REG_BH:
  2417  		return Yrb
  2418  
  2419  	case REG_CL:
  2420  		return Ycl
  2421  
  2422  	case REG_CX:
  2423  		return Ycx
  2424  
  2425  	case REG_DX, REG_BX:
  2426  		return Yrx
  2427  
  2428  	case REG_R8, /* not really Yrl */
  2429  		REG_R9,
  2430  		REG_R10,
  2431  		REG_R11,
  2432  		REG_R12,
  2433  		REG_R13,
  2434  		REG_R14,
  2435  		REG_R15:
  2436  		if ctxt.Arch.Family == sys.I386 {
  2437  			return Yxxx
  2438  		}
  2439  		fallthrough
  2440  
  2441  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2442  		if ctxt.Arch.Family == sys.I386 {
  2443  			return Yrl32
  2444  		}
  2445  		return Yrl
  2446  
  2447  	case REG_F0 + 0:
  2448  		return Yf0
  2449  
  2450  	case REG_F0 + 1,
  2451  		REG_F0 + 2,
  2452  		REG_F0 + 3,
  2453  		REG_F0 + 4,
  2454  		REG_F0 + 5,
  2455  		REG_F0 + 6,
  2456  		REG_F0 + 7:
  2457  		return Yrf
  2458  
  2459  	case REG_M0 + 0,
  2460  		REG_M0 + 1,
  2461  		REG_M0 + 2,
  2462  		REG_M0 + 3,
  2463  		REG_M0 + 4,
  2464  		REG_M0 + 5,
  2465  		REG_M0 + 6,
  2466  		REG_M0 + 7:
  2467  		return Ymr
  2468  
  2469  	case REG_X0 + 0,
  2470  		REG_X0 + 1,
  2471  		REG_X0 + 2,
  2472  		REG_X0 + 3,
  2473  		REG_X0 + 4,
  2474  		REG_X0 + 5,
  2475  		REG_X0 + 6,
  2476  		REG_X0 + 7,
  2477  		REG_X0 + 8,
  2478  		REG_X0 + 9,
  2479  		REG_X0 + 10,
  2480  		REG_X0 + 11,
  2481  		REG_X0 + 12,
  2482  		REG_X0 + 13,
  2483  		REG_X0 + 14,
  2484  		REG_X0 + 15:
  2485  		return Yxr
  2486  
  2487  	case REG_Y0 + 0,
  2488  		REG_Y0 + 1,
  2489  		REG_Y0 + 2,
  2490  		REG_Y0 + 3,
  2491  		REG_Y0 + 4,
  2492  		REG_Y0 + 5,
  2493  		REG_Y0 + 6,
  2494  		REG_Y0 + 7,
  2495  		REG_Y0 + 8,
  2496  		REG_Y0 + 9,
  2497  		REG_Y0 + 10,
  2498  		REG_Y0 + 11,
  2499  		REG_Y0 + 12,
  2500  		REG_Y0 + 13,
  2501  		REG_Y0 + 14,
  2502  		REG_Y0 + 15:
  2503  		return Yyr
  2504  
  2505  	case REG_CS:
  2506  		return Ycs
  2507  	case REG_SS:
  2508  		return Yss
  2509  	case REG_DS:
  2510  		return Yds
  2511  	case REG_ES:
  2512  		return Yes
  2513  	case REG_FS:
  2514  		return Yfs
  2515  	case REG_GS:
  2516  		return Ygs
  2517  	case REG_TLS:
  2518  		return Ytls
  2519  
  2520  	case REG_GDTR:
  2521  		return Ygdtr
  2522  	case REG_IDTR:
  2523  		return Yidtr
  2524  	case REG_LDTR:
  2525  		return Yldtr
  2526  	case REG_MSW:
  2527  		return Ymsw
  2528  	case REG_TASK:
  2529  		return Ytask
  2530  
  2531  	case REG_CR + 0:
  2532  		return Ycr0
  2533  	case REG_CR + 1:
  2534  		return Ycr1
  2535  	case REG_CR + 2:
  2536  		return Ycr2
  2537  	case REG_CR + 3:
  2538  		return Ycr3
  2539  	case REG_CR + 4:
  2540  		return Ycr4
  2541  	case REG_CR + 5:
  2542  		return Ycr5
  2543  	case REG_CR + 6:
  2544  		return Ycr6
  2545  	case REG_CR + 7:
  2546  		return Ycr7
  2547  	case REG_CR + 8:
  2548  		return Ycr8
  2549  
  2550  	case REG_DR + 0:
  2551  		return Ydr0
  2552  	case REG_DR + 1:
  2553  		return Ydr1
  2554  	case REG_DR + 2:
  2555  		return Ydr2
  2556  	case REG_DR + 3:
  2557  		return Ydr3
  2558  	case REG_DR + 4:
  2559  		return Ydr4
  2560  	case REG_DR + 5:
  2561  		return Ydr5
  2562  	case REG_DR + 6:
  2563  		return Ydr6
  2564  	case REG_DR + 7:
  2565  		return Ydr7
  2566  
  2567  	case REG_TR + 0:
  2568  		return Ytr0
  2569  	case REG_TR + 1:
  2570  		return Ytr1
  2571  	case REG_TR + 2:
  2572  		return Ytr2
  2573  	case REG_TR + 3:
  2574  		return Ytr3
  2575  	case REG_TR + 4:
  2576  		return Ytr4
  2577  	case REG_TR + 5:
  2578  		return Ytr5
  2579  	case REG_TR + 6:
  2580  		return Ytr6
  2581  	case REG_TR + 7:
  2582  		return Ytr7
  2583  	}
  2584  
  2585  	return Yxxx
  2586  }
  2587  
  2588  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  2589  // and hold assembly state.
  2590  type AsmBuf struct {
  2591  	buf     [100]byte
  2592  	off     int
  2593  	rexflag int
  2594  	vexflag int
  2595  	rep     int
  2596  	repn    int
  2597  	lock    bool
  2598  }
  2599  
  2600  // Put1 appends one byte to the end of the buffer.
  2601  func (a *AsmBuf) Put1(x byte) {
  2602  	a.buf[a.off] = x
  2603  	a.off++
  2604  }
  2605  
  2606  // Put2 appends two bytes to the end of the buffer.
  2607  func (a *AsmBuf) Put2(x, y byte) {
  2608  	a.buf[a.off+0] = x
  2609  	a.buf[a.off+1] = y
  2610  	a.off += 2
  2611  }
  2612  
  2613  // Put3 appends three bytes to the end of the buffer.
  2614  func (a *AsmBuf) Put3(x, y, z byte) {
  2615  	a.buf[a.off+0] = x
  2616  	a.buf[a.off+1] = y
  2617  	a.buf[a.off+2] = z
  2618  	a.off += 3
  2619  }
  2620  
  2621  // Put4 appends four bytes to the end of the buffer.
  2622  func (a *AsmBuf) Put4(x, y, z, w byte) {
  2623  	a.buf[a.off+0] = x
  2624  	a.buf[a.off+1] = y
  2625  	a.buf[a.off+2] = z
  2626  	a.buf[a.off+3] = w
  2627  	a.off += 4
  2628  }
  2629  
  2630  // PutInt16 writes v into the buffer using little-endian encoding.
  2631  func (a *AsmBuf) PutInt16(v int16) {
  2632  	a.buf[a.off+0] = byte(v)
  2633  	a.buf[a.off+1] = byte(v >> 8)
  2634  	a.off += 2
  2635  }
  2636  
  2637  // PutInt32 writes v into the buffer using little-endian encoding.
  2638  func (a *AsmBuf) PutInt32(v int32) {
  2639  	a.buf[a.off+0] = byte(v)
  2640  	a.buf[a.off+1] = byte(v >> 8)
  2641  	a.buf[a.off+2] = byte(v >> 16)
  2642  	a.buf[a.off+3] = byte(v >> 24)
  2643  	a.off += 4
  2644  }
  2645  
  2646  // PutInt64 writes v into the buffer using little-endian encoding.
  2647  func (a *AsmBuf) PutInt64(v int64) {
  2648  	a.buf[a.off+0] = byte(v)
  2649  	a.buf[a.off+1] = byte(v >> 8)
  2650  	a.buf[a.off+2] = byte(v >> 16)
  2651  	a.buf[a.off+3] = byte(v >> 24)
  2652  	a.buf[a.off+4] = byte(v >> 32)
  2653  	a.buf[a.off+5] = byte(v >> 40)
  2654  	a.buf[a.off+6] = byte(v >> 48)
  2655  	a.buf[a.off+7] = byte(v >> 56)
  2656  	a.off += 8
  2657  }
  2658  
  2659  // Put copies b into the buffer.
  2660  func (a *AsmBuf) Put(b []byte) {
  2661  	copy(a.buf[a.off:], b)
  2662  	a.off += len(b)
  2663  }
  2664  
  2665  // Insert inserts b at offset i.
  2666  func (a *AsmBuf) Insert(i int, b byte) {
  2667  	a.off++
  2668  	copy(a.buf[i+1:a.off], a.buf[i:a.off-1])
  2669  	a.buf[i] = b
  2670  }
  2671  
  2672  // Last returns the byte at the end of the buffer.
  2673  func (a *AsmBuf) Last() byte { return a.buf[a.off-1] }
  2674  
  2675  // Len returns the length of the buffer.
  2676  func (a *AsmBuf) Len() int { return a.off }
  2677  
  2678  // Bytes returns the contents of the buffer.
  2679  func (a *AsmBuf) Bytes() []byte { return a.buf[:a.off] }
  2680  
  2681  // Reset empties the buffer.
  2682  func (a *AsmBuf) Reset() { a.off = 0 }
  2683  
  2684  // At returns the byte at offset i.
  2685  func (a *AsmBuf) At(i int) byte { return a.buf[i] }
  2686  
  2687  func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2688  	var i int
  2689  
  2690  	switch index {
  2691  	default:
  2692  		goto bad
  2693  
  2694  	case REG_NONE:
  2695  		i = 4 << 3
  2696  		goto bas
  2697  
  2698  	case REG_R8,
  2699  		REG_R9,
  2700  		REG_R10,
  2701  		REG_R11,
  2702  		REG_R12,
  2703  		REG_R13,
  2704  		REG_R14,
  2705  		REG_R15:
  2706  		if ctxt.Arch.Family == sys.I386 {
  2707  			goto bad
  2708  		}
  2709  		fallthrough
  2710  
  2711  	case REG_AX,
  2712  		REG_CX,
  2713  		REG_DX,
  2714  		REG_BX,
  2715  		REG_BP,
  2716  		REG_SI,
  2717  		REG_DI:
  2718  		i = reg[index] << 3
  2719  	}
  2720  
  2721  	switch scale {
  2722  	default:
  2723  		goto bad
  2724  
  2725  	case 1:
  2726  		break
  2727  
  2728  	case 2:
  2729  		i |= 1 << 6
  2730  
  2731  	case 4:
  2732  		i |= 2 << 6
  2733  
  2734  	case 8:
  2735  		i |= 3 << 6
  2736  	}
  2737  
  2738  bas:
  2739  	switch base {
  2740  	default:
  2741  		goto bad
  2742  
  2743  	case REG_NONE: /* must be mod=00 */
  2744  		i |= 5
  2745  
  2746  	case REG_R8,
  2747  		REG_R9,
  2748  		REG_R10,
  2749  		REG_R11,
  2750  		REG_R12,
  2751  		REG_R13,
  2752  		REG_R14,
  2753  		REG_R15:
  2754  		if ctxt.Arch.Family == sys.I386 {
  2755  			goto bad
  2756  		}
  2757  		fallthrough
  2758  
  2759  	case REG_AX,
  2760  		REG_CX,
  2761  		REG_DX,
  2762  		REG_BX,
  2763  		REG_SP,
  2764  		REG_BP,
  2765  		REG_SI,
  2766  		REG_DI:
  2767  		i |= reg[base]
  2768  	}
  2769  
  2770  	asmbuf.Put1(byte(i))
  2771  	return
  2772  
  2773  bad:
  2774  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2775  	asmbuf.Put1(0)
  2776  	return
  2777  }
  2778  
  2779  func (asmbuf *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  2780  	var rel obj.Reloc
  2781  
  2782  	v := vaddr(ctxt, p, a, &rel)
  2783  	if rel.Siz != 0 {
  2784  		if rel.Siz != 4 {
  2785  			ctxt.Diag("bad reloc")
  2786  		}
  2787  		r := obj.Addrel(cursym)
  2788  		*r = rel
  2789  		r.Off = int32(p.Pc + int64(asmbuf.Len()))
  2790  	}
  2791  
  2792  	asmbuf.PutInt32(int32(v))
  2793  }
  2794  
  2795  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2796  	if r != nil {
  2797  		*r = obj.Reloc{}
  2798  	}
  2799  
  2800  	switch a.Name {
  2801  	case obj.NAME_STATIC,
  2802  		obj.NAME_GOTREF,
  2803  		obj.NAME_EXTERN:
  2804  		s := a.Sym
  2805  		if r == nil {
  2806  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2807  			log.Fatalf("reloc")
  2808  		}
  2809  
  2810  		if a.Name == obj.NAME_GOTREF {
  2811  			r.Siz = 4
  2812  			r.Type = objabi.R_GOTPCREL
  2813  		} else if useAbs(ctxt, s) {
  2814  			r.Siz = 4
  2815  			r.Type = objabi.R_ADDR
  2816  		} else {
  2817  			r.Siz = 4
  2818  			r.Type = objabi.R_PCREL
  2819  		}
  2820  
  2821  		r.Off = -1 // caller must fill in
  2822  		r.Sym = s
  2823  		r.Add = a.Offset
  2824  
  2825  		return 0
  2826  	}
  2827  
  2828  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2829  		if r == nil {
  2830  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2831  			log.Fatalf("reloc")
  2832  		}
  2833  
  2834  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  2835  			r.Type = objabi.R_TLS_LE
  2836  			r.Siz = 4
  2837  			r.Off = -1 // caller must fill in
  2838  			r.Add = a.Offset
  2839  		}
  2840  		return 0
  2841  	}
  2842  
  2843  	return a.Offset
  2844  }
  2845  
// asmandsz emits the ModRM byte for operand a, followed by whatever SIB
// byte and displacement the addressing form requires, and records a
// relocation on cursym when the displacement needs one.
//
// r is placed in bits 3-5 of the ModRM byte. rex is masked down to
// 0x40|Rxr and OR-ed into asmbuf.rexflag together with the extension bits
// contributed by the base and index registers of a. m64 is not referenced
// in the body.
//
// Operands that cannot be encoded are reported through ctxt.Diag.
func (asmbuf *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && asmbuf.rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement, truncated to 32 bits.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is always rejected here; the two checks only add a
		// more specific diagnostic before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register operand: mod field 3, register number in the low bits.
		// A non-zero offset makes no sense here and is rejected.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		asmbuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		asmbuf.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: the low ModRM field is 4
	// and asmidx emits the SIB byte that follows.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// This base shadows the function-level variable of the same name;
		// every path through this block leaves via return or goto.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		asmbuf.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod field 0 with a 32-bit displacement.
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement needed (mod field 0). BP and R13 are excluded
		// from this short form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in one signed byte (mod field 1).
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			asmbuf.Put1(byte(v))
			return
		}

		// Otherwise a full 32-bit displacement (mod field 2).
		asmbuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with REG_TLS as index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	asmbuf.rexflag |= regrex[base]&Rxb | rex
	// No usable base register (none, a segment register, or TLS): the
	// operand is encoded as a bare 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Note: && binds tighter than ||, so the non-AMD64 test alone is
		// enough to select the low-field-5 form below.
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			asmbuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		asmbuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always get a SIB byte (emitted by asmidx with no
	// index register).
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			asmbuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			asmbuf.Put1(byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: plain ModRM, no SIB byte.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// Move the offset into the addend of an R_TLS_LE relocation
			// and encode a zero displacement.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// No displacement (mod field 0); BP and R13 are excluded from
		// this short form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		// One-byte displacement (mod field 1).
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		// 32-bit displacement (mod field 2).
		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

	// putrelv writes the 32-bit displacement v, first registering the
	// pending relocation (if any) at the current output position.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(asmbuf.Len()))
	}

	asmbuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  3046  
  3047  func (asmbuf *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3048  	asmbuf.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3049  }
  3050  
  3051  func (asmbuf *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3052  	asmbuf.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3053  }
  3054  
  3055  func bytereg(a *obj.Addr, t *uint8) {
  3056  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3057  		a.Reg += REG_AL - REG_AX
  3058  		*t = 0
  3059  	}
  3060  }
  3061  
  3062  func unbytereg(a *obj.Addr, t *uint8) {
  3063  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3064  		a.Reg += REG_AX - REG_AL
  3065  		*t = 0
  3066  	}
  3067  }
  3068  
const (
	// E is the byte 0xff used inside the opcode arrays of ymovtab.
	// NOTE(review): it appears right after the last opcode byte of an
	// entry, so it presumably marks the end of the sequence; confirm
	// against the code that consumes ymovtab.
	E = 0xff
)
  3072  
// ymovtab is a table of special-case instruction encodings (segment,
// control, debug and test register moves, descriptor-table loads and
// stores, double shifts, and TLS base loads).
//
// NOTE(review): the Movtab declaration and the code that consumes this
// table are outside this section. Read positionally, each entry looks
// like: instruction, operand classes (apparently from, from3, to), a
// small integer code, and up to four opcode bytes. The meaning of the
// code value, and the assumption that the final all-zero entry terminates
// the table, should be confirmed there.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3212  
  3213  func isax(a *obj.Addr) bool {
  3214  	switch a.Reg {
  3215  	case REG_AX, REG_AL, REG_AH:
  3216  		return true
  3217  	}
  3218  
  3219  	if a.Index == REG_AX {
  3220  		return true
  3221  	}
  3222  	return false
  3223  }
  3224  
  3225  func subreg(p *obj.Prog, from int, to int) {
  3226  	if false { /* debug['Q'] */
  3227  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3228  	}
  3229  
  3230  	if int(p.From.Reg) == from {
  3231  		p.From.Reg = int16(to)
  3232  		p.Ft = 0
  3233  	}
  3234  
  3235  	if int(p.To.Reg) == from {
  3236  		p.To.Reg = int16(to)
  3237  		p.Tt = 0
  3238  	}
  3239  
  3240  	if int(p.From.Index) == from {
  3241  		p.From.Index = int16(to)
  3242  		p.Ft = 0
  3243  	}
  3244  
  3245  	if int(p.To.Index) == from {
  3246  		p.To.Index = int16(to)
  3247  		p.Tt = 0
  3248  	}
  3249  
  3250  	if false { /* debug['Q'] */
  3251  		fmt.Printf("%v\n", p)
  3252  	}
  3253  }
  3254  
  3255  func (asmbuf *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3256  	switch op {
  3257  	case Pm, Pe, Pf2, Pf3:
  3258  		if osize != 1 {
  3259  			if op != Pm {
  3260  				asmbuf.Put1(byte(op))
  3261  			}
  3262  			asmbuf.Put1(Pm)
  3263  			z++
  3264  			op = int(o.op[z])
  3265  			break
  3266  		}
  3267  		fallthrough
  3268  
  3269  	default:
  3270  		if asmbuf.Len() == 0 || asmbuf.Last() != Pm {
  3271  			asmbuf.Put1(Pm)
  3272  		}
  3273  	}
  3274  
  3275  	asmbuf.Put1(byte(op))
  3276  	return z
  3277  }
  3278  
// bpduff1 is a fixed machine-code sequence: it stores BP at -16(SP) and
// then points BP at that slot.
// NOTE(review): the use site is outside this section; the name suggests
// it is emitted before a Duff's-device call — confirm.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}

// bpduff2 is a fixed machine-code sequence that reloads BP from 0(BP).
// NOTE(review): presumably the counterpart of bpduff1, emitted after the
// call to restore the previous BP — confirm at the use site.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
  3287  
  3288  // Emit VEX prefix and opcode byte.
  3289  // The three addresses are the r/m, vvvv, and reg fields.
  3290  // The reg and rm arguments appear in the same order as the
  3291  // arguments to asmand, which typically follows the call to asmvex.
  3292  // The final two arguments are the VEX prefix (see encoding above)
  3293  // and the opcode byte.
  3294  // For details about vex prefix see:
  3295  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3296  func (asmbuf *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3297  	asmbuf.vexflag = 1
  3298  	rexR := 0
  3299  	if r != nil {
  3300  		rexR = regrex[r.Reg] & Rxr
  3301  	}
  3302  	rexB := 0
  3303  	rexX := 0
  3304  	if rm != nil {
  3305  		rexB = regrex[rm.Reg] & Rxb
  3306  		rexX = regrex[rm.Index] & Rxx
  3307  	}
  3308  	vexM := (vex >> 3) & 0xF
  3309  	vexWLP := vex & 0x87
  3310  	vexV := byte(0)
  3311  	if v != nil {
  3312  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3313  	}
  3314  	vexV ^= 0xF
  3315  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3316  		// Can use 2-byte encoding.
  3317  		asmbuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3318  	} else {
  3319  		// Must use 3-byte encoding.
  3320  		asmbuf.Put3(0xc4,
  3321  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3322  			vexV<<3|vexWLP,
  3323  		)
  3324  	}
  3325  	asmbuf.Put1(opcode)
  3326  }
  3327  
  3328  func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  3329  	o := opindex[p.As&obj.AMask]
  3330  
  3331  	if o == nil {
  3332  		ctxt.Diag("asmins: missing op %v", p)
  3333  		return
  3334  	}
  3335  
  3336  	pre := prefixof(ctxt, p, &p.From)
  3337  	if pre != 0 {
  3338  		asmbuf.Put1(byte(pre))
  3339  	}
  3340  	pre = prefixof(ctxt, p, &p.To)
  3341  	if pre != 0 {
  3342  		asmbuf.Put1(byte(pre))
  3343  	}
  3344  
  3345  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3346  	// which encodes as SHRQ $32(DX*0), AX.
  3347  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3348  	// Change encoding generated by assemblers and compilers and remove.
  3349  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3350  		p.From3 = new(obj.Addr)
  3351  		p.From3.Type = obj.TYPE_REG
  3352  		p.From3.Reg = p.From.Index
  3353  		p.From.Index = 0
  3354  	}
  3355  
  3356  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3357  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3358  	switch p.As {
  3359  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3360  		if p.From3Type() == obj.TYPE_NONE {
  3361  			p.From3 = new(obj.Addr)
  3362  			*p.From3 = p.From
  3363  			p.From = obj.Addr{}
  3364  			p.From.Type = obj.TYPE_CONST
  3365  			p.From.Offset = p.To.Offset
  3366  			p.To.Offset = 0
  3367  		}
  3368  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3369  		if p.From3Type() == obj.TYPE_NONE {
  3370  			p.From3 = new(obj.Addr)
  3371  			*p.From3 = p.To
  3372  			p.To = obj.Addr{}
  3373  			p.To.Type = obj.TYPE_CONST
  3374  			p.To.Offset = p.From3.Offset
  3375  			p.From3.Offset = 0
  3376  		}
  3377  	}
  3378  
  3379  	if p.Ft == 0 {
  3380  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3381  	}
  3382  	if p.Tt == 0 {
  3383  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3384  	}
  3385  
  3386  	ft := int(p.Ft) * Ymax
  3387  	f3t := Ynone * Ymax
  3388  	if p.From3 != nil {
  3389  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3390  	}
  3391  	tt := int(p.Tt) * Ymax
  3392  
  3393  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3394  	z := 0
  3395  	var a *obj.Addr
  3396  	var l int
  3397  	var op int
  3398  	var q *obj.Prog
  3399  	var r *obj.Reloc
  3400  	var rel obj.Reloc
  3401  	var v int64
  3402  	for i := range o.ytab {
  3403  		yt := &o.ytab[i]
  3404  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3405  			switch o.prefix {
  3406  			case Px1: /* first option valid only in 32-bit mode */
  3407  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  3408  					z += int(yt.zoffset) + xo
  3409  					continue
  3410  				}
  3411  			case Pq: /* 16 bit escape and opcode escape */
  3412  				asmbuf.Put2(Pe, Pm)
  3413  
  3414  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3415  				asmbuf.rexflag |= Pw
  3416  				asmbuf.Put2(Pe, Pm)
  3417  
  3418  			case Pq4: /*  66 0F 38 */
  3419  				asmbuf.Put3(0x66, 0x0F, 0x38)
  3420  
  3421  			case Pf2, /* xmm opcode escape */
  3422  				Pf3:
  3423  				asmbuf.Put2(o.prefix, Pm)
  3424  
  3425  			case Pef3:
  3426  				asmbuf.Put3(Pe, Pf3, Pm)
  3427  
  3428  			case Pfw: /* xmm opcode escape + REX.W */
  3429  				asmbuf.rexflag |= Pw
  3430  				asmbuf.Put2(Pf3, Pm)
  3431  
  3432  			case Pm: /* opcode escape */
  3433  				asmbuf.Put1(Pm)
  3434  
  3435  			case Pe: /* 16 bit escape */
  3436  				asmbuf.Put1(Pe)
  3437  
  3438  			case Pw: /* 64-bit escape */
  3439  				if ctxt.Arch.Family != sys.AMD64 {
  3440  					ctxt.Diag("asmins: illegal 64: %v", p)
  3441  				}
  3442  				asmbuf.rexflag |= Pw
  3443  
  3444  			case Pw8: /* 64-bit escape if z >= 8 */
  3445  				if z >= 8 {
  3446  					if ctxt.Arch.Family != sys.AMD64 {
  3447  						ctxt.Diag("asmins: illegal 64: %v", p)
  3448  					}
  3449  					asmbuf.rexflag |= Pw
  3450  				}
  3451  
  3452  			case Pb: /* botch */
  3453  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3454  					goto bad
  3455  				}
  3456  				// NOTE(rsc): This is probably safe to do always,
  3457  				// but when enabled it chooses different encodings
  3458  				// than the old cmd/internal/obj/i386 code did,
  3459  				// which breaks our "same bits out" checks.
  3460  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3461  				// in the original obj/i386, and it would encode
  3462  				// (using a valid, shorter form) as 3c 00 if we enabled
  3463  				// the call to bytereg here.
  3464  				if ctxt.Arch.Family == sys.AMD64 {
  3465  					bytereg(&p.From, &p.Ft)
  3466  					bytereg(&p.To, &p.Tt)
  3467  				}
  3468  
  3469  			case P32: /* 32 bit but illegal if 64-bit mode */
  3470  				if ctxt.Arch.Family == sys.AMD64 {
  3471  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3472  				}
  3473  
  3474  			case Py: /* 64-bit only, no prefix */
  3475  				if ctxt.Arch.Family != sys.AMD64 {
  3476  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3477  				}
  3478  
  3479  			case Py1: /* 64-bit only if z < 1, no prefix */
  3480  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  3481  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3482  				}
  3483  
  3484  			case Py3: /* 64-bit only if z < 3, no prefix */
  3485  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  3486  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3487  				}
  3488  			}
  3489  
  3490  			if z >= len(o.op) {
  3491  				log.Fatalf("asmins bad table %v", p)
  3492  			}
  3493  			op = int(o.op[z])
  3494  			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
  3495  			if op == 0x0f && o.prefix != Pvex {
  3496  				asmbuf.Put1(byte(op))
  3497  				z++
  3498  				op = int(o.op[z])
  3499  			}
  3500  
  3501  			switch yt.zcase {
  3502  			default:
  3503  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3504  				return
  3505  
  3506  			case Zpseudo:
  3507  				break
  3508  
  3509  			case Zlit:
  3510  				for ; ; z++ {
  3511  					op = int(o.op[z])
  3512  					if op == 0 {
  3513  						break
  3514  					}
  3515  					asmbuf.Put1(byte(op))
  3516  				}
  3517  
  3518  			case Zlitm_r:
  3519  				for ; ; z++ {
  3520  					op = int(o.op[z])
  3521  					if op == 0 {
  3522  						break
  3523  					}
  3524  					asmbuf.Put1(byte(op))
  3525  				}
  3526  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3527  
  3528  			case Zmb_r:
  3529  				bytereg(&p.From, &p.Ft)
  3530  				fallthrough
  3531  
  3532  			case Zm_r:
  3533  				asmbuf.Put1(byte(op))
  3534  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3535  
  3536  			case Zm2_r:
  3537  				asmbuf.Put2(byte(op), o.op[z+1])
  3538  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3539  
  3540  			case Zm_r_xm:
  3541  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3542  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3543  
  3544  			case Zm_r_xm_nr:
  3545  				asmbuf.rexflag = 0
  3546  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3547  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3548  
  3549  			case Zm_r_i_xm:
  3550  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3551  				asmbuf.asmand(ctxt, cursym, p, &p.From, p.From3)
  3552  				asmbuf.Put1(byte(p.To.Offset))
  3553  
  3554  			case Zibm_r, Zibr_m:
  3555  				for {
  3556  					tmp1 := z
  3557  					z++
  3558  					op = int(o.op[tmp1])
  3559  					if op == 0 {
  3560  						break
  3561  					}
  3562  					asmbuf.Put1(byte(op))
  3563  				}
  3564  				if yt.zcase == Zibr_m {
  3565  					asmbuf.asmand(ctxt, cursym, p, &p.To, p.From3)
  3566  				} else {
  3567  					asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3568  				}
  3569  				asmbuf.Put1(byte(p.From.Offset))
  3570  
  3571  			case Zaut_r:
  3572  				asmbuf.Put1(0x8d) // leal
  3573  				if p.From.Type != obj.TYPE_ADDR {
  3574  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3575  				}
  3576  				p.From.Type = obj.TYPE_MEM
  3577  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3578  				p.From.Type = obj.TYPE_ADDR
  3579  
  3580  			case Zm_o:
  3581  				asmbuf.Put1(byte(op))
  3582  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3583  
  3584  			case Zr_m:
  3585  				asmbuf.Put1(byte(op))
  3586  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3587  
  3588  			case Zvex_rm_v_r:
  3589  				asmbuf.asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3590  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3591  
  3592  			case Zvex_i_r_v:
  3593  				asmbuf.asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
  3594  				regnum := byte(0x7)
  3595  				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
  3596  					regnum &= byte(p.From3.Reg - REG_X0)
  3597  				} else {
  3598  					regnum &= byte(p.From3.Reg - REG_Y0)
  3599  				}
  3600  				asmbuf.Put1(byte(o.op[z+2]) | regnum)
  3601  				asmbuf.Put1(byte(p.From.Offset))
  3602  
  3603  			case Zvex_i_rm_v_r:
  3604  				asmbuf.asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3605  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3606  				asmbuf.Put1(byte(p.From3.Offset))
  3607  
  3608  			case Zvex_i_rm_r:
  3609  				asmbuf.asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
  3610  				asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3611  				asmbuf.Put1(byte(p.From.Offset))
  3612  
  3613  			case Zvex_v_rm_r:
  3614  				asmbuf.asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3615  				asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3616  
  3617  			case Zvex_r_v_rm:
  3618  				asmbuf.asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3619  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3620  
  3621  			case Zr_m_xm:
  3622  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3623  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3624  
  3625  			case Zr_m_xm_nr:
  3626  				asmbuf.rexflag = 0
  3627  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3628  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3629  
  3630  			case Zo_m:
  3631  				asmbuf.Put1(byte(op))
  3632  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3633  
  3634  			case Zcallindreg:
  3635  				r = obj.Addrel(cursym)
  3636  				r.Off = int32(p.Pc)
  3637  				r.Type = objabi.R_CALLIND
  3638  				r.Siz = 0
  3639  				fallthrough
  3640  
  3641  			case Zo_m64:
  3642  				asmbuf.Put1(byte(op))
  3643  				asmbuf.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  3644  
  3645  			case Zm_ibo:
  3646  				asmbuf.Put1(byte(op))
  3647  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3648  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3649  
  3650  			case Zibo_m:
  3651  				asmbuf.Put1(byte(op))
  3652  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3653  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3654  
  3655  			case Zibo_m_xm:
  3656  				z = asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3657  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3658  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3659  
  3660  			case Z_ib, Zib_:
  3661  				if yt.zcase == Zib_ {
  3662  					a = &p.From
  3663  				} else {
  3664  					a = &p.To
  3665  				}
  3666  				asmbuf.Put1(byte(op))
  3667  				if p.As == AXABORT {
  3668  					asmbuf.Put1(o.op[z+1])
  3669  				}
  3670  				asmbuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3671  
  3672  			case Zib_rp:
  3673  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3674  				asmbuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3675  
  3676  			case Zil_rp:
  3677  				asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3678  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3679  				if o.prefix == Pe {
  3680  					v = vaddr(ctxt, p, &p.From, nil)
  3681  					asmbuf.PutInt16(int16(v))
  3682  				} else {
  3683  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3684  				}
  3685  
  3686  			case Zo_iw:
  3687  				asmbuf.Put1(byte(op))
  3688  				if p.From.Type != obj.TYPE_NONE {
  3689  					v = vaddr(ctxt, p, &p.From, nil)
  3690  					asmbuf.PutInt16(int16(v))
  3691  				}
  3692  
  3693  			case Ziq_rp:
  3694  				v = vaddr(ctxt, p, &p.From, &rel)
  3695  				l = int(v >> 32)
  3696  				if l == 0 && rel.Siz != 8 {
  3697  					//p->mark |= 0100;
  3698  					//print("zero: %llux %v\n", v, p);
  3699  					asmbuf.rexflag &^= (0x40 | Rxw)
  3700  
  3701  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3702  					asmbuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3703  					if rel.Type != 0 {
  3704  						r = obj.Addrel(cursym)
  3705  						*r = rel
  3706  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3707  					}
  3708  
  3709  					asmbuf.PutInt32(int32(v))
  3710  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3711  
  3712  					//p->mark |= 0100;
  3713  					//print("sign: %llux %v\n", v, p);
  3714  					asmbuf.Put1(0xc7)
  3715  					asmbuf.asmando(ctxt, cursym, p, &p.To, 0)
  3716  
  3717  					asmbuf.PutInt32(int32(v)) // need all 8
  3718  				} else {
  3719  					//print("all: %llux %v\n", v, p);
  3720  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3721  					asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3722  					if rel.Type != 0 {
  3723  						r = obj.Addrel(cursym)
  3724  						*r = rel
  3725  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3726  					}
  3727  
  3728  					asmbuf.PutInt64(v)
  3729  				}
  3730  
  3731  			case Zib_rr:
  3732  				asmbuf.Put1(byte(op))
  3733  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3734  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3735  
  3736  			case Z_il, Zil_:
  3737  				if yt.zcase == Zil_ {
  3738  					a = &p.From
  3739  				} else {
  3740  					a = &p.To
  3741  				}
  3742  				asmbuf.Put1(byte(op))
  3743  				if o.prefix == Pe {
  3744  					v = vaddr(ctxt, p, a, nil)
  3745  					asmbuf.PutInt16(int16(v))
  3746  				} else {
  3747  					asmbuf.relput4(ctxt, cursym, p, a)
  3748  				}
  3749  
  3750  			case Zm_ilo, Zilo_m:
  3751  				asmbuf.Put1(byte(op))
  3752  				if yt.zcase == Zilo_m {
  3753  					a = &p.From
  3754  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3755  				} else {
  3756  					a = &p.To
  3757  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3758  				}
  3759  
  3760  				if o.prefix == Pe {
  3761  					v = vaddr(ctxt, p, a, nil)
  3762  					asmbuf.PutInt16(int16(v))
  3763  				} else {
  3764  					asmbuf.relput4(ctxt, cursym, p, a)
  3765  				}
  3766  
  3767  			case Zil_rr:
  3768  				asmbuf.Put1(byte(op))
  3769  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3770  				if o.prefix == Pe {
  3771  					v = vaddr(ctxt, p, &p.From, nil)
  3772  					asmbuf.PutInt16(int16(v))
  3773  				} else {
  3774  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3775  				}
  3776  
  3777  			case Z_rp:
  3778  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3779  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3780  
  3781  			case Zrp_:
  3782  				asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3783  				asmbuf.Put1(byte(op + reg[p.From.Reg]))
  3784  
  3785  			case Zclr:
  3786  				asmbuf.rexflag &^= Pw
  3787  				asmbuf.Put1(byte(op))
  3788  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3789  
  3790  			case Zcallcon, Zjmpcon:
  3791  				if yt.zcase == Zcallcon {
  3792  					asmbuf.Put1(byte(op))
  3793  				} else {
  3794  					asmbuf.Put1(o.op[z+1])
  3795  				}
  3796  				r = obj.Addrel(cursym)
  3797  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3798  				r.Type = objabi.R_PCREL
  3799  				r.Siz = 4
  3800  				r.Add = p.To.Offset
  3801  				asmbuf.PutInt32(0)
  3802  
  3803  			case Zcallind:
  3804  				asmbuf.Put2(byte(op), o.op[z+1])
  3805  				r = obj.Addrel(cursym)
  3806  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3807  				if ctxt.Arch.Family == sys.AMD64 {
  3808  					r.Type = objabi.R_PCREL
  3809  				} else {
  3810  					r.Type = objabi.R_ADDR
  3811  				}
  3812  				r.Siz = 4
  3813  				r.Add = p.To.Offset
  3814  				r.Sym = p.To.Sym
  3815  				asmbuf.PutInt32(0)
  3816  
  3817  			case Zcall, Zcallduff:
  3818  				if p.To.Sym == nil {
  3819  					ctxt.Diag("call without target")
  3820  					log.Fatalf("bad code")
  3821  				}
  3822  
  3823  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3824  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3825  				}
  3826  
  3827  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3828  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3829  					// (the call jumps into the middle of the function).
  3830  					// This makes it possible to see call sites for duffcopy/duffzero in
  3831  					// BP-based profiling tools like Linux perf (which is the
  3832  					// whole point of obj.Framepointer_enabled).
  3833  					// MOVQ BP, -16(SP)
  3834  					// LEAQ -16(SP), BP
  3835  					asmbuf.Put(bpduff1)
  3836  				}
  3837  				asmbuf.Put1(byte(op))
  3838  				r = obj.Addrel(cursym)
  3839  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3840  				r.Sym = p.To.Sym
  3841  				r.Add = p.To.Offset
  3842  				r.Type = objabi.R_CALL
  3843  				r.Siz = 4
  3844  				asmbuf.PutInt32(0)
  3845  
  3846  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3847  					// Pop BP pushed above.
  3848  					// MOVQ 0(BP), BP
  3849  					asmbuf.Put(bpduff2)
  3850  				}
  3851  
  3852  			// TODO: jump across functions needs reloc
  3853  			case Zbr, Zjmp, Zloop:
  3854  				if p.As == AXBEGIN {
  3855  					asmbuf.Put1(byte(op))
  3856  				}
  3857  				if p.To.Sym != nil {
  3858  					if yt.zcase != Zjmp {
  3859  						ctxt.Diag("branch to ATEXT")
  3860  						log.Fatalf("bad code")
  3861  					}
  3862  
  3863  					asmbuf.Put1(o.op[z+1])
  3864  					r = obj.Addrel(cursym)
  3865  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3866  					r.Sym = p.To.Sym
  3867  					r.Type = objabi.R_PCREL
  3868  					r.Siz = 4
  3869  					asmbuf.PutInt32(0)
  3870  					break
  3871  				}
  3872  
  3873  				// Assumes q is in this function.
  3874  				// TODO: Check in input, preserve in brchain.
  3875  
  3876  				// Fill in backward jump now.
  3877  				q = p.Pcond
  3878  
  3879  				if q == nil {
  3880  					ctxt.Diag("jmp/branch/loop without target")
  3881  					log.Fatalf("bad code")
  3882  				}
  3883  
  3884  				if p.Back&1 != 0 {
  3885  					v = q.Pc - (p.Pc + 2)
  3886  					if v >= -128 && p.As != AXBEGIN {
  3887  						if p.As == AJCXZL {
  3888  							asmbuf.Put1(0x67)
  3889  						}
  3890  						asmbuf.Put2(byte(op), byte(v))
  3891  					} else if yt.zcase == Zloop {
  3892  						ctxt.Diag("loop too far: %v", p)
  3893  					} else {
  3894  						v -= 5 - 2
  3895  						if p.As == AXBEGIN {
  3896  							v--
  3897  						}
  3898  						if yt.zcase == Zbr {
  3899  							asmbuf.Put1(0x0f)
  3900  							v--
  3901  						}
  3902  
  3903  						asmbuf.Put1(o.op[z+1])
  3904  						asmbuf.PutInt32(int32(v))
  3905  					}
  3906  
  3907  					break
  3908  				}
  3909  
  3910  				// Annotate target; will fill in later.
  3911  				p.Forwd = q.Rel
  3912  
  3913  				q.Rel = p
  3914  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3915  					if p.As == AJCXZL {
  3916  						asmbuf.Put1(0x67)
  3917  					}
  3918  					asmbuf.Put2(byte(op), 0)
  3919  				} else if yt.zcase == Zloop {
  3920  					ctxt.Diag("loop too far: %v", p)
  3921  				} else {
  3922  					if yt.zcase == Zbr {
  3923  						asmbuf.Put1(0x0f)
  3924  					}
  3925  					asmbuf.Put1(o.op[z+1])
  3926  					asmbuf.PutInt32(0)
  3927  				}
  3928  
  3929  				break
  3930  
  3931  			/*
  3932  				v = q->pc - p->pc - 2;
  3933  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3934  					*ctxt->andptr++ = op;
  3935  					*ctxt->andptr++ = v;
  3936  				} else {
  3937  					v -= 5-2;
  3938  					if(yt.zcase == Zbr) {
  3939  						*ctxt->andptr++ = 0x0f;
  3940  						v--;
  3941  					}
  3942  					*ctxt->andptr++ = o->op[z+1];
  3943  					*ctxt->andptr++ = v;
  3944  					*ctxt->andptr++ = v>>8;
  3945  					*ctxt->andptr++ = v>>16;
  3946  					*ctxt->andptr++ = v>>24;
  3947  				}
  3948  			*/
  3949  
  3950  			case Zbyte:
  3951  				v = vaddr(ctxt, p, &p.From, &rel)
  3952  				if rel.Siz != 0 {
  3953  					rel.Siz = uint8(op)
  3954  					r = obj.Addrel(cursym)
  3955  					*r = rel
  3956  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3957  				}
  3958  
  3959  				asmbuf.Put1(byte(v))
  3960  				if op > 1 {
  3961  					asmbuf.Put1(byte(v >> 8))
  3962  					if op > 2 {
  3963  						asmbuf.PutInt16(int16(v >> 16))
  3964  						if op > 4 {
  3965  							asmbuf.PutInt32(int32(v >> 32))
  3966  						}
  3967  					}
  3968  				}
  3969  			}
  3970  
  3971  			return
  3972  		}
  3973  		z += int(yt.zoffset) + xo
  3974  	}
  3975  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3976  		var pp obj.Prog
  3977  		var t []byte
  3978  		if p.As == mo[0].as {
  3979  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3980  				t = mo[0].op[:]
  3981  				switch mo[0].code {
  3982  				default:
  3983  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3984  
  3985  				case 0: /* lit */
  3986  					for z = 0; t[z] != E; z++ {
  3987  						asmbuf.Put1(t[z])
  3988  					}
  3989  
  3990  				case 1: /* r,m */
  3991  					asmbuf.Put1(t[0])
  3992  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  3993  
  3994  				case 2: /* m,r */
  3995  					asmbuf.Put1(t[0])
  3996  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  3997  
  3998  				case 3: /* r,m - 2op */
  3999  					asmbuf.Put2(t[0], t[1])
  4000  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4001  					asmbuf.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4002  
  4003  				case 4: /* m,r - 2op */
  4004  					asmbuf.Put2(t[0], t[1])
  4005  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  4006  					asmbuf.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4007  
  4008  				case 5: /* load full pointer, trash heap */
  4009  					if t[0] != 0 {
  4010  						asmbuf.Put1(t[0])
  4011  					}
  4012  					switch p.To.Index {
  4013  					default:
  4014  						goto bad
  4015  
  4016  					case REG_DS:
  4017  						asmbuf.Put1(0xc5)
  4018  
  4019  					case REG_SS:
  4020  						asmbuf.Put2(0x0f, 0xb2)
  4021  
  4022  					case REG_ES:
  4023  						asmbuf.Put1(0xc4)
  4024  
  4025  					case REG_FS:
  4026  						asmbuf.Put2(0x0f, 0xb4)
  4027  
  4028  					case REG_GS:
  4029  						asmbuf.Put2(0x0f, 0xb5)
  4030  					}
  4031  
  4032  					asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  4033  
  4034  				case 6: /* double shift */
  4035  					if t[0] == Pw {
  4036  						if ctxt.Arch.Family != sys.AMD64 {
  4037  							ctxt.Diag("asmins: illegal 64: %v", p)
  4038  						}
  4039  						asmbuf.rexflag |= Pw
  4040  						t = t[1:]
  4041  					} else if t[0] == Pe {
  4042  						asmbuf.Put1(Pe)
  4043  						t = t[1:]
  4044  					}
  4045  
  4046  					switch p.From.Type {
  4047  					default:
  4048  						goto bad
  4049  
  4050  					case obj.TYPE_CONST:
  4051  						asmbuf.Put2(0x0f, t[0])
  4052  						asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4053  						asmbuf.Put1(byte(p.From.Offset))
  4054  
  4055  					case obj.TYPE_REG:
  4056  						switch p.From.Reg {
  4057  						default:
  4058  							goto bad
  4059  
  4060  						case REG_CL, REG_CX:
  4061  							asmbuf.Put2(0x0f, t[1])
  4062  							asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4063  						}
  4064  					}
  4065  
  4066  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4067  				// where you load the TLS base register into a register and then index off that
  4068  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4069  				// are handled in prefixof above and should not be listed here.
  4070  				case 7: /* mov tls, r */
  4071  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4072  						ctxt.Diag("invalid load of TLS: %v", p)
  4073  					}
  4074  
  4075  					if ctxt.Arch.Family == sys.I386 {
  4076  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4077  						// where you load the TLS base register into a register and then index off that
  4078  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4079  						// are handled in prefixof above and should not be listed here.
  4080  						switch ctxt.Headtype {
  4081  						default:
  4082  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4083  
  4084  						case objabi.Hlinux,
  4085  							objabi.Hnacl:
  4086  							if ctxt.Flag_shared {
  4087  								// Note that this is not generating the same insns as the other cases.
  4088  								//     MOV TLS, dst
  4089  								// becomes
  4090  								//     call __x86.get_pc_thunk.dst
  4091  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4092  								// which is encoded as
  4093  								//     call __x86.get_pc_thunk.dst
  4094  								//     movq 0(dst), dst
  4095  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4096  								// is g, which we can't check here, but will when we assemble the second
  4097  								// instruction.
  4098  								dst := p.To.Reg
  4099  								asmbuf.Put1(0xe8)
  4100  								r = obj.Addrel(cursym)
  4101  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4102  								r.Type = objabi.R_CALL
  4103  								r.Siz = 4
  4104  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  4105  								asmbuf.PutInt32(0)
  4106  
  4107  								asmbuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4108  								r = obj.Addrel(cursym)
  4109  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4110  								r.Type = objabi.R_TLS_IE
  4111  								r.Siz = 4
  4112  								r.Add = 2
  4113  								asmbuf.PutInt32(0)
  4114  							} else {
  4115  								// ELF TLS base is 0(GS).
  4116  								pp.From = p.From
  4117  
  4118  								pp.From.Type = obj.TYPE_MEM
  4119  								pp.From.Reg = REG_GS
  4120  								pp.From.Offset = 0
  4121  								pp.From.Index = REG_NONE
  4122  								pp.From.Scale = 0
  4123  								asmbuf.Put2(0x65, // GS
  4124  									0x8B)
  4125  								asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4126  							}
  4127  						case objabi.Hplan9:
  4128  							pp.From = obj.Addr{}
  4129  							pp.From.Type = obj.TYPE_MEM
  4130  							pp.From.Name = obj.NAME_EXTERN
  4131  							pp.From.Sym = plan9privates
  4132  							pp.From.Offset = 0
  4133  							pp.From.Index = REG_NONE
  4134  							asmbuf.Put1(0x8B)
  4135  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4136  
  4137  						case objabi.Hwindows:
  4138  							// Windows TLS base is always 0x14(FS).
  4139  							pp.From = p.From
  4140  
  4141  							pp.From.Type = obj.TYPE_MEM
  4142  							pp.From.Reg = REG_FS
  4143  							pp.From.Offset = 0x14
  4144  							pp.From.Index = REG_NONE
  4145  							pp.From.Scale = 0
  4146  							asmbuf.Put2(0x64, // FS
  4147  								0x8B)
  4148  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4149  						}
  4150  						break
  4151  					}
  4152  
  4153  					switch ctxt.Headtype {
  4154  					default:
  4155  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4156  
  4157  					case objabi.Hlinux:
  4158  						if !ctxt.Flag_shared {
  4159  							log.Fatalf("unknown TLS base location for linux without -shared")
  4160  						}
  4161  						// Note that this is not generating the same insn as the other cases.
  4162  						//     MOV TLS, R_to
  4163  						// becomes
  4164  						//     movq g@gottpoff(%rip), R_to
  4165  						// which is encoded as
  4166  						//     movq 0(%rip), R_to
  4167  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4168  						// is g, which we can't check here, but will when we assemble the second
  4169  						// instruction.
  4170  						asmbuf.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4171  
  4172  						asmbuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4173  						r = obj.Addrel(cursym)
  4174  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4175  						r.Type = objabi.R_TLS_IE
  4176  						r.Siz = 4
  4177  						r.Add = -4
  4178  						asmbuf.PutInt32(0)
  4179  
  4180  					case objabi.Hplan9:
  4181  						pp.From = obj.Addr{}
  4182  						pp.From.Type = obj.TYPE_MEM
  4183  						pp.From.Name = obj.NAME_EXTERN
  4184  						pp.From.Sym = plan9privates
  4185  						pp.From.Offset = 0
  4186  						pp.From.Index = REG_NONE
  4187  						asmbuf.rexflag |= Pw
  4188  						asmbuf.Put1(0x8B)
  4189  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4190  
  4191  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4192  						// TLS base is 0(FS).
  4193  						pp.From = p.From
  4194  
  4195  						pp.From.Type = obj.TYPE_MEM
  4196  						pp.From.Name = obj.NAME_NONE
  4197  						pp.From.Reg = REG_NONE
  4198  						pp.From.Offset = 0
  4199  						pp.From.Index = REG_NONE
  4200  						pp.From.Scale = 0
  4201  						asmbuf.rexflag |= Pw
  4202  						asmbuf.Put2(0x64, // FS
  4203  							0x8B)
  4204  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4205  
  4206  					case objabi.Hwindows:
  4207  						// Windows TLS base is always 0x28(GS).
  4208  						pp.From = p.From
  4209  
  4210  						pp.From.Type = obj.TYPE_MEM
  4211  						pp.From.Name = obj.NAME_NONE
  4212  						pp.From.Reg = REG_GS
  4213  						pp.From.Offset = 0x28
  4214  						pp.From.Index = REG_NONE
  4215  						pp.From.Scale = 0
  4216  						asmbuf.rexflag |= Pw
  4217  						asmbuf.Put2(0x65, // GS
  4218  							0x8B)
  4219  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4220  					}
  4221  				}
  4222  				return
  4223  			}
  4224  		}
  4225  	}
  4226  	goto bad
  4227  
  4228  bad:
  4229  	if ctxt.Arch.Family != sys.AMD64 {
  4230  		/*
  4231  		 * here, the assembly has failed.
  4232  		 * if its a byte instruction that has
  4233  		 * unaddressable registers, try to
  4234  		 * exchange registers and reissue the
  4235  		 * instruction with the operands renamed.
  4236  		 */
  4237  		pp := *p
  4238  
  4239  		unbytereg(&pp.From, &pp.Ft)
  4240  		unbytereg(&pp.To, &pp.Tt)
  4241  
  4242  		z := int(p.From.Reg)
  4243  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4244  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4245  			// For now, different to keep bit-for-bit compatibility.
  4246  			if ctxt.Arch.Family == sys.I386 {
  4247  				breg := byteswapreg(ctxt, &p.To)
  4248  				if breg != REG_AX {
  4249  					asmbuf.Put1(0x87) // xchg lhs,bx
  4250  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4251  					subreg(&pp, z, breg)
  4252  					asmbuf.doasm(ctxt, cursym, &pp)
  4253  					asmbuf.Put1(0x87) // xchg lhs,bx
  4254  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4255  				} else {
  4256  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4257  					subreg(&pp, z, REG_AX)
  4258  					asmbuf.doasm(ctxt, cursym, &pp)
  4259  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4260  				}
  4261  				return
  4262  			}
  4263  
  4264  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4265  				// We certainly don't want to exchange
  4266  				// with AX if the op is MUL or DIV.
  4267  				asmbuf.Put1(0x87) // xchg lhs,bx
  4268  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4269  				subreg(&pp, z, REG_BX)
  4270  				asmbuf.doasm(ctxt, cursym, &pp)
  4271  				asmbuf.Put1(0x87) // xchg lhs,bx
  4272  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4273  			} else {
  4274  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4275  				subreg(&pp, z, REG_AX)
  4276  				asmbuf.doasm(ctxt, cursym, &pp)
  4277  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4278  			}
  4279  			return
  4280  		}
  4281  
  4282  		z = int(p.To.Reg)
  4283  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4284  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4285  			// For now, different to keep bit-for-bit compatibility.
  4286  			if ctxt.Arch.Family == sys.I386 {
  4287  				breg := byteswapreg(ctxt, &p.From)
  4288  				if breg != REG_AX {
  4289  					asmbuf.Put1(0x87) //xchg rhs,bx
  4290  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4291  					subreg(&pp, z, breg)
  4292  					asmbuf.doasm(ctxt, cursym, &pp)
  4293  					asmbuf.Put1(0x87) // xchg rhs,bx
  4294  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4295  				} else {
  4296  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4297  					subreg(&pp, z, REG_AX)
  4298  					asmbuf.doasm(ctxt, cursym, &pp)
  4299  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4300  				}
  4301  				return
  4302  			}
  4303  
  4304  			if isax(&p.From) {
  4305  				asmbuf.Put1(0x87) // xchg rhs,bx
  4306  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4307  				subreg(&pp, z, REG_BX)
  4308  				asmbuf.doasm(ctxt, cursym, &pp)
  4309  				asmbuf.Put1(0x87) // xchg rhs,bx
  4310  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4311  			} else {
  4312  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4313  				subreg(&pp, z, REG_AX)
  4314  				asmbuf.doasm(ctxt, cursym, &pp)
  4315  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4316  			}
  4317  			return
  4318  		}
  4319  	}
  4320  
  4321  	ctxt.Diag("invalid instruction: %v", p)
  4322  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4323  	return
  4324  }
  4325  
  4326  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4327  // which is not referenced in a.
  4328  // If a is empty, it returns BX to account for MULB-like instructions
  4329  // that might use DX and AX.
  4330  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4331  	cana, canb, canc, cand := true, true, true, true
  4332  	if a.Type == obj.TYPE_NONE {
  4333  		cana, cand = false, false
  4334  	}
  4335  
  4336  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4337  		switch a.Reg {
  4338  		case REG_NONE:
  4339  			cana, cand = false, false
  4340  		case REG_AX, REG_AL, REG_AH:
  4341  			cana = false
  4342  		case REG_BX, REG_BL, REG_BH:
  4343  			canb = false
  4344  		case REG_CX, REG_CL, REG_CH:
  4345  			canc = false
  4346  		case REG_DX, REG_DL, REG_DH:
  4347  			cand = false
  4348  		}
  4349  	}
  4350  
  4351  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4352  		switch a.Index {
  4353  		case REG_AX:
  4354  			cana = false
  4355  		case REG_BX:
  4356  			canb = false
  4357  		case REG_CX:
  4358  			canc = false
  4359  		case REG_DX:
  4360  			cand = false
  4361  		}
  4362  	}
  4363  
  4364  	switch {
  4365  	case cana:
  4366  		return REG_AX
  4367  	case canb:
  4368  		return REG_BX
  4369  	case canc:
  4370  		return REG_CX
  4371  	case cand:
  4372  		return REG_DX
  4373  	default:
  4374  		ctxt.Diag("impossible byte register")
  4375  		log.Fatalf("bad code")
  4376  		return 0
  4377  	}
  4378  }
  4379  
  4380  func isbadbyte(a *obj.Addr) bool {
  4381  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4382  }
  4383  
  4384  var naclret = []uint8{
  4385  	0x5e, // POPL SI
  4386  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4387  	0x83,
  4388  	0xe6,
  4389  	0xe0, // ANDL $~31, SI
  4390  	0x4c,
  4391  	0x01,
  4392  	0xfe, // ADDQ R15, SI
  4393  	0xff,
  4394  	0xe6, // JMP SI
  4395  }
  4396  
  4397  var naclret8 = []uint8{
  4398  	0x5d, // POPL BP
  4399  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4400  	0x83,
  4401  	0xe5,
  4402  	0xe0, // ANDL $~31, BP
  4403  	0xff,
  4404  	0xe5, // JMP BP
  4405  }
  4406  
  4407  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4408  
  4409  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4410  
  4411  var naclmovs = []uint8{
  4412  	0x89,
  4413  	0xf6, // MOVL SI, SI
  4414  	0x49,
  4415  	0x8d,
  4416  	0x34,
  4417  	0x37, // LEAQ (R15)(SI*1), SI
  4418  	0x89,
  4419  	0xff, // MOVL DI, DI
  4420  	0x49,
  4421  	0x8d,
  4422  	0x3c,
  4423  	0x3f, // LEAQ (R15)(DI*1), DI
  4424  }
  4425  
  4426  var naclstos = []uint8{
  4427  	0x89,
  4428  	0xff, // MOVL DI, DI
  4429  	0x49,
  4430  	0x8d,
  4431  	0x3c,
  4432  	0x3f, // LEAQ (R15)(DI*1), DI
  4433  }
  4434  
  4435  func (asmbuf *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
  4436  	if reg >= REG_R8 {
  4437  		asmbuf.Put1(0x45)
  4438  	}
  4439  	reg = (reg - REG_AX) & 7
  4440  	asmbuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4441  }
  4442  
// asmins assembles the single instruction p into asmbuf. The buffer is reset
// on entry, so on return it holds only the bytes produced for p.
//
// The instruction encoding itself is delegated to doasm. Around that call
// asmins performs these steps, in order:
//
//   - On Native Client targets (ctxt.Headtype == objabi.Hnacl) it replaces or
//     prefixes certain instructions with fixed byte sequences (RET, CALL/JMP
//     through a register, INT, and on amd64 the SCAS/STOS/MOVS families). On
//     amd64 NaCl, REP/REPN/LOCK emit nothing themselves; they are remembered
//     in asmbuf and written as prefix bytes in front of the next instruction.
//   - If doasm set rexflag without vexflag, it inserts a REX prefix byte after
//     any leading legacy prefix bytes.
//   - It walks cursym.R backwards over the relocations whose offset is at or
//     beyond p.Pc and fixes up their offsets and addends.
//     NOTE(review): presumably these are exactly the relocations doasm just
//     added for p; confirm against doasm.
//   - On amd64 NaCl, for instructions other than CMPL/CMPQ whose destination
//     is the register SP or BP, it appends naclspfix or naclbpfix.
func (asmbuf *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
	asmbuf.Reset()

	// Native Client, 386.
	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 {
		switch p.As {
		case obj.ARET:
			// RET is replaced wholesale by the naclret8 sequence.
			asmbuf.Put(naclret8)
			return

		case obj.ACALL,
			obj.AJMP:
			// Indirect branch through AX..DI: prefix with ANDL $~31, reg.
			// The branch itself is still emitted by doasm below.
			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
			}

		case AINT:
			// INT is replaced by the single byte 0xf4 (HLT).
			asmbuf.Put1(0xf4)
			return
		}
	}

	// Native Client, amd64.
	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
		// REP, REPN and LOCK produce no bytes of their own here. They are
		// recorded in asmbuf and flushed as prefix bytes further down, when
		// the following instruction is assembled.
		if p.As == AREP {
			asmbuf.rep++
			return
		}

		if p.As == AREPN {
			asmbuf.repn++
			return
		}

		if p.As == ALOCK {
			asmbuf.lock = true
			return
		}

		// For everything except LEAQ/LEAL, a scaled index register in either
		// operand gets a MOVL reg, reg emitted ahead of the instruction.
		if p.As != ALEAQ && p.As != ALEAL {
			if p.From.Index != REG_NONE && p.From.Scale > 0 {
				asmbuf.nacltrunc(ctxt, int(p.From.Index))
			}
			if p.To.Index != REG_NONE && p.To.Scale > 0 {
				asmbuf.nacltrunc(ctxt, int(p.To.Index))
			}
		}

		switch p.As {
		case obj.ARET:
			// RET is replaced wholesale by the naclret sequence.
			asmbuf.Put(naclret)
			return

		case obj.ACALL,
			obj.AJMP:
			// Indirect branch through a register: emit the two fixup
			// instructions first; the branch itself comes from doasm below.
			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
				// ANDL $~31, reg
				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
				// ADDQ R15, reg
				asmbuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
			}

			// Same fixups for R8..R15, with the extra prefix byte those
			// registers need.
			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
				// ANDL $~31, reg
				asmbuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
				// ADDQ R15, reg
				asmbuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
			}

		case AINT:
			// INT is replaced by the single byte 0xf4 (HLT).
			asmbuf.Put1(0xf4)
			return

		case ASCASB,
			ASCASW,
			ASCASL,
			ASCASQ,
			ASTOSB,
			ASTOSW,
			ASTOSL,
			ASTOSQ:
			// These get the DI fixup sequence in front.
			asmbuf.Put(naclstos)

		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
			// These get the SI and DI fixup sequence in front.
			asmbuf.Put(naclmovs)
		}

		// Flush any REP/REPN/LOCK remembered from an earlier call. They are
		// written after the NaCl fixup bytes above and before the
		// instruction that doasm emits below.
		if asmbuf.rep != 0 {
			asmbuf.Put1(0xf3)
			asmbuf.rep = 0
		}

		if asmbuf.repn != 0 {
			asmbuf.Put1(0xf2)
			asmbuf.repn = 0
		}

		if asmbuf.lock {
			asmbuf.Put1(0xf0)
			asmbuf.lock = false
		}
	}

	// Clear the flags that doasm reports back through, and remember where
	// the bytes written by doasm begin.
	asmbuf.rexflag = 0
	asmbuf.vexflag = 0
	mark := asmbuf.Len()
	asmbuf.doasm(ctxt, cursym, p)
	if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
		/*
		 * as befits the whole approach of the architecture,
		 * the rex prefix must appear before the first opcode byte
		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
		 * before the 0f opcode escape!), or it might be ignored.
		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
		 */
		if ctxt.Arch.Family != sys.AMD64 {
			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
		}
		// Scan forward from mark over bytes that are f2, f3, 64..67, 2e, 3e
		// or 26; np ends up at the first byte that is none of those (or at
		// the end of the buffer), which is where the REX byte is inserted.
		n := asmbuf.Len()
		var np int
		for np = mark; np < n; np++ {
			c := asmbuf.At(np)
			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
				break
			}
		}
		asmbuf.Insert(np, byte(0x40|asmbuf.rexflag))
	}

	// n is the buffer length at this point: everything emitted so far,
	// including any inserted REX byte, but not the SP/BP fixup appended at
	// the very end of this function.
	n := asmbuf.Len()
	// Visit relocations from the end of cursym.R, stopping at the first one
	// whose offset lies before this instruction.
	for i := len(cursym.R) - 1; i >= 0; i-- {
		r := &cursym.R[i]
		if int64(r.Off) < p.Pc {
			break
		}
		// Same condition as the REX insertion above: one byte was inserted,
		// so the relocation offset moves by one.
		if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
			r.Off++
		}
		if r.Type == objabi.R_PCREL {
			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
				// PC-relative addressing is relative to the end of the instruction,
				// but the relocations applied by the linker are relative to the end
				// of the relocation. Because immediate instruction
				// arguments can follow the PC-relative memory reference in the
				// instruction encoding, the two may not coincide. In this case,
				// adjust addend so that linker can keep relocating relative to the
				// end of the relocation.
				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
			} else if ctxt.Arch.Family == sys.I386 {
				// On 386 PC-relative addressing (for non-call/jmp instructions)
				// assumes that the previous instruction loaded the PC of the end
				// of that instruction into CX, so the adjustment is relative to
				// that.
				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
			}
		}
		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
		}

	}

	// amd64 NaCl: after an instruction (other than CMPL/CMPQ) whose
	// destination is the register SP or BP, append ADDQ R15, SP/BP.
	if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
		switch p.To.Reg {
		case REG_SP:
			asmbuf.Put(naclspfix)
		case REG_BP:
			asmbuf.Put(naclbpfix)
		}
	}
}