github.com/mangodowner/go-gm@v0.0.0-20180818020936-8baa2bd4408c/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
    43  var (
    44  	plan9privates *obj.LSym
    45  	deferreturn   *obj.LSym
    46  )
    47  
    48  // Instruction layout.
    49  
    50  const (
    51  	// Loop alignment constants:
    52  	// want to align loop entry to LoopAlign-byte boundary,
    53  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    54  	// We define a loop entry as the target of a backward jump.
    55  	//
    56  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    57  	// and it aligns all jump targets, not just backward jump targets.
    58  	//
    59  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    60  	// is very slight but negative, so the alignment is disabled by
    61  	// setting MaxLoopPad = 0. The code is here for reference and
    62  	// for future experiments.
    63  	//
    64  	LoopAlign  = 16
    65  	MaxLoopPad = 0
    66  )
    67  
// Optab is one row of the optab instruction table: an instruction, the
// operand forms it accepts, the prefix class to apply, and the opcode bytes
// shared out among those operand forms.
type Optab struct {
	as     obj.As    // instruction this row describes
	ytab   []ytab    // accepted operand forms, scanned in order
	prefix uint8     // one of the P* constants (Px, Pe, Pm, Pw, ...)
	op     [23]uint8 // opcode bytes; each ytab row consumes zoffset of them
}
    74  
    75  type ytab struct {
    76  	from    uint8
    77  	from3   uint8
    78  	to      uint8
    79  	zcase   uint8
    80  	zoffset uint8
    81  }
    82  
    83  type Movtab struct {
    84  	as   obj.As
    85  	ft   uint8
    86  	f3t  uint8
    87  	tt   uint8
    88  	code uint8
    89  	op   [4]uint8
    90  }
    91  
// Operand classes ("Ytypes"). The from, from3 and to columns of every ytab
// row hold one of these values, and ycover is sized Ymax*Ymax so that it can
// record which class also counts as which other class.
const (
	Yxxx = iota
	// Ynone fills the operand columns of table rows that have no operand in
	// that position.
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr
	Yxm
	Yyr
	Yym
	Ytls
	Ytextsize
	Yindir
	// Ymax is the number of operand classes; it dimensions ycover.
	Ymax
)
   165  
// Encoding cases ("Ztypes"). The zcase column of every ytab row holds one of
// these values; the switch on yt.zcase in doasm turns the selected case plus
// the row's opcode bytes into the emitted instruction bytes.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	Zvex_rm_v_r
	Zvex_r_v_rm
	Zvex_v_rm_r
	Zvex_i_rm_r
	Zvex_i_r_v
	Zvex_i_rm_v_r
	// Zmax is one past the last encoding case.
	Zmax
)
   222  
   223  const (
   224  	Px   = 0
   225  	Px1  = 1    // symbolic; exact value doesn't matter
   226  	P32  = 0x32 /* 32-bit only */
   227  	Pe   = 0x66 /* operand escape */
   228  	Pm   = 0x0f /* 2byte opcode escape */
   229  	Pq   = 0xff /* both escapes: 66 0f */
   230  	Pb   = 0xfe /* byte operands */
   231  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   232  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   233  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   234  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   235  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   236  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   237  	Pw   = 0x48 /* Rex.w */
   238  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   239  	Py   = 0x80 /* defaults to 64-bit mode */
   240  	Py1  = 0x81 // symbolic; exact value doesn't matter
   241  	Py3  = 0x83 // symbolic; exact value doesn't matter
   242  	Pvex = 0x84 // symbolic: exact value doesn't matter
   243  
   244  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   245  	Rxr = 1 << 2 /* extend modrm reg */
   246  	Rxx = 1 << 1 /* extend sib index */
   247  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   248  )
   249  
   250  const (
   251  	// Encoding for VEX prefix in tables.
   252  	// The P, L, and W fields are chosen to match
   253  	// their eventual locations in the VEX prefix bytes.
   254  
   255  	// P field - 2 bits
   256  	vex66 = 1 << 0
   257  	vexF3 = 2 << 0
   258  	vexF2 = 3 << 0
   259  	// L field - 1 bit
   260  	vexLZ  = 0 << 2
   261  	vexLIG = 0 << 2
   262  	vex128 = 0 << 2
   263  	vex256 = 1 << 2
   264  	// W field - 1 bit
   265  	vexWIG = 0 << 7
   266  	vexW0  = 0 << 7
   267  	vexW1  = 1 << 7
   268  	// M field - 5 bits, but mostly reserved; we can store up to 4
   269  	vex0F   = 1 << 3
   270  	vex0F38 = 2 << 3
   271  	vex0F3A = 3 << 3
   272  
   273  	// Combinations used in the manual.
   274  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   275  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   276  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   277  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   278  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   279  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   280  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   281  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   282  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   283  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   284  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   285  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   286  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   287  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   288  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   289  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   290  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   291  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   292  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   293  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   294  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   295  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   296  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   297  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   298  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   299  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   300  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   301  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   302  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   303  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   304  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   305  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   306  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   307  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   308  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   309  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   310  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   311  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   312  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   313  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   314  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   315  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   316  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   317  )
   318  
   319  var ycover [Ymax * Ymax]uint8
   320  
   321  var reg [MAXREG]int
   322  
   323  var regrex [MAXREG + 1]int
   324  
   325  var ynone = []ytab{
   326  	{Ynone, Ynone, Ynone, Zlit, 1},
   327  }
   328  
   329  var ytext = []ytab{
   330  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   331  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   332  }
   333  
   334  var ynop = []ytab{
   335  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   336  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   337  	{Ynone, Ynone, Yml, Zpseudo, 0},
   338  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   339  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   340  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   341  	{Yml, Ynone, Ynone, Zpseudo, 0},
   342  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   343  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   344  }
   345  
   346  var yfuncdata = []ytab{
   347  	{Yi32, Ynone, Ym, Zpseudo, 0},
   348  }
   349  
   350  var ypcdata = []ytab{
   351  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   352  }
   353  
   354  var yxorb = []ytab{
   355  	{Yi32, Ynone, Yal, Zib_, 1},
   356  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   357  	{Yrb, Ynone, Ymb, Zr_m, 1},
   358  	{Ymb, Ynone, Yrb, Zm_r, 1},
   359  }
   360  
   361  var yaddl = []ytab{
   362  	{Yi8, Ynone, Yml, Zibo_m, 2},
   363  	{Yi32, Ynone, Yax, Zil_, 1},
   364  	{Yi32, Ynone, Yml, Zilo_m, 2},
   365  	{Yrl, Ynone, Yml, Zr_m, 1},
   366  	{Yml, Ynone, Yrl, Zm_r, 1},
   367  }
   368  
   369  var yincl = []ytab{
   370  	{Ynone, Ynone, Yrl, Z_rp, 1},
   371  	{Ynone, Ynone, Yml, Zo_m, 2},
   372  }
   373  
   374  var yincq = []ytab{
   375  	{Ynone, Ynone, Yml, Zo_m, 2},
   376  }
   377  
   378  var ycmpb = []ytab{
   379  	{Yal, Ynone, Yi32, Z_ib, 1},
   380  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   381  	{Ymb, Ynone, Yrb, Zm_r, 1},
   382  	{Yrb, Ynone, Ymb, Zr_m, 1},
   383  }
   384  
   385  var ycmpl = []ytab{
   386  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   387  	{Yax, Ynone, Yi32, Z_il, 1},
   388  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   389  	{Yml, Ynone, Yrl, Zm_r, 1},
   390  	{Yrl, Ynone, Yml, Zr_m, 1},
   391  }
   392  
   393  var yshb = []ytab{
   394  	{Yi1, Ynone, Ymb, Zo_m, 2},
   395  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   396  	{Ycx, Ynone, Ymb, Zo_m, 2},
   397  }
   398  
   399  var yshl = []ytab{
   400  	{Yi1, Ynone, Yml, Zo_m, 2},
   401  	{Yi32, Ynone, Yml, Zibo_m, 2},
   402  	{Ycl, Ynone, Yml, Zo_m, 2},
   403  	{Ycx, Ynone, Yml, Zo_m, 2},
   404  }
   405  
   406  var ytestl = []ytab{
   407  	{Yi32, Ynone, Yax, Zil_, 1},
   408  	{Yi32, Ynone, Yml, Zilo_m, 2},
   409  	{Yrl, Ynone, Yml, Zr_m, 1},
   410  	{Yml, Ynone, Yrl, Zm_r, 1},
   411  }
   412  
   413  var ymovb = []ytab{
   414  	{Yrb, Ynone, Ymb, Zr_m, 1},
   415  	{Ymb, Ynone, Yrb, Zm_r, 1},
   416  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   417  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   418  }
   419  
   420  var ybtl = []ytab{
   421  	{Yi8, Ynone, Yml, Zibo_m, 2},
   422  	{Yrl, Ynone, Yml, Zr_m, 1},
   423  }
   424  
   425  var ymovw = []ytab{
   426  	{Yrl, Ynone, Yml, Zr_m, 1},
   427  	{Yml, Ynone, Yrl, Zm_r, 1},
   428  	{Yi0, Ynone, Yrl, Zclr, 1},
   429  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   430  	{Yi32, Ynone, Yml, Zilo_m, 2},
   431  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   432  }
   433  
   434  var ymovl = []ytab{
   435  	{Yrl, Ynone, Yml, Zr_m, 1},
   436  	{Yml, Ynone, Yrl, Zm_r, 1},
   437  	{Yi0, Ynone, Yrl, Zclr, 1},
   438  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   439  	{Yi32, Ynone, Yml, Zilo_m, 2},
   440  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   441  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   442  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   443  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   444  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   445  }
   446  
   447  var yret = []ytab{
   448  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   449  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   450  }
   451  
   452  var ymovq = []ytab{
   453  	// valid in 32-bit mode
   454  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   455  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   456  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   457  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   458  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   459  
   460  	// valid only in 64-bit mode, usually with 64-bit prefix
   461  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   462  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   463  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   464  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   465  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   466  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   467  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   468  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   469  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   470  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   471  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   472  }
   473  
   474  var ym_rl = []ytab{
   475  	{Ym, Ynone, Yrl, Zm_r, 1},
   476  }
   477  
   478  var yrl_m = []ytab{
   479  	{Yrl, Ynone, Ym, Zr_m, 1},
   480  }
   481  
   482  var ymb_rl = []ytab{
   483  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   484  }
   485  
   486  var yml_rl = []ytab{
   487  	{Yml, Ynone, Yrl, Zm_r, 1},
   488  }
   489  
   490  var yrl_ml = []ytab{
   491  	{Yrl, Ynone, Yml, Zr_m, 1},
   492  }
   493  
   494  var yml_mb = []ytab{
   495  	{Yrb, Ynone, Ymb, Zr_m, 1},
   496  	{Ymb, Ynone, Yrb, Zm_r, 1},
   497  }
   498  
   499  var yrb_mb = []ytab{
   500  	{Yrb, Ynone, Ymb, Zr_m, 1},
   501  }
   502  
   503  var yxchg = []ytab{
   504  	{Yax, Ynone, Yrl, Z_rp, 1},
   505  	{Yrl, Ynone, Yax, Zrp_, 1},
   506  	{Yrl, Ynone, Yml, Zr_m, 1},
   507  	{Yml, Ynone, Yrl, Zm_r, 1},
   508  }
   509  
   510  var ydivl = []ytab{
   511  	{Yml, Ynone, Ynone, Zm_o, 2},
   512  }
   513  
   514  var ydivb = []ytab{
   515  	{Ymb, Ynone, Ynone, Zm_o, 2},
   516  }
   517  
   518  var yimul = []ytab{
   519  	{Yml, Ynone, Ynone, Zm_o, 2},
   520  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   521  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   522  	{Yml, Ynone, Yrl, Zm_r, 2},
   523  }
   524  
   525  var yimul3 = []ytab{
   526  	{Yi8, Yml, Yrl, Zibm_r, 2},
   527  }
   528  
   529  var ybyte = []ytab{
   530  	{Yi64, Ynone, Ynone, Zbyte, 1},
   531  }
   532  
   533  var yin = []ytab{
   534  	{Yi32, Ynone, Ynone, Zib_, 1},
   535  	{Ynone, Ynone, Ynone, Zlit, 1},
   536  }
   537  
   538  var yint = []ytab{
   539  	{Yi32, Ynone, Ynone, Zib_, 1},
   540  }
   541  
   542  var ypushl = []ytab{
   543  	{Yrl, Ynone, Ynone, Zrp_, 1},
   544  	{Ym, Ynone, Ynone, Zm_o, 2},
   545  	{Yi8, Ynone, Ynone, Zib_, 1},
   546  	{Yi32, Ynone, Ynone, Zil_, 1},
   547  }
   548  
   549  var ypopl = []ytab{
   550  	{Ynone, Ynone, Yrl, Z_rp, 1},
   551  	{Ynone, Ynone, Ym, Zo_m, 2},
   552  }
   553  
   554  var ybswap = []ytab{
   555  	{Ynone, Ynone, Yrl, Z_rp, 2},
   556  }
   557  
   558  var yscond = []ytab{
   559  	{Ynone, Ynone, Ymb, Zo_m, 2},
   560  }
   561  
   562  var yjcond = []ytab{
   563  	{Ynone, Ynone, Ybr, Zbr, 0},
   564  	{Yi0, Ynone, Ybr, Zbr, 0},
   565  	{Yi1, Ynone, Ybr, Zbr, 1},
   566  }
   567  
   568  var yloop = []ytab{
   569  	{Ynone, Ynone, Ybr, Zloop, 1},
   570  }
   571  
   572  var ycall = []ytab{
   573  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   574  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   575  	{Ynone, Ynone, Yindir, Zcallind, 2},
   576  	{Ynone, Ynone, Ybr, Zcall, 0},
   577  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   578  }
   579  
   580  var yduff = []ytab{
   581  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   582  }
   583  
   584  var yjmp = []ytab{
   585  	{Ynone, Ynone, Yml, Zo_m64, 2},
   586  	{Ynone, Ynone, Ybr, Zjmp, 0},
   587  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   588  }
   589  
   590  var yfmvd = []ytab{
   591  	{Ym, Ynone, Yf0, Zm_o, 2},
   592  	{Yf0, Ynone, Ym, Zo_m, 2},
   593  	{Yrf, Ynone, Yf0, Zm_o, 2},
   594  	{Yf0, Ynone, Yrf, Zo_m, 2},
   595  }
   596  
   597  var yfmvdp = []ytab{
   598  	{Yf0, Ynone, Ym, Zo_m, 2},
   599  	{Yf0, Ynone, Yrf, Zo_m, 2},
   600  }
   601  
   602  var yfmvf = []ytab{
   603  	{Ym, Ynone, Yf0, Zm_o, 2},
   604  	{Yf0, Ynone, Ym, Zo_m, 2},
   605  }
   606  
   607  var yfmvx = []ytab{
   608  	{Ym, Ynone, Yf0, Zm_o, 2},
   609  }
   610  
   611  var yfmvp = []ytab{
   612  	{Yf0, Ynone, Ym, Zo_m, 2},
   613  }
   614  
   615  var yfcmv = []ytab{
   616  	{Yrf, Ynone, Yf0, Zm_o, 2},
   617  }
   618  
   619  var yfadd = []ytab{
   620  	{Ym, Ynone, Yf0, Zm_o, 2},
   621  	{Yrf, Ynone, Yf0, Zm_o, 2},
   622  	{Yf0, Ynone, Yrf, Zo_m, 2},
   623  }
   624  
   625  var yfxch = []ytab{
   626  	{Yf0, Ynone, Yrf, Zo_m, 2},
   627  	{Yrf, Ynone, Yf0, Zm_o, 2},
   628  }
   629  
   630  var ycompp = []ytab{
   631  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   632  }
   633  
   634  var ystsw = []ytab{
   635  	{Ynone, Ynone, Ym, Zo_m, 2},
   636  	{Ynone, Ynone, Yax, Zlit, 1},
   637  }
   638  
   639  var ysvrs = []ytab{
   640  	{Ynone, Ynone, Ym, Zo_m, 2},
   641  	{Ym, Ynone, Ynone, Zm_o, 2},
   642  }
   643  
   644  var ymm = []ytab{
   645  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   646  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   647  }
   648  
   649  var yxm = []ytab{
   650  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   651  }
   652  
   653  var yxm_q4 = []ytab{
   654  	{Yxm, Ynone, Yxr, Zm_r, 1},
   655  }
   656  
   657  var yxcvm1 = []ytab{
   658  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   659  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   660  }
   661  
   662  var yxcvm2 = []ytab{
   663  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   664  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   665  }
   666  
   667  var yxr = []ytab{
   668  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   669  }
   670  
   671  var yxr_ml = []ytab{
   672  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   673  }
   674  
   675  var ymr = []ytab{
   676  	{Ymr, Ynone, Ymr, Zm_r, 1},
   677  }
   678  
   679  var ymr_ml = []ytab{
   680  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   681  }
   682  
   683  var yxcmpi = []ytab{
   684  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   685  }
   686  
   687  var yxmov = []ytab{
   688  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   689  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   690  }
   691  
   692  var yxcvfl = []ytab{
   693  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   694  }
   695  
   696  var yxcvlf = []ytab{
   697  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   698  }
   699  
   700  var yxcvfq = []ytab{
   701  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   702  }
   703  
   704  var yxcvqf = []ytab{
   705  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   706  }
   707  
   708  var yps = []ytab{
   709  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   710  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   711  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   712  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   713  }
   714  
   715  var yxrrl = []ytab{
   716  	{Yxr, Ynone, Yrl, Zm_r, 1},
   717  }
   718  
   719  var ymrxr = []ytab{
   720  	{Ymr, Ynone, Yxr, Zm_r, 1},
   721  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   722  }
   723  
   724  var ymshuf = []ytab{
   725  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   726  }
   727  
   728  var ymshufb = []ytab{
   729  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   730  }
   731  
   732  var yxshuf = []ytab{
   733  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   734  }
   735  
   736  var yextrw = []ytab{
   737  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   738  }
   739  
   740  var yextr = []ytab{
   741  	{Yu8, Yxr, Ymm, Zibr_m, 3},
   742  }
   743  
   744  var yinsrw = []ytab{
   745  	{Yu8, Yml, Yxr, Zibm_r, 2},
   746  }
   747  
   748  var yinsr = []ytab{
   749  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   750  }
   751  
   752  var ypsdq = []ytab{
   753  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   754  }
   755  
   756  var ymskb = []ytab{
   757  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   758  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   759  }
   760  
   761  var ycrc32l = []ytab{
   762  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   763  }
   764  
   765  var yprefetch = []ytab{
   766  	{Ym, Ynone, Ynone, Zm_o, 2},
   767  }
   768  
   769  var yaes = []ytab{
   770  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   771  }
   772  
   773  var yxbegin = []ytab{
   774  	{Ynone, Ynone, Ybr, Zjmp, 1},
   775  }
   776  
   777  var yxabort = []ytab{
   778  	{Yu8, Ynone, Ynone, Zib_, 1},
   779  }
   780  
   781  var ylddqu = []ytab{
   782  	{Ym, Ynone, Yxr, Zm_r, 1},
   783  }
   784  
   785  // VEX instructions that come in two forms:
   786  //	VTHING xmm2/m128, xmmV, xmm1
   787  //	VTHING ymm2/m256, ymmV, ymm1
   788  // The opcode array in the corresponding Optab entry
   789  // should contain the (VEX prefixes, opcode byte) pair
   790  // for each of the two forms.
   791  // For example, the entries for VPXOR are:
   792  //
   793  //	VPXOR xmm2/m128, xmmV, xmm1
   794  //	VEX.NDS.128.66.0F.WIG EF /r
   795  //
   796  //	VPXOR ymm2/m256, ymmV, ymm1
   797  //	VEX.NDS.256.66.0F.WIG EF /r
   798  //
   799  // The NDS/NDD/DDS part can be dropped, producing this
   800  // Optab entry:
   801  //
   802  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
   803  //
   804  var yvex_xy3 = []ytab{
   805  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   806  	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
   807  }
   808  
   809  var yvex_ri3 = []ytab{
   810  	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
   811  }
   812  
   813  var yvex_xyi3 = []ytab{
   814  	{Yu8, Yxm, Yxr, Zvex_i_rm_r, 2},
   815  	{Yu8, Yym, Yyr, Zvex_i_rm_r, 2},
   816  	{Yi8, Yxm, Yxr, Zvex_i_rm_r, 2},
   817  	{Yi8, Yym, Yyr, Zvex_i_rm_r, 2},
   818  }
   819  
   820  var yvex_yyi4 = []ytab{ //TODO don't hide 4 op, some version have xmm version
   821  	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   822  }
   823  
   824  var yvex_xyi4 = []ytab{
   825  	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   826  }
   827  
   828  var yvex_shift = []ytab{
   829  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   830  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   831  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   832  	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2},
   833  }
   834  
   835  var yvex_shift_dq = []ytab{
   836  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   837  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   838  }
   839  
   840  var yvex_r3 = []ytab{
   841  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   842  }
   843  
   844  var yvex_vmr3 = []ytab{
   845  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   846  }
   847  
   848  var yvex_xy2 = []ytab{
   849  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   850  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   851  }
   852  
   853  var yvex_xyr2 = []ytab{
   854  	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
   855  	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
   856  }
   857  
   858  var yvex_vmovdqa = []ytab{
   859  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   860  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   861  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   862  	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
   863  }
   864  
   865  var yvex_vmovntdq = []ytab{
   866  	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
   867  	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
   868  }
   869  
   870  var yvex_vpbroadcast = []ytab{
   871  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   872  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   873  }
   874  
   875  var yvex_vpbroadcast_sd = []ytab{
   876  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   877  }
   878  
   879  var ymmxmm0f38 = []ytab{
   880  	{Ymm, Ynone, Ymr, Zlitm_r, 3},
   881  	{Yxm, Ynone, Yxr, Zlitm_r, 5},
   882  }
   883  
   884  /*
   885   * You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   886   * ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   887   * to find the entry with the given p.As and then looks through the ytable for
   888   * that instruction (the second field in the optab struct) for a line whose
   889   * first three values (from, from3, to) match the operands' Ytypes.  The
   890   * function oclass computes the specific Ytype of an operand and then the set
   891   * of more general Ytypes that it satisfies is implied by the ycover table, set
   892   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   893   * from the more general 8-bit constants, but instinit says
   894   *
   895   *        ycover[Yi0*Ymax+Ys32] = 1
   896   *        ycover[Yi1*Ymax+Ys32] = 1
   897   *        ycover[Yi8*Ymax+Ys32] = 1
   898   *
   899   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   900   * if that's what an instruction can handle.
   901   *
   902   * In parallel with the scan through the ytable for the appropriate line, there
   903   * is a z pointer that starts out pointing at the strange magic byte list in
   904   * the Optab struct.  With each step past a non-matching ytable line, z
   905   * advances by the 5th entry in the line.  When a matching line is found, that
   906   * z pointer has the extra data to use in laying down the instruction bytes.
   907   * The actual bytes laid down are a function of the 4th entry in the line (that
   908   * is, the Ztype) and the z bytes.
   909   *
   910   * For example, let's look at AADDL.  The optab line says:
   911   *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   912   *
   913   * and yaddl says
   914   *        var yaddl = []ytab{
   915   *                {Yi8, Ynone, Yml, Zibo_m, 2},
   916   *                {Yi32, Ynone, Yax, Zil_, 1},
   917   *                {Yi32, Ynone, Yml, Zilo_m, 2},
   918   *                {Yrl, Ynone, Yml, Zr_m, 1},
   919   *                {Yml, Ynone, Yrl, Zm_r, 1},
   920   *        }
   921   *
   922   * so there are 5 possible types of ADDL instruction that can be laid down, and
   923   * possible states used to lay them down (Ztype and z pointer, assuming z
   924   * points at [23]uint8{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   925   *
   926   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   927   *        Yi32, Yax -> Zil_, z+2 (0x05)
   928   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   929   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   930   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   931   *
   932   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   933   * relatively straightforward as this program goes.
   934   *
   935   * The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   936   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   937   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   938   * Zilo_m is the same but a long (32-bit) immediate.
   939   */
   940  var optab =
   941  /*	as, ytab, andproto, opcode */
   942  []Optab{
   943  	{obj.AXXX, nil, 0, [23]uint8{}},
   944  	{AAAA, ynone, P32, [23]uint8{0x37}},
   945  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   946  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   947  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   948  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x12}},
   949  	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   950  	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   951  	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   952  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   953  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   954  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   955  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   956  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   957  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   958  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   959  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   960  	{AADJSP, nil, 0, [23]uint8{}},
   961  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   962  	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   963  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   964  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   965  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   966  	{AANDPS, yxm, Pm, [23]uint8{0x54}},
   967  	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   968  	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   969  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   970  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   971  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   972  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   973  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   974  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   975  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   976  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   977  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   978  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   979  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   980  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   981  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   982  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   983  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   984  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   985  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   986  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   987  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   988  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   989  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   990  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   991  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   992  	{ABYTE, ybyte, Px, [23]uint8{1}},
   993  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   994  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   995  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   996  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   997  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   998  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   999  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1000  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1001  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1002  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1003  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1004  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1005  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1006  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1007  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1008  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1009  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1010  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1011  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1012  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1013  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1014  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1015  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1016  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1017  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1018  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1019  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1020  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1021  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1022  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1023  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1024  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1025  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1026  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1027  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1028  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1029  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1030  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1031  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1032  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1033  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1034  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1035  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1036  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1037  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1038  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1039  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1040  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1041  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1042  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1043  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1044  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1045  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1046  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1047  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1048  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1049  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1050  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1051  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1052  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1053  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1054  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1055  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1056  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1057  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1058  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1059  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1060  	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
  1061  	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
  1062  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1063  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1064  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1065  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1066  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1067  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1068  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1069  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1070  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1071  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1072  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1073  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1074  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1075  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1076  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1077  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1078  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1079  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1080  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1081  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1082  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1083  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1084  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1085  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1086  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1087  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1088  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1089  	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
  1090  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1091  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1092  	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
  1093  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1094  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1095  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1096  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1097  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1098  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1099  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1100  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1101  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1102  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1103  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1104  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1105  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1106  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1107  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1108  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1109  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1110  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1111  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1112  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1113  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1115  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1116  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1117  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1118  	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
  1119  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1120  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1121  	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
  1122  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1123  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1124  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1125  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1126  	{AINT, yint, Px, [23]uint8{0xcd}},
  1127  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1128  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1129  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1130  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1131  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1132  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1133  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1134  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1135  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1136  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1137  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1138  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1139  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1140  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1141  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1142  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1143  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1144  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1145  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1146  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1147  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1148  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1149  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1150  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1151  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1152  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1153  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1154  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1155  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1156  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1157  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1158  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1159  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1160  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1161  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1162  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1163  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1164  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1165  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1166  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1167  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1168  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1169  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1170  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1171  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1172  	{ALONG, ybyte, Px, [23]uint8{4}},
  1173  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1174  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1175  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1176  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1177  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1178  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1179  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1180  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1181  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1182  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1183  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1184  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1185  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1186  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1187  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1188  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1189  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1190  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1191  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1192  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1193  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1194  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1195  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1196  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1197  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1198  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1199  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1200  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1201  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1202  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1203  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1204  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1205  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1206  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1207  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1208  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1209  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1210  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1211  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1212  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1213  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1214  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1215  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1216  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1217  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1218  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1219  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1220  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1221  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1222  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1223  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1224  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1225  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1226  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1227  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1228  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1229  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1230  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1231  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1232  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1233  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1234  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1235  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1236  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1237  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1238  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1239  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1240  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1241  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1242  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1243  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1244  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1245  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1246  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1247  	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1248  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1249  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1250  	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1251  	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1252  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1253  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1254  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1255  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1256  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1257  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1258  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1259  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1260  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1261  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1262  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1263  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1264  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1265  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1266  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1267  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1268  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1269  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1270  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1271  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1272  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1273  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1274  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1275  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1276  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1277  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1278  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1279  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1280  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1281  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1282  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1283  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1284  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1285  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1286  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1287  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1288  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1289  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1290  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1291  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1292  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1293  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1294  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1295  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1296  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1297  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1298  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1299  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1300  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1301  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1302  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1303  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1304  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1305  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1306  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1307  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1308  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1309  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1310  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1311  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1312  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1313  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1314  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1315  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1316  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1317  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1318  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1319  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1320  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1321  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1322  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1323  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1324  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1325  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1326  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1327  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1328  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1329  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1330  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1331  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1332  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1333  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1334  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1335  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1336  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1337  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1338  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1339  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1340  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1341  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1342  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1343  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1344  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1345  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1346  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1347  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1348  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1349  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1350  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1351  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1352  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1353  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1354  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1355  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1356  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1357  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1358  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1359  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1360  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1361  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1362  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1363  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1364  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1365  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1366  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1367  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1368  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1369  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1370  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1371  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1372  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1373  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1374  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1375  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1376  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1377  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1378  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1379  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1380  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1381  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1382  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1383  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1384  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1385  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1386  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1387  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1388  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1389  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1390  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1391  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1392  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1393  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1394  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1395  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1396  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1397  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1398  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1399  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1400  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1401  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1402  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1403  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1404  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1405  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1406  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1407  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1408  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1409  	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1410  	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1411  	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1412  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1413  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1414  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1415  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1416  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1417  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1418  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1419  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1420  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1421  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1422  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1423  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1424  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1425  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1426  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1427  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1428  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1429  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1430  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1431  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1432  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1433  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1434  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1435  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1436  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1437  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1438  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1439  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1440  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1441  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1442  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1443  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1444  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1445  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1446  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1447  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1448  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1449  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1450  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1451  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1452  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1453  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1454  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1455  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1456  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1457  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1458  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1459  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1460  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1461  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1462  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1463  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1464  	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1465  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1466  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1467  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1468  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1469  	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
  1470  	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
  1471  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1472  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1473  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1474  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1475  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1476  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1477  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1478  	{AWORD, ybyte, Px, [23]uint8{2}},
  1479  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1480  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1481  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1482  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1483  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1484  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1485  	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1486  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1487  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1488  	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1489  	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1490  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1491  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1492  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1493  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1494  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1495  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1496  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1497  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1498  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1499  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1500  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1501  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1502  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1503  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1504  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1505  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1506  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1507  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1508  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1509  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1510  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1511  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1512  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1513  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1514  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1515  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1516  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1517  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1518  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1519  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1520  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1521  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1522  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1523  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1524  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1525  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1526  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1527  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1528  	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
  1529  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1530  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1531  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1532  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1533  	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
  1534  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1535  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1536  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1537  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1538  	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
  1539  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1540  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1541  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1542  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1543  	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
  1544  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1545  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1546  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1547  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1548  	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
  1549  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1550  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1551  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1552  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1553  	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
  1554  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1555  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1556  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1557  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1558  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1559  	{AFFREE, nil, 0, [23]uint8{}},
  1560  	{AFLDCW, ysvrs, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1561  	{AFLDENV, ysvrs, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1562  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1563  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1564  	{AFSTCW, ysvrs, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1565  	{AFSTENV, ysvrs, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1566  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1567  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1568  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1569  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1570  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1571  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1572  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1573  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1574  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1575  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1576  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1577  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1578  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1579  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1580  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1581  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1582  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1583  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1584  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1585  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1586  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1587  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1588  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1589  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1590  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1591  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1592  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1593  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1594  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1595  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1596  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1597  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1598  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1599  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1600  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1601  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1602  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1603  	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
  1604  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1605  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1606  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1607  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1608  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1609  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1610  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1611  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1612  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1613  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1614  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1615  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1616  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1617  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1618  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1619  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1620  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1621  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1622  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1623  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1624  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1625  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1626  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1627  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1628  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1629  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1630  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1631  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1632  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1633  	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1634  	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
  1635  	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
  1636  	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1637  	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1638  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1639  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1640  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1641  	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
  1642  	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
  1643  	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
  1644  
  1645  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1646  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1647  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1648  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1649  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1650  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1651  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1652  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1653  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1654  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1655  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1656  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1657  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1658  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1659  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1660  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1661  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1662  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1663  
  1664  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1665  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1666  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1667  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1668  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1669  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1670  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1671  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1672  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1673  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1674  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
  1675  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70, VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
  1676  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
  1677  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
  1678  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
  1679  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
  1680  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
  1681  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
  1682  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
  1683  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
  1684  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
  1685  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
  1686  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
  1687  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
  1688  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
  1689  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
  1690  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
  1691  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
  1692  	{AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_256_66_0F38_W0, 0x19}},
  1693  	{AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x18, VEX_256_66_0F38_W0, 0x18}},
  1694  	{AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x12, VEX_256_F2_0F_WIG, 0x12}},
  1695  	{AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x16, VEX_256_F3_0F_WIG, 0x16}},
  1696  	{AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x12, VEX_256_F3_0F_WIG, 0x12}},
  1697  
  1698  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1699  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1700  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1701  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1702  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1703  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1704  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1705  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1706  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1707  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1708  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1709  	{obj.AEND, nil, 0, [23]uint8{}},
  1710  	{0, nil, 0, [23]uint8{}},
  1711  }
  1712  
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// It is filled in by instinit; slots for opcodes without an optab entry
// stay nil.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1714  
  1715  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1716  // This happens on systems like Solaris that call .so functions instead of system calls.
  1717  // It does not seem to be necessary for any other systems. This is probably working
  1718  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1719  // what that bug is. And this does fix it.
  1720  func isextern(s *obj.LSym) bool {
  1721  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1722  	return strings.HasPrefix(s.Name, "libc_")
  1723  }
  1724  
  1725  // single-instruction no-ops of various lengths.
  1726  // constructed by hand and disassembled with gdb to verify.
  1727  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1728  var nop = [][16]uint8{
  1729  	{0x90},
  1730  	{0x66, 0x90},
  1731  	{0x0F, 0x1F, 0x00},
  1732  	{0x0F, 0x1F, 0x40, 0x00},
  1733  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1734  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1735  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1736  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1737  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1738  }
  1739  
  1740  // Native Client rejects the repeated 0x66 prefix.
  1741  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1742  func fillnop(p []byte, n int) {
  1743  	var m int
  1744  
  1745  	for n > 0 {
  1746  		m = n
  1747  		if m > len(nop) {
  1748  			m = len(nop)
  1749  		}
  1750  		copy(p[:m], nop[m-1][:m])
  1751  		p = p[m:]
  1752  		n -= m
  1753  	}
  1754  }
  1755  
  1756  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1757  	s.Grow(int64(c) + int64(pad))
  1758  	fillnop(s.P[c:], int(pad))
  1759  	return c + pad
  1760  }
  1761  
  1762  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1763  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1764  		return l
  1765  	}
  1766  	return q
  1767  }
  1768  
  1769  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  1770  	if s.P != nil {
  1771  		return
  1772  	}
  1773  
  1774  	if ycover[0] == 0 {
  1775  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  1776  	}
  1777  
  1778  	var asmbuf AsmBuf
  1779  
  1780  	for p := s.Func.Text; p != nil; p = p.Link {
  1781  		if p.To.Type == obj.TYPE_BRANCH {
  1782  			if p.Pcond == nil {
  1783  				p.Pcond = p
  1784  			}
  1785  		}
  1786  		if p.As == AADJSP {
  1787  			p.To.Type = obj.TYPE_REG
  1788  			p.To.Reg = REG_SP
  1789  			v := int32(-p.From.Offset)
  1790  			p.From.Offset = int64(v)
  1791  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1792  			if v < 0 {
  1793  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1794  				v = -v
  1795  				p.From.Offset = int64(v)
  1796  			}
  1797  
  1798  			if v == 0 {
  1799  				p.As = obj.ANOP
  1800  			}
  1801  		}
  1802  	}
  1803  
  1804  	var q *obj.Prog
  1805  	var count int64 // rough count of number of instructions
  1806  	for p := s.Func.Text; p != nil; p = p.Link {
  1807  		count++
  1808  		p.Back = 2 // use short branches first time through
  1809  		q = p.Pcond
  1810  		if q != nil && (q.Back&2 != 0) {
  1811  			p.Back |= 1 // backward jump
  1812  			q.Back |= 4 // loop head
  1813  		}
  1814  
  1815  		if p.As == AADJSP {
  1816  			p.To.Type = obj.TYPE_REG
  1817  			p.To.Reg = REG_SP
  1818  			v := int32(-p.From.Offset)
  1819  			p.From.Offset = int64(v)
  1820  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1821  			if v < 0 {
  1822  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1823  				v = -v
  1824  				p.From.Offset = int64(v)
  1825  			}
  1826  
  1827  			if v == 0 {
  1828  				p.As = obj.ANOP
  1829  			}
  1830  		}
  1831  	}
  1832  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1833  
  1834  	n := 0
  1835  	var c int32
  1836  	errors := ctxt.Errors
  1837  	for {
  1838  		loop := int32(0)
  1839  		for i := range s.R {
  1840  			s.R[i] = obj.Reloc{}
  1841  		}
  1842  		s.R = s.R[:0]
  1843  		s.P = s.P[:0]
  1844  		c = 0
  1845  		for p := s.Func.Text; p != nil; p = p.Link {
  1846  			if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 {
  1847  
  1848  				// pad everything to avoid crossing 32-byte boundary
  1849  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1850  					c = naclpad(ctxt, s, c, -c&31)
  1851  				}
  1852  
  1853  				// pad call deferreturn to start at 32-byte boundary
  1854  				// so that subtracting 5 in jmpdefer will jump back
  1855  				// to that boundary and rerun the call.
  1856  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1857  					c = naclpad(ctxt, s, c, -c&31)
  1858  				}
  1859  
  1860  				// pad call to end at 32-byte boundary
  1861  				if p.As == obj.ACALL {
  1862  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1863  				}
  1864  
  1865  				// the linker treats REP and STOSQ as different instructions
  1866  				// but in fact the REP is a prefix on the STOSQ.
  1867  				// make sure REP has room for 2 more bytes, so that
  1868  				// padding will not be inserted before the next instruction.
  1869  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1870  					c = naclpad(ctxt, s, c, -c&31)
  1871  				}
  1872  
  1873  				// same for LOCK.
  1874  				// various instructions follow; the longest is 4 bytes.
  1875  				// give ourselves 8 bytes so as to avoid surprises.
  1876  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1877  					c = naclpad(ctxt, s, c, -c&31)
  1878  				}
  1879  			}
  1880  
  1881  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1882  				// pad with NOPs
  1883  				v := -c & (LoopAlign - 1)
  1884  
  1885  				if v <= MaxLoopPad {
  1886  					s.Grow(int64(c) + int64(v))
  1887  					fillnop(s.P[c:], int(v))
  1888  					c += v
  1889  				}
  1890  			}
  1891  
  1892  			p.Pc = int64(c)
  1893  
  1894  			// process forward jumps to p
  1895  			for q = p.Rel; q != nil; q = q.Forwd {
  1896  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1897  				if q.Back&2 != 0 { // short
  1898  					if v > 127 {
  1899  						loop++
  1900  						q.Back ^= 2
  1901  					}
  1902  
  1903  					if q.As == AJCXZL || q.As == AXBEGIN {
  1904  						s.P[q.Pc+2] = byte(v)
  1905  					} else {
  1906  						s.P[q.Pc+1] = byte(v)
  1907  					}
  1908  				} else {
  1909  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1910  				}
  1911  			}
  1912  
  1913  			p.Rel = nil
  1914  
  1915  			p.Pc = int64(c)
  1916  			asmbuf.asmins(ctxt, s, p)
  1917  			m := asmbuf.Len()
  1918  			if int(p.Isize) != m {
  1919  				p.Isize = uint8(m)
  1920  				loop++
  1921  			}
  1922  
  1923  			s.Grow(p.Pc + int64(m))
  1924  			copy(s.P[p.Pc:], asmbuf.Bytes())
  1925  			c += int32(m)
  1926  		}
  1927  
  1928  		n++
  1929  		if n > 20 {
  1930  			ctxt.Diag("span must be looping")
  1931  			log.Fatalf("loop")
  1932  		}
  1933  		if loop == 0 {
  1934  			break
  1935  		}
  1936  		if ctxt.Errors > errors {
  1937  			return
  1938  		}
  1939  	}
  1940  
  1941  	if ctxt.Headtype == objabi.Hnacl {
  1942  		c = naclpad(ctxt, s, c, -c&31)
  1943  	}
  1944  
  1945  	s.Size = int64(c)
  1946  
  1947  	if false { /* debug['a'] > 1 */
  1948  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1949  		var i int
  1950  		for i = 0; i < len(s.P); i++ {
  1951  			fmt.Printf(" %.2x", s.P[i])
  1952  			if i%16 == 15 {
  1953  				fmt.Printf("\n  %.6x", uint(i+1))
  1954  			}
  1955  		}
  1956  
  1957  		if i%16 != 0 {
  1958  			fmt.Printf("\n")
  1959  		}
  1960  
  1961  		for i := 0; i < len(s.R); i++ {
  1962  			r := &s.R[i]
  1963  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1964  		}
  1965  	}
  1966  }
  1967  
  1968  func instinit(ctxt *obj.Link) {
  1969  	if ycover[0] != 0 {
  1970  		// Already initialized; stop now.
  1971  		// This happens in the cmd/asm tests,
  1972  		// each of which re-initializes the arch.
  1973  		return
  1974  	}
  1975  
  1976  	switch ctxt.Headtype {
  1977  	case objabi.Hplan9:
  1978  		plan9privates = ctxt.Lookup("_privates")
  1979  	case objabi.Hnacl:
  1980  		deferreturn = ctxt.Lookup("runtime.deferreturn")
  1981  	}
  1982  
  1983  	for i := 1; optab[i].as != 0; i++ {
  1984  		c := optab[i].as
  1985  		if opindex[c&obj.AMask] != nil {
  1986  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  1987  		}
  1988  		opindex[c&obj.AMask] = &optab[i]
  1989  	}
  1990  
  1991  	for i := 0; i < Ymax; i++ {
  1992  		ycover[i*Ymax+i] = 1
  1993  	}
  1994  
  1995  	ycover[Yi0*Ymax+Yi8] = 1
  1996  	ycover[Yi1*Ymax+Yi8] = 1
  1997  	ycover[Yu7*Ymax+Yi8] = 1
  1998  
  1999  	ycover[Yi0*Ymax+Yu7] = 1
  2000  	ycover[Yi1*Ymax+Yu7] = 1
  2001  
  2002  	ycover[Yi0*Ymax+Yu8] = 1
  2003  	ycover[Yi1*Ymax+Yu8] = 1
  2004  	ycover[Yu7*Ymax+Yu8] = 1
  2005  
  2006  	ycover[Yi0*Ymax+Ys32] = 1
  2007  	ycover[Yi1*Ymax+Ys32] = 1
  2008  	ycover[Yu7*Ymax+Ys32] = 1
  2009  	ycover[Yu8*Ymax+Ys32] = 1
  2010  	ycover[Yi8*Ymax+Ys32] = 1
  2011  
  2012  	ycover[Yi0*Ymax+Yi32] = 1
  2013  	ycover[Yi1*Ymax+Yi32] = 1
  2014  	ycover[Yu7*Ymax+Yi32] = 1
  2015  	ycover[Yu8*Ymax+Yi32] = 1
  2016  	ycover[Yi8*Ymax+Yi32] = 1
  2017  	ycover[Ys32*Ymax+Yi32] = 1
  2018  
  2019  	ycover[Yi0*Ymax+Yi64] = 1
  2020  	ycover[Yi1*Ymax+Yi64] = 1
  2021  	ycover[Yu7*Ymax+Yi64] = 1
  2022  	ycover[Yu8*Ymax+Yi64] = 1
  2023  	ycover[Yi8*Ymax+Yi64] = 1
  2024  	ycover[Ys32*Ymax+Yi64] = 1
  2025  	ycover[Yi32*Ymax+Yi64] = 1
  2026  
  2027  	ycover[Yal*Ymax+Yrb] = 1
  2028  	ycover[Ycl*Ymax+Yrb] = 1
  2029  	ycover[Yax*Ymax+Yrb] = 1
  2030  	ycover[Ycx*Ymax+Yrb] = 1
  2031  	ycover[Yrx*Ymax+Yrb] = 1
  2032  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2033  
  2034  	ycover[Ycl*Ymax+Ycx] = 1
  2035  
  2036  	ycover[Yax*Ymax+Yrx] = 1
  2037  	ycover[Ycx*Ymax+Yrx] = 1
  2038  
  2039  	ycover[Yax*Ymax+Yrl] = 1
  2040  	ycover[Ycx*Ymax+Yrl] = 1
  2041  	ycover[Yrx*Ymax+Yrl] = 1
  2042  	ycover[Yrl32*Ymax+Yrl] = 1
  2043  
  2044  	ycover[Yf0*Ymax+Yrf] = 1
  2045  
  2046  	ycover[Yal*Ymax+Ymb] = 1
  2047  	ycover[Ycl*Ymax+Ymb] = 1
  2048  	ycover[Yax*Ymax+Ymb] = 1
  2049  	ycover[Ycx*Ymax+Ymb] = 1
  2050  	ycover[Yrx*Ymax+Ymb] = 1
  2051  	ycover[Yrb*Ymax+Ymb] = 1
  2052  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2053  	ycover[Ym*Ymax+Ymb] = 1
  2054  
  2055  	ycover[Yax*Ymax+Yml] = 1
  2056  	ycover[Ycx*Ymax+Yml] = 1
  2057  	ycover[Yrx*Ymax+Yml] = 1
  2058  	ycover[Yrl*Ymax+Yml] = 1
  2059  	ycover[Yrl32*Ymax+Yml] = 1
  2060  	ycover[Ym*Ymax+Yml] = 1
  2061  
  2062  	ycover[Yax*Ymax+Ymm] = 1
  2063  	ycover[Ycx*Ymax+Ymm] = 1
  2064  	ycover[Yrx*Ymax+Ymm] = 1
  2065  	ycover[Yrl*Ymax+Ymm] = 1
  2066  	ycover[Yrl32*Ymax+Ymm] = 1
  2067  	ycover[Ym*Ymax+Ymm] = 1
  2068  	ycover[Ymr*Ymax+Ymm] = 1
  2069  
  2070  	ycover[Ym*Ymax+Yxm] = 1
  2071  	ycover[Yxr*Ymax+Yxm] = 1
  2072  
  2073  	ycover[Ym*Ymax+Yym] = 1
  2074  	ycover[Yyr*Ymax+Yym] = 1
  2075  
  2076  	for i := 0; i < MAXREG; i++ {
  2077  		reg[i] = -1
  2078  		if i >= REG_AL && i <= REG_R15B {
  2079  			reg[i] = (i - REG_AL) & 7
  2080  			if i >= REG_SPB && i <= REG_DIB {
  2081  				regrex[i] = 0x40
  2082  			}
  2083  			if i >= REG_R8B && i <= REG_R15B {
  2084  				regrex[i] = Rxr | Rxx | Rxb
  2085  			}
  2086  		}
  2087  
  2088  		if i >= REG_AH && i <= REG_BH {
  2089  			reg[i] = 4 + ((i - REG_AH) & 7)
  2090  		}
  2091  		if i >= REG_AX && i <= REG_R15 {
  2092  			reg[i] = (i - REG_AX) & 7
  2093  			if i >= REG_R8 {
  2094  				regrex[i] = Rxr | Rxx | Rxb
  2095  			}
  2096  		}
  2097  
  2098  		if i >= REG_F0 && i <= REG_F0+7 {
  2099  			reg[i] = (i - REG_F0) & 7
  2100  		}
  2101  		if i >= REG_M0 && i <= REG_M0+7 {
  2102  			reg[i] = (i - REG_M0) & 7
  2103  		}
  2104  		if i >= REG_X0 && i <= REG_X0+15 {
  2105  			reg[i] = (i - REG_X0) & 7
  2106  			if i >= REG_X0+8 {
  2107  				regrex[i] = Rxr | Rxx | Rxb
  2108  			}
  2109  		}
  2110  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2111  			reg[i] = (i - REG_Y0) & 7
  2112  			if i >= REG_Y0+8 {
  2113  				regrex[i] = Rxr | Rxx | Rxb
  2114  			}
  2115  		}
  2116  
  2117  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2118  			regrex[i] = Rxr
  2119  		}
  2120  	}
  2121  }
  2122  
// isAndroid records whether objabi.GOOS is "android". prefixof consults it
// when choosing the segment prefix for TLS references.
var isAndroid = (objabi.GOOS == "android")
  2124  
  2125  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2126  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2127  		return 0
  2128  	}
  2129  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2130  		switch a.Reg {
  2131  		case REG_CS:
  2132  			return 0x2e
  2133  
  2134  		case REG_DS:
  2135  			return 0x3e
  2136  
  2137  		case REG_ES:
  2138  			return 0x26
  2139  
  2140  		case REG_FS:
  2141  			return 0x64
  2142  
  2143  		case REG_GS:
  2144  			return 0x65
  2145  
  2146  		case REG_TLS:
  2147  			// NOTE: Systems listed here should be only systems that
  2148  			// support direct TLS references like 8(TLS) implemented as
  2149  			// direct references from FS or GS. Systems that require
  2150  			// the initial-exec model, where you load the TLS base into
  2151  			// a register and then index from that register, do not reach
  2152  			// this code and should not be listed.
  2153  			if ctxt.Arch.Family == sys.I386 {
  2154  				switch ctxt.Headtype {
  2155  				default:
  2156  					if isAndroid {
  2157  						return 0x65 // GS
  2158  					}
  2159  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2160  
  2161  				case objabi.Hdarwin,
  2162  					objabi.Hdragonfly,
  2163  					objabi.Hfreebsd,
  2164  					objabi.Hnetbsd,
  2165  					objabi.Hopenbsd:
  2166  					return 0x65 // GS
  2167  				}
  2168  			}
  2169  
  2170  			switch ctxt.Headtype {
  2171  			default:
  2172  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2173  
  2174  			case objabi.Hlinux:
  2175  				if isAndroid {
  2176  					return 0x64 // FS
  2177  				}
  2178  
  2179  				if ctxt.Flag_shared {
  2180  					log.Fatalf("unknown TLS base register for linux with -shared")
  2181  				} else {
  2182  					return 0x64 // FS
  2183  				}
  2184  
  2185  			case objabi.Hdragonfly,
  2186  				objabi.Hfreebsd,
  2187  				objabi.Hnetbsd,
  2188  				objabi.Hopenbsd,
  2189  				objabi.Hsolaris:
  2190  				return 0x64 // FS
  2191  
  2192  			case objabi.Hdarwin:
  2193  				return 0x65 // GS
  2194  			}
  2195  		}
  2196  	}
  2197  
  2198  	if ctxt.Arch.Family == sys.I386 {
  2199  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2200  			// When building for inclusion into a shared library, an instruction of the form
  2201  			//     MOVL 0(CX)(TLS*1), AX
  2202  			// becomes
  2203  			//     mov %gs:(%ecx), %eax
  2204  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2205  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2206  			// a shared library the instruction it becomes
  2207  			//     mov 0x0(%ecx), $eax
  2208  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2209  			if a.Offset != 0 {
  2210  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2211  			}
  2212  			return 0x65 // GS
  2213  		}
  2214  		return 0
  2215  	}
  2216  
  2217  	switch a.Index {
  2218  	case REG_CS:
  2219  		return 0x2e
  2220  
  2221  	case REG_DS:
  2222  		return 0x3e
  2223  
  2224  	case REG_ES:
  2225  		return 0x26
  2226  
  2227  	case REG_TLS:
  2228  		if ctxt.Flag_shared {
  2229  			// When building for inclusion into a shared library, an instruction of the form
  2230  			//     MOV 0(CX)(TLS*1), AX
  2231  			// becomes
  2232  			//     mov %fs:(%rcx), %rax
  2233  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2234  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2235  			// a shared library the instruction does not require a prefix.
  2236  			if a.Offset != 0 {
  2237  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2238  			}
  2239  			return 0x64
  2240  		}
  2241  
  2242  	case REG_FS:
  2243  		return 0x64
  2244  
  2245  	case REG_GS:
  2246  		return 0x65
  2247  	}
  2248  
  2249  	return 0
  2250  }
  2251  
  2252  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2253  	switch a.Type {
  2254  	case obj.TYPE_NONE:
  2255  		return Ynone
  2256  
  2257  	case obj.TYPE_BRANCH:
  2258  		return Ybr
  2259  
  2260  	case obj.TYPE_INDIR:
  2261  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2262  			return Yindir
  2263  		}
  2264  		return Yxxx
  2265  
  2266  	case obj.TYPE_MEM:
  2267  		if a.Index == REG_SP {
  2268  			// Can't use SP as the index register
  2269  			return Yxxx
  2270  		}
  2271  		if ctxt.Arch.Family == sys.AMD64 {
  2272  			// Offset must fit in a 32-bit signed field (or fit in a 32-bit unsigned field
  2273  			// where the sign extension doesn't matter).
  2274  			// Note: The latter happens only in assembly, for example crypto/sha1/sha1block_amd64.s.
  2275  			if !(a.Offset == int64(int32(a.Offset)) ||
  2276  				a.Offset == int64(uint32(a.Offset)) && p.As == ALEAL) {
  2277  				return Yxxx
  2278  			}
  2279  			switch a.Name {
  2280  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2281  				// Global variables can't use index registers and their
  2282  				// base register is %rip (%rip is encoded as REG_NONE).
  2283  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2284  					return Yxxx
  2285  				}
  2286  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2287  				// These names must have a base of SP.  The old compiler
  2288  				// uses 0 for the base register. SSA uses REG_SP.
  2289  				if a.Reg != REG_SP && a.Reg != 0 {
  2290  					return Yxxx
  2291  				}
  2292  			case obj.NAME_NONE:
  2293  				// everything is ok
  2294  			default:
  2295  				// unknown name
  2296  				return Yxxx
  2297  			}
  2298  		}
  2299  		return Ym
  2300  
  2301  	case obj.TYPE_ADDR:
  2302  		switch a.Name {
  2303  		case obj.NAME_GOTREF:
  2304  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2305  			return Yxxx
  2306  
  2307  		case obj.NAME_EXTERN,
  2308  			obj.NAME_STATIC:
  2309  			if a.Sym != nil && isextern(a.Sym) || (ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared) {
  2310  				return Yi32
  2311  			}
  2312  			return Yiauto // use pc-relative addressing
  2313  
  2314  		case obj.NAME_AUTO,
  2315  			obj.NAME_PARAM:
  2316  			return Yiauto
  2317  		}
  2318  
  2319  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2320  		// and got Yi32 in an earlier version of this code.
  2321  		// Keep doing that until we fix yduff etc.
  2322  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2323  			return Yi32
  2324  		}
  2325  
  2326  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2327  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2328  		}
  2329  		fallthrough
  2330  
  2331  		// fall through
  2332  
  2333  	case obj.TYPE_CONST:
  2334  		if a.Sym != nil {
  2335  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2336  		}
  2337  
  2338  		v := a.Offset
  2339  		if ctxt.Arch.Family == sys.I386 {
  2340  			v = int64(int32(v))
  2341  		}
  2342  		if v == 0 {
  2343  			if p.Mark&PRESERVEFLAGS != 0 {
  2344  				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
  2345  				return Yu7
  2346  			}
  2347  			return Yi0
  2348  		}
  2349  		if v == 1 {
  2350  			return Yi1
  2351  		}
  2352  		if v >= 0 && v <= 127 {
  2353  			return Yu7
  2354  		}
  2355  		if v >= 0 && v <= 255 {
  2356  			return Yu8
  2357  		}
  2358  		if v >= -128 && v <= 127 {
  2359  			return Yi8
  2360  		}
  2361  		if ctxt.Arch.Family == sys.I386 {
  2362  			return Yi32
  2363  		}
  2364  		l := int32(v)
  2365  		if int64(l) == v {
  2366  			return Ys32 /* can sign extend */
  2367  		}
  2368  		if v>>32 == 0 {
  2369  			return Yi32 /* unsigned */
  2370  		}
  2371  		return Yi64
  2372  
  2373  	case obj.TYPE_TEXTSIZE:
  2374  		return Ytextsize
  2375  	}
  2376  
  2377  	if a.Type != obj.TYPE_REG {
  2378  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2379  		return Yxxx
  2380  	}
  2381  
  2382  	switch a.Reg {
  2383  	case REG_AL:
  2384  		return Yal
  2385  
  2386  	case REG_AX:
  2387  		return Yax
  2388  
  2389  		/*
  2390  			case REG_SPB:
  2391  		*/
  2392  	case REG_BPB,
  2393  		REG_SIB,
  2394  		REG_DIB,
  2395  		REG_R8B,
  2396  		REG_R9B,
  2397  		REG_R10B,
  2398  		REG_R11B,
  2399  		REG_R12B,
  2400  		REG_R13B,
  2401  		REG_R14B,
  2402  		REG_R15B:
  2403  		if ctxt.Arch.Family == sys.I386 {
  2404  			return Yxxx
  2405  		}
  2406  		fallthrough
  2407  
  2408  	case REG_DL,
  2409  		REG_BL,
  2410  		REG_AH,
  2411  		REG_CH,
  2412  		REG_DH,
  2413  		REG_BH:
  2414  		return Yrb
  2415  
  2416  	case REG_CL:
  2417  		return Ycl
  2418  
  2419  	case REG_CX:
  2420  		return Ycx
  2421  
  2422  	case REG_DX, REG_BX:
  2423  		return Yrx
  2424  
  2425  	case REG_R8, /* not really Yrl */
  2426  		REG_R9,
  2427  		REG_R10,
  2428  		REG_R11,
  2429  		REG_R12,
  2430  		REG_R13,
  2431  		REG_R14,
  2432  		REG_R15:
  2433  		if ctxt.Arch.Family == sys.I386 {
  2434  			return Yxxx
  2435  		}
  2436  		fallthrough
  2437  
  2438  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2439  		if ctxt.Arch.Family == sys.I386 {
  2440  			return Yrl32
  2441  		}
  2442  		return Yrl
  2443  
  2444  	case REG_F0 + 0:
  2445  		return Yf0
  2446  
  2447  	case REG_F0 + 1,
  2448  		REG_F0 + 2,
  2449  		REG_F0 + 3,
  2450  		REG_F0 + 4,
  2451  		REG_F0 + 5,
  2452  		REG_F0 + 6,
  2453  		REG_F0 + 7:
  2454  		return Yrf
  2455  
  2456  	case REG_M0 + 0,
  2457  		REG_M0 + 1,
  2458  		REG_M0 + 2,
  2459  		REG_M0 + 3,
  2460  		REG_M0 + 4,
  2461  		REG_M0 + 5,
  2462  		REG_M0 + 6,
  2463  		REG_M0 + 7:
  2464  		return Ymr
  2465  
  2466  	case REG_X0 + 0,
  2467  		REG_X0 + 1,
  2468  		REG_X0 + 2,
  2469  		REG_X0 + 3,
  2470  		REG_X0 + 4,
  2471  		REG_X0 + 5,
  2472  		REG_X0 + 6,
  2473  		REG_X0 + 7,
  2474  		REG_X0 + 8,
  2475  		REG_X0 + 9,
  2476  		REG_X0 + 10,
  2477  		REG_X0 + 11,
  2478  		REG_X0 + 12,
  2479  		REG_X0 + 13,
  2480  		REG_X0 + 14,
  2481  		REG_X0 + 15:
  2482  		return Yxr
  2483  
  2484  	case REG_Y0 + 0,
  2485  		REG_Y0 + 1,
  2486  		REG_Y0 + 2,
  2487  		REG_Y0 + 3,
  2488  		REG_Y0 + 4,
  2489  		REG_Y0 + 5,
  2490  		REG_Y0 + 6,
  2491  		REG_Y0 + 7,
  2492  		REG_Y0 + 8,
  2493  		REG_Y0 + 9,
  2494  		REG_Y0 + 10,
  2495  		REG_Y0 + 11,
  2496  		REG_Y0 + 12,
  2497  		REG_Y0 + 13,
  2498  		REG_Y0 + 14,
  2499  		REG_Y0 + 15:
  2500  		return Yyr
  2501  
  2502  	case REG_CS:
  2503  		return Ycs
  2504  	case REG_SS:
  2505  		return Yss
  2506  	case REG_DS:
  2507  		return Yds
  2508  	case REG_ES:
  2509  		return Yes
  2510  	case REG_FS:
  2511  		return Yfs
  2512  	case REG_GS:
  2513  		return Ygs
  2514  	case REG_TLS:
  2515  		return Ytls
  2516  
  2517  	case REG_GDTR:
  2518  		return Ygdtr
  2519  	case REG_IDTR:
  2520  		return Yidtr
  2521  	case REG_LDTR:
  2522  		return Yldtr
  2523  	case REG_MSW:
  2524  		return Ymsw
  2525  	case REG_TASK:
  2526  		return Ytask
  2527  
  2528  	case REG_CR + 0:
  2529  		return Ycr0
  2530  	case REG_CR + 1:
  2531  		return Ycr1
  2532  	case REG_CR + 2:
  2533  		return Ycr2
  2534  	case REG_CR + 3:
  2535  		return Ycr3
  2536  	case REG_CR + 4:
  2537  		return Ycr4
  2538  	case REG_CR + 5:
  2539  		return Ycr5
  2540  	case REG_CR + 6:
  2541  		return Ycr6
  2542  	case REG_CR + 7:
  2543  		return Ycr7
  2544  	case REG_CR + 8:
  2545  		return Ycr8
  2546  
  2547  	case REG_DR + 0:
  2548  		return Ydr0
  2549  	case REG_DR + 1:
  2550  		return Ydr1
  2551  	case REG_DR + 2:
  2552  		return Ydr2
  2553  	case REG_DR + 3:
  2554  		return Ydr3
  2555  	case REG_DR + 4:
  2556  		return Ydr4
  2557  	case REG_DR + 5:
  2558  		return Ydr5
  2559  	case REG_DR + 6:
  2560  		return Ydr6
  2561  	case REG_DR + 7:
  2562  		return Ydr7
  2563  
  2564  	case REG_TR + 0:
  2565  		return Ytr0
  2566  	case REG_TR + 1:
  2567  		return Ytr1
  2568  	case REG_TR + 2:
  2569  		return Ytr2
  2570  	case REG_TR + 3:
  2571  		return Ytr3
  2572  	case REG_TR + 4:
  2573  		return Ytr4
  2574  	case REG_TR + 5:
  2575  		return Ytr5
  2576  	case REG_TR + 6:
  2577  		return Ytr6
  2578  	case REG_TR + 7:
  2579  		return Ytr7
  2580  	}
  2581  
  2582  	return Yxxx
  2583  }
  2584  
  2585  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  2586  // and hold assembly state.
  2587  type AsmBuf struct {
  2588  	buf     [100]byte
  2589  	off     int
  2590  	rexflag int
  2591  	vexflag int
  2592  	rep     int
  2593  	repn    int
  2594  	lock    bool
  2595  }
  2596  
  2597  // Put1 appends one byte to the end of the buffer.
  2598  func (a *AsmBuf) Put1(x byte) {
  2599  	a.buf[a.off] = x
  2600  	a.off++
  2601  }
  2602  
  2603  // Put2 appends two bytes to the end of the buffer.
  2604  func (a *AsmBuf) Put2(x, y byte) {
  2605  	a.buf[a.off+0] = x
  2606  	a.buf[a.off+1] = y
  2607  	a.off += 2
  2608  }
  2609  
  2610  // Put3 appends three bytes to the end of the buffer.
  2611  func (a *AsmBuf) Put3(x, y, z byte) {
  2612  	a.buf[a.off+0] = x
  2613  	a.buf[a.off+1] = y
  2614  	a.buf[a.off+2] = z
  2615  	a.off += 3
  2616  }
  2617  
  2618  // Put4 appends four bytes to the end of the buffer.
  2619  func (a *AsmBuf) Put4(x, y, z, w byte) {
  2620  	a.buf[a.off+0] = x
  2621  	a.buf[a.off+1] = y
  2622  	a.buf[a.off+2] = z
  2623  	a.buf[a.off+3] = w
  2624  	a.off += 4
  2625  }
  2626  
  2627  // PutInt16 writes v into the buffer using little-endian encoding.
  2628  func (a *AsmBuf) PutInt16(v int16) {
  2629  	a.buf[a.off+0] = byte(v)
  2630  	a.buf[a.off+1] = byte(v >> 8)
  2631  	a.off += 2
  2632  }
  2633  
  2634  // PutInt32 writes v into the buffer using little-endian encoding.
  2635  func (a *AsmBuf) PutInt32(v int32) {
  2636  	a.buf[a.off+0] = byte(v)
  2637  	a.buf[a.off+1] = byte(v >> 8)
  2638  	a.buf[a.off+2] = byte(v >> 16)
  2639  	a.buf[a.off+3] = byte(v >> 24)
  2640  	a.off += 4
  2641  }
  2642  
  2643  // PutInt64 writes v into the buffer using little-endian encoding.
  2644  func (a *AsmBuf) PutInt64(v int64) {
  2645  	a.buf[a.off+0] = byte(v)
  2646  	a.buf[a.off+1] = byte(v >> 8)
  2647  	a.buf[a.off+2] = byte(v >> 16)
  2648  	a.buf[a.off+3] = byte(v >> 24)
  2649  	a.buf[a.off+4] = byte(v >> 32)
  2650  	a.buf[a.off+5] = byte(v >> 40)
  2651  	a.buf[a.off+6] = byte(v >> 48)
  2652  	a.buf[a.off+7] = byte(v >> 56)
  2653  	a.off += 8
  2654  }
  2655  
  2656  // Put copies b into the buffer.
  2657  func (a *AsmBuf) Put(b []byte) {
  2658  	copy(a.buf[a.off:], b)
  2659  	a.off += len(b)
  2660  }
  2661  
  2662  // Insert inserts b at offset i.
  2663  func (a *AsmBuf) Insert(i int, b byte) {
  2664  	a.off++
  2665  	copy(a.buf[i+1:a.off], a.buf[i:a.off-1])
  2666  	a.buf[i] = b
  2667  }
  2668  
  2669  // Last returns the byte at the end of the buffer.
  2670  func (a *AsmBuf) Last() byte { return a.buf[a.off-1] }
  2671  
  2672  // Len returns the length of the buffer.
  2673  func (a *AsmBuf) Len() int { return a.off }
  2674  
  2675  // Bytes returns the contents of the buffer.
  2676  func (a *AsmBuf) Bytes() []byte { return a.buf[:a.off] }
  2677  
  2678  // Reset empties the buffer.
  2679  func (a *AsmBuf) Reset() { a.off = 0 }
  2680  
  2681  // At returns the byte at offset i.
  2682  func (a *AsmBuf) At(i int) byte { return a.buf[i] }
  2683  
  2684  func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2685  	var i int
  2686  
  2687  	switch index {
  2688  	default:
  2689  		goto bad
  2690  
  2691  	case REG_NONE:
  2692  		i = 4 << 3
  2693  		goto bas
  2694  
  2695  	case REG_R8,
  2696  		REG_R9,
  2697  		REG_R10,
  2698  		REG_R11,
  2699  		REG_R12,
  2700  		REG_R13,
  2701  		REG_R14,
  2702  		REG_R15:
  2703  		if ctxt.Arch.Family == sys.I386 {
  2704  			goto bad
  2705  		}
  2706  		fallthrough
  2707  
  2708  	case REG_AX,
  2709  		REG_CX,
  2710  		REG_DX,
  2711  		REG_BX,
  2712  		REG_BP,
  2713  		REG_SI,
  2714  		REG_DI:
  2715  		i = reg[index] << 3
  2716  	}
  2717  
  2718  	switch scale {
  2719  	default:
  2720  		goto bad
  2721  
  2722  	case 1:
  2723  		break
  2724  
  2725  	case 2:
  2726  		i |= 1 << 6
  2727  
  2728  	case 4:
  2729  		i |= 2 << 6
  2730  
  2731  	case 8:
  2732  		i |= 3 << 6
  2733  	}
  2734  
  2735  bas:
  2736  	switch base {
  2737  	default:
  2738  		goto bad
  2739  
  2740  	case REG_NONE: /* must be mod=00 */
  2741  		i |= 5
  2742  
  2743  	case REG_R8,
  2744  		REG_R9,
  2745  		REG_R10,
  2746  		REG_R11,
  2747  		REG_R12,
  2748  		REG_R13,
  2749  		REG_R14,
  2750  		REG_R15:
  2751  		if ctxt.Arch.Family == sys.I386 {
  2752  			goto bad
  2753  		}
  2754  		fallthrough
  2755  
  2756  	case REG_AX,
  2757  		REG_CX,
  2758  		REG_DX,
  2759  		REG_BX,
  2760  		REG_SP,
  2761  		REG_BP,
  2762  		REG_SI,
  2763  		REG_DI:
  2764  		i |= reg[base]
  2765  	}
  2766  
  2767  	asmbuf.Put1(byte(i))
  2768  	return
  2769  
  2770  bad:
  2771  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2772  	asmbuf.Put1(0)
  2773  	return
  2774  }
  2775  
  2776  func (asmbuf *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  2777  	var rel obj.Reloc
  2778  
  2779  	v := vaddr(ctxt, p, a, &rel)
  2780  	if rel.Siz != 0 {
  2781  		if rel.Siz != 4 {
  2782  			ctxt.Diag("bad reloc")
  2783  		}
  2784  		r := obj.Addrel(cursym)
  2785  		*r = rel
  2786  		r.Off = int32(p.Pc + int64(asmbuf.Len()))
  2787  	}
  2788  
  2789  	asmbuf.PutInt32(int32(v))
  2790  }
  2791  
  2792  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2793  	if r != nil {
  2794  		*r = obj.Reloc{}
  2795  	}
  2796  
  2797  	switch a.Name {
  2798  	case obj.NAME_STATIC,
  2799  		obj.NAME_GOTREF,
  2800  		obj.NAME_EXTERN:
  2801  		s := a.Sym
  2802  		if r == nil {
  2803  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2804  			log.Fatalf("reloc")
  2805  		}
  2806  
  2807  		if a.Name == obj.NAME_GOTREF {
  2808  			r.Siz = 4
  2809  			r.Type = objabi.R_GOTPCREL
  2810  		} else if isextern(s) || (ctxt.Arch.Family != sys.AMD64 && !ctxt.Flag_shared) {
  2811  			r.Siz = 4
  2812  			r.Type = objabi.R_ADDR
  2813  		} else {
  2814  			r.Siz = 4
  2815  			r.Type = objabi.R_PCREL
  2816  		}
  2817  
  2818  		r.Off = -1 // caller must fill in
  2819  		r.Sym = s
  2820  		r.Add = a.Offset
  2821  
  2822  		return 0
  2823  	}
  2824  
  2825  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2826  		if r == nil {
  2827  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2828  			log.Fatalf("reloc")
  2829  		}
  2830  
  2831  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  2832  			r.Type = objabi.R_TLS_LE
  2833  			r.Siz = 4
  2834  			r.Off = -1 // caller must fill in
  2835  			r.Add = a.Offset
  2836  		}
  2837  		return 0
  2838  	}
  2839  
  2840  	return a.Offset
  2841  }
  2842  
// asmandsz encodes operand a as the register-or-memory part of an
// instruction. It appends the addressing byte (with r placed in bits 3-5),
// an optional scale/index/base byte produced by asmidx, and an optional
// 8-bit or 32-bit displacement. When vaddr asks for a relocation, it is
// recorded on cursym at the position of the 32-bit displacement.
//
// rex carries REX bits contributed by the caller's register operand. Only
// the 0x40 and Rxr bits are kept; they are OR'ed into asmbuf.rexflag together
// with the Rxb/Rxx bits required by the registers of a itself.
// m64 is not used by this function.
//
// Problems are reported through ctxt.Diag. An unencodable operand ends at the
// bad label, which emits no further bytes.
func (asmbuf *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Keep only the bits the caller is allowed to contribute.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && asmbuf.rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement; it may be replaced below by vaddr's result.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR operands are never encodable here; the two checks only
		// add more specific diagnostics before the generic one.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: top two bits set, register number in the
		// low bits, r in bits 3-5. A non-zero offset is not allowed.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		asmbuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		asmbuf.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: every form below uses
	// 4 in the low three bits and is followed by an asmidx byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// This base shadows the function-level variable; every path out of
		// this block returns or jumps to a label, so the outer one is not
		// needed here.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !isextern(a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		asmbuf.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: always followed by a 32-bit displacement.
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// No displacement at all. BP and R13 are excluded from this form and
		// fall through to the 8-bit displacement form below.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in a signed byte and needs no relocation.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			asmbuf.Put1(byte(v))
			return
		}

		// Everything else: 32-bit displacement, possibly relocated.
		asmbuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without a real index register (a.Index is REG_NONE or
	// REG_TLS).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	asmbuf.rexflag |= regrex[base]&Rxb | rex
	// No usable base register (none, a segment register, or TLS): the operand
	// is a bare 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Short form (5 in the low bits, no following asmidx-style byte):
		// used for non-extern symbol references with no base, and always
		// when not assembling for AMD64.
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			asmbuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		asmbuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base: an asmidx byte with no index is always emitted.
	// Note that these paths do not consult rel.Siz when choosing the
	// displacement size.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			asmbuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			asmbuf.Put1(byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general register as base.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// TLS used as index in a non-shared build: move the displacement
			// into an R_TLS_LE relocation addend and emit a zero placeholder.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// Same three displacement sizes as in the indexed case above,
		// without the extra asmidx byte.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

	// putrelv writes the 32-bit displacement v, first recording the pending
	// relocation (if any) at the current output position.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// This r shadows the parameter r inside the if block only.
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(asmbuf.Len()))
	}

	asmbuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  3043  
  3044  func (asmbuf *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3045  	asmbuf.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3046  }
  3047  
  3048  func (asmbuf *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3049  	asmbuf.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3050  }
  3051  
  3052  func bytereg(a *obj.Addr, t *uint8) {
  3053  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3054  		a.Reg += REG_AL - REG_AX
  3055  		*t = 0
  3056  	}
  3057  }
  3058  
  3059  func unbytereg(a *obj.Addr, t *uint8) {
  3060  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3061  		a.Reg += REG_AX - REG_AL
  3062  		*t = 0
  3063  	}
  3064  }
  3065  
  3066  const (
  3067  	E = 0xff
  3068  )
  3069  
// ymovtab lists special-case encodings: pushes and pops of segment registers,
// moves to and from segment, control, debug, test and descriptor-table
// registers, double shifts, and the TLS-base load.
//
// Each entry has six positional fields. Judging from the values used below
// they are: the instruction, the operand classes of the first operand, the
// optional middle operand and the destination, a small integer code, and up
// to four opcode/payload bytes.
// NOTE(review): the Movtab type and the code that interprets the integer code
// and the E (0xff) bytes are outside this excerpt; confirm the exact meaning
// there. The final all-zero entry looks like a terminator.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3209  
  3210  func isax(a *obj.Addr) bool {
  3211  	switch a.Reg {
  3212  	case REG_AX, REG_AL, REG_AH:
  3213  		return true
  3214  	}
  3215  
  3216  	if a.Index == REG_AX {
  3217  		return true
  3218  	}
  3219  	return false
  3220  }
  3221  
  3222  func subreg(p *obj.Prog, from int, to int) {
  3223  	if false { /* debug['Q'] */
  3224  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3225  	}
  3226  
  3227  	if int(p.From.Reg) == from {
  3228  		p.From.Reg = int16(to)
  3229  		p.Ft = 0
  3230  	}
  3231  
  3232  	if int(p.To.Reg) == from {
  3233  		p.To.Reg = int16(to)
  3234  		p.Tt = 0
  3235  	}
  3236  
  3237  	if int(p.From.Index) == from {
  3238  		p.From.Index = int16(to)
  3239  		p.Ft = 0
  3240  	}
  3241  
  3242  	if int(p.To.Index) == from {
  3243  		p.To.Index = int16(to)
  3244  		p.Tt = 0
  3245  	}
  3246  
  3247  	if false { /* debug['Q'] */
  3248  		fmt.Printf("%v\n", p)
  3249  	}
  3250  }
  3251  
  3252  func (asmbuf *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3253  	switch op {
  3254  	case Pm, Pe, Pf2, Pf3:
  3255  		if osize != 1 {
  3256  			if op != Pm {
  3257  				asmbuf.Put1(byte(op))
  3258  			}
  3259  			asmbuf.Put1(Pm)
  3260  			z++
  3261  			op = int(o.op[z])
  3262  			break
  3263  		}
  3264  		fallthrough
  3265  
  3266  	default:
  3267  		if asmbuf.Len() == 0 || asmbuf.Last() != Pm {
  3268  			asmbuf.Put1(Pm)
  3269  		}
  3270  	}
  3271  
  3272  	asmbuf.Put1(byte(op))
  3273  	return z
  3274  }
  3275  
  3276  var bpduff1 = []byte{
  3277  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3278  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3279  }
  3280  
  3281  var bpduff2 = []byte{
  3282  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3283  }
  3284  
  3285  // Emit VEX prefix and opcode byte.
  3286  // The three addresses are the r/m, vvvv, and reg fields.
  3287  // The reg and rm arguments appear in the same order as the
  3288  // arguments to asmand, which typically follows the call to asmvex.
  3289  // The final two arguments are the VEX prefix (see encoding above)
  3290  // and the opcode byte.
  3291  // For details about vex prefix see:
  3292  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3293  func (asmbuf *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3294  	asmbuf.vexflag = 1
  3295  	rexR := 0
  3296  	if r != nil {
  3297  		rexR = regrex[r.Reg] & Rxr
  3298  	}
  3299  	rexB := 0
  3300  	rexX := 0
  3301  	if rm != nil {
  3302  		rexB = regrex[rm.Reg] & Rxb
  3303  		rexX = regrex[rm.Index] & Rxx
  3304  	}
  3305  	vexM := (vex >> 3) & 0xF
  3306  	vexWLP := vex & 0x87
  3307  	vexV := byte(0)
  3308  	if v != nil {
  3309  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3310  	}
  3311  	vexV ^= 0xF
  3312  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3313  		// Can use 2-byte encoding.
  3314  		asmbuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3315  	} else {
  3316  		// Must use 3-byte encoding.
  3317  		asmbuf.Put3(0xc4,
  3318  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3319  			vexV<<3|vexWLP,
  3320  		)
  3321  	}
  3322  	asmbuf.Put1(opcode)
  3323  }
  3324  
  3325  func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  3326  	o := opindex[p.As&obj.AMask]
  3327  
  3328  	if o == nil {
  3329  		ctxt.Diag("asmins: missing op %v", p)
  3330  		return
  3331  	}
  3332  
  3333  	pre := prefixof(ctxt, p, &p.From)
  3334  	if pre != 0 {
  3335  		asmbuf.Put1(byte(pre))
  3336  	}
  3337  	pre = prefixof(ctxt, p, &p.To)
  3338  	if pre != 0 {
  3339  		asmbuf.Put1(byte(pre))
  3340  	}
  3341  
  3342  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3343  	// which encodes as SHRQ $32(DX*0), AX.
  3344  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3345  	// Change encoding generated by assemblers and compilers and remove.
  3346  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3347  		p.From3 = new(obj.Addr)
  3348  		p.From3.Type = obj.TYPE_REG
  3349  		p.From3.Reg = p.From.Index
  3350  		p.From.Index = 0
  3351  	}
  3352  
  3353  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3354  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3355  	switch p.As {
  3356  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3357  		if p.From3Type() == obj.TYPE_NONE {
  3358  			p.From3 = new(obj.Addr)
  3359  			*p.From3 = p.From
  3360  			p.From = obj.Addr{}
  3361  			p.From.Type = obj.TYPE_CONST
  3362  			p.From.Offset = p.To.Offset
  3363  			p.To.Offset = 0
  3364  		}
  3365  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3366  		if p.From3Type() == obj.TYPE_NONE {
  3367  			p.From3 = new(obj.Addr)
  3368  			*p.From3 = p.To
  3369  			p.To = obj.Addr{}
  3370  			p.To.Type = obj.TYPE_CONST
  3371  			p.To.Offset = p.From3.Offset
  3372  			p.From3.Offset = 0
  3373  		}
  3374  	}
  3375  
  3376  	if p.Ft == 0 {
  3377  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3378  	}
  3379  	if p.Tt == 0 {
  3380  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3381  	}
  3382  
  3383  	ft := int(p.Ft) * Ymax
  3384  	f3t := Ynone * Ymax
  3385  	if p.From3 != nil {
  3386  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3387  	}
  3388  	tt := int(p.Tt) * Ymax
  3389  
  3390  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3391  	z := 0
  3392  	var a *obj.Addr
  3393  	var l int
  3394  	var op int
  3395  	var q *obj.Prog
  3396  	var r *obj.Reloc
  3397  	var rel obj.Reloc
  3398  	var v int64
  3399  	for i := range o.ytab {
  3400  		yt := &o.ytab[i]
  3401  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3402  			switch o.prefix {
  3403  			case Px1: /* first option valid only in 32-bit mode */
  3404  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  3405  					z += int(yt.zoffset) + xo
  3406  					continue
  3407  				}
  3408  			case Pq: /* 16 bit escape and opcode escape */
  3409  				asmbuf.Put2(Pe, Pm)
  3410  
  3411  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3412  				asmbuf.rexflag |= Pw
  3413  				asmbuf.Put2(Pe, Pm)
  3414  
  3415  			case Pq4: /*  66 0F 38 */
  3416  				asmbuf.Put3(0x66, 0x0F, 0x38)
  3417  
  3418  			case Pf2, /* xmm opcode escape */
  3419  				Pf3:
  3420  				asmbuf.Put2(o.prefix, Pm)
  3421  
  3422  			case Pef3:
  3423  				asmbuf.Put3(Pe, Pf3, Pm)
  3424  
  3425  			case Pfw: /* xmm opcode escape + REX.W */
  3426  				asmbuf.rexflag |= Pw
  3427  				asmbuf.Put2(Pf3, Pm)
  3428  
  3429  			case Pm: /* opcode escape */
  3430  				asmbuf.Put1(Pm)
  3431  
  3432  			case Pe: /* 16 bit escape */
  3433  				asmbuf.Put1(Pe)
  3434  
  3435  			case Pw: /* 64-bit escape */
  3436  				if ctxt.Arch.Family != sys.AMD64 {
  3437  					ctxt.Diag("asmins: illegal 64: %v", p)
  3438  				}
  3439  				asmbuf.rexflag |= Pw
  3440  
  3441  			case Pw8: /* 64-bit escape if z >= 8 */
  3442  				if z >= 8 {
  3443  					if ctxt.Arch.Family != sys.AMD64 {
  3444  						ctxt.Diag("asmins: illegal 64: %v", p)
  3445  					}
  3446  					asmbuf.rexflag |= Pw
  3447  				}
  3448  
  3449  			case Pb: /* botch */
  3450  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3451  					goto bad
  3452  				}
  3453  				// NOTE(rsc): This is probably safe to do always,
  3454  				// but when enabled it chooses different encodings
  3455  				// than the old cmd/internal/obj/i386 code did,
  3456  				// which breaks our "same bits out" checks.
  3457  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3458  				// in the original obj/i386, and it would encode
  3459  				// (using a valid, shorter form) as 3c 00 if we enabled
  3460  				// the call to bytereg here.
  3461  				if ctxt.Arch.Family == sys.AMD64 {
  3462  					bytereg(&p.From, &p.Ft)
  3463  					bytereg(&p.To, &p.Tt)
  3464  				}
  3465  
  3466  			case P32: /* 32 bit but illegal if 64-bit mode */
  3467  				if ctxt.Arch.Family == sys.AMD64 {
  3468  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3469  				}
  3470  
  3471  			case Py: /* 64-bit only, no prefix */
  3472  				if ctxt.Arch.Family != sys.AMD64 {
  3473  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3474  				}
  3475  
  3476  			case Py1: /* 64-bit only if z < 1, no prefix */
  3477  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  3478  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3479  				}
  3480  
  3481  			case Py3: /* 64-bit only if z < 3, no prefix */
  3482  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  3483  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3484  				}
  3485  			}
  3486  
  3487  			if z >= len(o.op) {
  3488  				log.Fatalf("asmins bad table %v", p)
  3489  			}
  3490  			op = int(o.op[z])
  3491  			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
  3492  			if op == 0x0f && o.prefix != Pvex {
  3493  				asmbuf.Put1(byte(op))
  3494  				z++
  3495  				op = int(o.op[z])
  3496  			}
  3497  
  3498  			switch yt.zcase {
  3499  			default:
  3500  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3501  				return
  3502  
  3503  			case Zpseudo:
  3504  				break
  3505  
  3506  			case Zlit:
  3507  				for ; ; z++ {
  3508  					op = int(o.op[z])
  3509  					if op == 0 {
  3510  						break
  3511  					}
  3512  					asmbuf.Put1(byte(op))
  3513  				}
  3514  
  3515  			case Zlitm_r:
  3516  				for ; ; z++ {
  3517  					op = int(o.op[z])
  3518  					if op == 0 {
  3519  						break
  3520  					}
  3521  					asmbuf.Put1(byte(op))
  3522  				}
  3523  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3524  
  3525  			case Zmb_r:
  3526  				bytereg(&p.From, &p.Ft)
  3527  				fallthrough
  3528  
  3529  			case Zm_r:
  3530  				asmbuf.Put1(byte(op))
  3531  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3532  
  3533  			case Zm2_r:
  3534  				asmbuf.Put2(byte(op), o.op[z+1])
  3535  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3536  
  3537  			case Zm_r_xm:
  3538  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3539  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3540  
  3541  			case Zm_r_xm_nr:
  3542  				asmbuf.rexflag = 0
  3543  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3544  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3545  
  3546  			case Zm_r_i_xm:
  3547  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3548  				asmbuf.asmand(ctxt, cursym, p, &p.From, p.From3)
  3549  				asmbuf.Put1(byte(p.To.Offset))
  3550  
  3551  			case Zibm_r, Zibr_m:
  3552  				for {
  3553  					tmp1 := z
  3554  					z++
  3555  					op = int(o.op[tmp1])
  3556  					if op == 0 {
  3557  						break
  3558  					}
  3559  					asmbuf.Put1(byte(op))
  3560  				}
  3561  				if yt.zcase == Zibr_m {
  3562  					asmbuf.asmand(ctxt, cursym, p, &p.To, p.From3)
  3563  				} else {
  3564  					asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3565  				}
  3566  				asmbuf.Put1(byte(p.From.Offset))
  3567  
  3568  			case Zaut_r:
  3569  				asmbuf.Put1(0x8d) // leal
  3570  				if p.From.Type != obj.TYPE_ADDR {
  3571  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3572  				}
  3573  				p.From.Type = obj.TYPE_MEM
  3574  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3575  				p.From.Type = obj.TYPE_ADDR
  3576  
  3577  			case Zm_o:
  3578  				asmbuf.Put1(byte(op))
  3579  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3580  
  3581  			case Zr_m:
  3582  				asmbuf.Put1(byte(op))
  3583  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3584  
  3585  			case Zvex_rm_v_r:
  3586  				asmbuf.asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3587  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3588  
  3589  			case Zvex_i_r_v:
  3590  				asmbuf.asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
  3591  				regnum := byte(0x7)
  3592  				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
  3593  					regnum &= byte(p.From3.Reg - REG_X0)
  3594  				} else {
  3595  					regnum &= byte(p.From3.Reg - REG_Y0)
  3596  				}
  3597  				asmbuf.Put1(byte(o.op[z+2]) | regnum)
  3598  				asmbuf.Put1(byte(p.From.Offset))
  3599  
  3600  			case Zvex_i_rm_v_r:
  3601  				asmbuf.asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3602  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3603  				asmbuf.Put1(byte(p.From3.Offset))
  3604  
  3605  			case Zvex_i_rm_r:
  3606  				asmbuf.asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
  3607  				asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3608  				asmbuf.Put1(byte(p.From.Offset))
  3609  
  3610  			case Zvex_v_rm_r:
  3611  				asmbuf.asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3612  				asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3613  
  3614  			case Zvex_r_v_rm:
  3615  				asmbuf.asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3616  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3617  
  3618  			case Zr_m_xm:
  3619  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3620  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3621  
  3622  			case Zr_m_xm_nr:
  3623  				asmbuf.rexflag = 0
  3624  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3625  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3626  
  3627  			case Zo_m:
  3628  				asmbuf.Put1(byte(op))
  3629  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3630  
  3631  			case Zcallindreg:
  3632  				r = obj.Addrel(cursym)
  3633  				r.Off = int32(p.Pc)
  3634  				r.Type = objabi.R_CALLIND
  3635  				r.Siz = 0
  3636  				fallthrough
  3637  
  3638  			case Zo_m64:
  3639  				asmbuf.Put1(byte(op))
  3640  				asmbuf.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  3641  
  3642  			case Zm_ibo:
  3643  				asmbuf.Put1(byte(op))
  3644  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3645  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3646  
  3647  			case Zibo_m:
  3648  				asmbuf.Put1(byte(op))
  3649  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3650  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3651  
  3652  			case Zibo_m_xm:
  3653  				z = asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3654  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3655  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3656  
  3657  			case Z_ib, Zib_:
  3658  				if yt.zcase == Zib_ {
  3659  					a = &p.From
  3660  				} else {
  3661  					a = &p.To
  3662  				}
  3663  				asmbuf.Put1(byte(op))
  3664  				if p.As == AXABORT {
  3665  					asmbuf.Put1(o.op[z+1])
  3666  				}
  3667  				asmbuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3668  
  3669  			case Zib_rp:
  3670  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3671  				asmbuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3672  
  3673  			case Zil_rp:
  3674  				asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3675  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3676  				if o.prefix == Pe {
  3677  					v = vaddr(ctxt, p, &p.From, nil)
  3678  					asmbuf.PutInt16(int16(v))
  3679  				} else {
  3680  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3681  				}
  3682  
  3683  			case Zo_iw:
  3684  				asmbuf.Put1(byte(op))
  3685  				if p.From.Type != obj.TYPE_NONE {
  3686  					v = vaddr(ctxt, p, &p.From, nil)
  3687  					asmbuf.PutInt16(int16(v))
  3688  				}
  3689  
  3690  			case Ziq_rp:
  3691  				v = vaddr(ctxt, p, &p.From, &rel)
  3692  				l = int(v >> 32)
  3693  				if l == 0 && rel.Siz != 8 {
  3694  					//p->mark |= 0100;
  3695  					//print("zero: %llux %v\n", v, p);
  3696  					asmbuf.rexflag &^= (0x40 | Rxw)
  3697  
  3698  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3699  					asmbuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3700  					if rel.Type != 0 {
  3701  						r = obj.Addrel(cursym)
  3702  						*r = rel
  3703  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3704  					}
  3705  
  3706  					asmbuf.PutInt32(int32(v))
  3707  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3708  
  3709  					//p->mark |= 0100;
  3710  					//print("sign: %llux %v\n", v, p);
  3711  					asmbuf.Put1(0xc7)
  3712  					asmbuf.asmando(ctxt, cursym, p, &p.To, 0)
  3713  
  3714  					asmbuf.PutInt32(int32(v)) // need all 8
  3715  				} else {
  3716  					//print("all: %llux %v\n", v, p);
  3717  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3718  					asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3719  					if rel.Type != 0 {
  3720  						r = obj.Addrel(cursym)
  3721  						*r = rel
  3722  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3723  					}
  3724  
  3725  					asmbuf.PutInt64(v)
  3726  				}
  3727  
  3728  			case Zib_rr:
  3729  				asmbuf.Put1(byte(op))
  3730  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3731  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3732  
  3733  			case Z_il, Zil_:
  3734  				if yt.zcase == Zil_ {
  3735  					a = &p.From
  3736  				} else {
  3737  					a = &p.To
  3738  				}
  3739  				asmbuf.Put1(byte(op))
  3740  				if o.prefix == Pe {
  3741  					v = vaddr(ctxt, p, a, nil)
  3742  					asmbuf.PutInt16(int16(v))
  3743  				} else {
  3744  					asmbuf.relput4(ctxt, cursym, p, a)
  3745  				}
  3746  
  3747  			case Zm_ilo, Zilo_m:
  3748  				asmbuf.Put1(byte(op))
  3749  				if yt.zcase == Zilo_m {
  3750  					a = &p.From
  3751  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3752  				} else {
  3753  					a = &p.To
  3754  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3755  				}
  3756  
  3757  				if o.prefix == Pe {
  3758  					v = vaddr(ctxt, p, a, nil)
  3759  					asmbuf.PutInt16(int16(v))
  3760  				} else {
  3761  					asmbuf.relput4(ctxt, cursym, p, a)
  3762  				}
  3763  
  3764  			case Zil_rr:
  3765  				asmbuf.Put1(byte(op))
  3766  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3767  				if o.prefix == Pe {
  3768  					v = vaddr(ctxt, p, &p.From, nil)
  3769  					asmbuf.PutInt16(int16(v))
  3770  				} else {
  3771  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3772  				}
  3773  
  3774  			case Z_rp:
  3775  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3776  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3777  
  3778  			case Zrp_:
  3779  				asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3780  				asmbuf.Put1(byte(op + reg[p.From.Reg]))
  3781  
  3782  			case Zclr:
  3783  				asmbuf.rexflag &^= Pw
  3784  				asmbuf.Put1(byte(op))
  3785  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3786  
  3787  			case Zcallcon, Zjmpcon:
  3788  				if yt.zcase == Zcallcon {
  3789  					asmbuf.Put1(byte(op))
  3790  				} else {
  3791  					asmbuf.Put1(o.op[z+1])
  3792  				}
  3793  				r = obj.Addrel(cursym)
  3794  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3795  				r.Type = objabi.R_PCREL
  3796  				r.Siz = 4
  3797  				r.Add = p.To.Offset
  3798  				asmbuf.PutInt32(0)
  3799  
  3800  			case Zcallind:
  3801  				asmbuf.Put2(byte(op), o.op[z+1])
  3802  				r = obj.Addrel(cursym)
  3803  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3804  				if ctxt.Arch.Family == sys.AMD64 {
  3805  					r.Type = objabi.R_PCREL
  3806  				} else {
  3807  					r.Type = objabi.R_ADDR
  3808  				}
  3809  				r.Siz = 4
  3810  				r.Add = p.To.Offset
  3811  				r.Sym = p.To.Sym
  3812  				asmbuf.PutInt32(0)
  3813  
  3814  			case Zcall, Zcallduff:
  3815  				if p.To.Sym == nil {
  3816  					ctxt.Diag("call without target")
  3817  					log.Fatalf("bad code")
  3818  				}
  3819  
  3820  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3821  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3822  				}
  3823  
  3824  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3825  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3826  					// (the call jumps into the middle of the function).
  3827  					// This makes it possible to see call sites for duffcopy/duffzero in
  3828  					// BP-based profiling tools like Linux perf (which is the
  3829  					// whole point of obj.Framepointer_enabled).
  3830  					// MOVQ BP, -16(SP)
  3831  					// LEAQ -16(SP), BP
  3832  					asmbuf.Put(bpduff1)
  3833  				}
  3834  				asmbuf.Put1(byte(op))
  3835  				r = obj.Addrel(cursym)
  3836  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3837  				r.Sym = p.To.Sym
  3838  				r.Add = p.To.Offset
  3839  				r.Type = objabi.R_CALL
  3840  				r.Siz = 4
  3841  				asmbuf.PutInt32(0)
  3842  
  3843  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3844  					// Pop BP pushed above.
  3845  					// MOVQ 0(BP), BP
  3846  					asmbuf.Put(bpduff2)
  3847  				}
  3848  
  3849  			// TODO: jump across functions needs reloc
  3850  			case Zbr, Zjmp, Zloop:
  3851  				if p.As == AXBEGIN {
  3852  					asmbuf.Put1(byte(op))
  3853  				}
  3854  				if p.To.Sym != nil {
  3855  					if yt.zcase != Zjmp {
  3856  						ctxt.Diag("branch to ATEXT")
  3857  						log.Fatalf("bad code")
  3858  					}
  3859  
  3860  					asmbuf.Put1(o.op[z+1])
  3861  					r = obj.Addrel(cursym)
  3862  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3863  					r.Sym = p.To.Sym
  3864  					r.Type = objabi.R_PCREL
  3865  					r.Siz = 4
  3866  					asmbuf.PutInt32(0)
  3867  					break
  3868  				}
  3869  
  3870  				// Assumes q is in this function.
  3871  				// TODO: Check in input, preserve in brchain.
  3872  
  3873  				// Fill in backward jump now.
  3874  				q = p.Pcond
  3875  
  3876  				if q == nil {
  3877  					ctxt.Diag("jmp/branch/loop without target")
  3878  					log.Fatalf("bad code")
  3879  				}
  3880  
  3881  				if p.Back&1 != 0 {
  3882  					v = q.Pc - (p.Pc + 2)
  3883  					if v >= -128 && p.As != AXBEGIN {
  3884  						if p.As == AJCXZL {
  3885  							asmbuf.Put1(0x67)
  3886  						}
  3887  						asmbuf.Put2(byte(op), byte(v))
  3888  					} else if yt.zcase == Zloop {
  3889  						ctxt.Diag("loop too far: %v", p)
  3890  					} else {
  3891  						v -= 5 - 2
  3892  						if p.As == AXBEGIN {
  3893  							v--
  3894  						}
  3895  						if yt.zcase == Zbr {
  3896  							asmbuf.Put1(0x0f)
  3897  							v--
  3898  						}
  3899  
  3900  						asmbuf.Put1(o.op[z+1])
  3901  						asmbuf.PutInt32(int32(v))
  3902  					}
  3903  
  3904  					break
  3905  				}
  3906  
  3907  				// Annotate target; will fill in later.
  3908  				p.Forwd = q.Rel
  3909  
  3910  				q.Rel = p
  3911  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3912  					if p.As == AJCXZL {
  3913  						asmbuf.Put1(0x67)
  3914  					}
  3915  					asmbuf.Put2(byte(op), 0)
  3916  				} else if yt.zcase == Zloop {
  3917  					ctxt.Diag("loop too far: %v", p)
  3918  				} else {
  3919  					if yt.zcase == Zbr {
  3920  						asmbuf.Put1(0x0f)
  3921  					}
  3922  					asmbuf.Put1(o.op[z+1])
  3923  					asmbuf.PutInt32(0)
  3924  				}
  3925  
  3926  				break
  3927  
  3928  			/*
  3929  				v = q->pc - p->pc - 2;
  3930  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3931  					*ctxt->andptr++ = op;
  3932  					*ctxt->andptr++ = v;
  3933  				} else {
  3934  					v -= 5-2;
  3935  					if(yt.zcase == Zbr) {
  3936  						*ctxt->andptr++ = 0x0f;
  3937  						v--;
  3938  					}
  3939  					*ctxt->andptr++ = o->op[z+1];
  3940  					*ctxt->andptr++ = v;
  3941  					*ctxt->andptr++ = v>>8;
  3942  					*ctxt->andptr++ = v>>16;
  3943  					*ctxt->andptr++ = v>>24;
  3944  				}
  3945  			*/
  3946  
  3947  			case Zbyte:
  3948  				v = vaddr(ctxt, p, &p.From, &rel)
  3949  				if rel.Siz != 0 {
  3950  					rel.Siz = uint8(op)
  3951  					r = obj.Addrel(cursym)
  3952  					*r = rel
  3953  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3954  				}
  3955  
  3956  				asmbuf.Put1(byte(v))
  3957  				if op > 1 {
  3958  					asmbuf.Put1(byte(v >> 8))
  3959  					if op > 2 {
  3960  						asmbuf.PutInt16(int16(v >> 16))
  3961  						if op > 4 {
  3962  							asmbuf.PutInt32(int32(v >> 32))
  3963  						}
  3964  					}
  3965  				}
  3966  			}
  3967  
  3968  			return
  3969  		}
  3970  		z += int(yt.zoffset) + xo
  3971  	}
  3972  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3973  		var pp obj.Prog
  3974  		var t []byte
  3975  		if p.As == mo[0].as {
  3976  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3977  				t = mo[0].op[:]
  3978  				switch mo[0].code {
  3979  				default:
  3980  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3981  
  3982  				case 0: /* lit */
  3983  					for z = 0; t[z] != E; z++ {
  3984  						asmbuf.Put1(t[z])
  3985  					}
  3986  
  3987  				case 1: /* r,m */
  3988  					asmbuf.Put1(t[0])
  3989  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  3990  
  3991  				case 2: /* m,r */
  3992  					asmbuf.Put1(t[0])
  3993  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  3994  
  3995  				case 3: /* r,m - 2op */
  3996  					asmbuf.Put2(t[0], t[1])
  3997  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  3998  					asmbuf.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3999  
  4000  				case 4: /* m,r - 2op */
  4001  					asmbuf.Put2(t[0], t[1])
  4002  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  4003  					asmbuf.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4004  
  4005  				case 5: /* load full pointer, trash heap */
  4006  					if t[0] != 0 {
  4007  						asmbuf.Put1(t[0])
  4008  					}
  4009  					switch p.To.Index {
  4010  					default:
  4011  						goto bad
  4012  
  4013  					case REG_DS:
  4014  						asmbuf.Put1(0xc5)
  4015  
  4016  					case REG_SS:
  4017  						asmbuf.Put2(0x0f, 0xb2)
  4018  
  4019  					case REG_ES:
  4020  						asmbuf.Put1(0xc4)
  4021  
  4022  					case REG_FS:
  4023  						asmbuf.Put2(0x0f, 0xb4)
  4024  
  4025  					case REG_GS:
  4026  						asmbuf.Put2(0x0f, 0xb5)
  4027  					}
  4028  
  4029  					asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  4030  
  4031  				case 6: /* double shift */
  4032  					if t[0] == Pw {
  4033  						if ctxt.Arch.Family != sys.AMD64 {
  4034  							ctxt.Diag("asmins: illegal 64: %v", p)
  4035  						}
  4036  						asmbuf.rexflag |= Pw
  4037  						t = t[1:]
  4038  					} else if t[0] == Pe {
  4039  						asmbuf.Put1(Pe)
  4040  						t = t[1:]
  4041  					}
  4042  
  4043  					switch p.From.Type {
  4044  					default:
  4045  						goto bad
  4046  
  4047  					case obj.TYPE_CONST:
  4048  						asmbuf.Put2(0x0f, t[0])
  4049  						asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4050  						asmbuf.Put1(byte(p.From.Offset))
  4051  
  4052  					case obj.TYPE_REG:
  4053  						switch p.From.Reg {
  4054  						default:
  4055  							goto bad
  4056  
  4057  						case REG_CL, REG_CX:
  4058  							asmbuf.Put2(0x0f, t[1])
  4059  							asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4060  						}
  4061  					}
  4062  
  4063  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4064  				// where you load the TLS base register into a register and then index off that
  4065  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4066  				// are handled in prefixof above and should not be listed here.
  4067  				case 7: /* mov tls, r */
  4068  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4069  						ctxt.Diag("invalid load of TLS: %v", p)
  4070  					}
  4071  
  4072  					if ctxt.Arch.Family == sys.I386 {
  4073  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4074  						// where you load the TLS base register into a register and then index off that
  4075  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4076  						// are handled in prefixof above and should not be listed here.
  4077  						switch ctxt.Headtype {
  4078  						default:
  4079  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4080  
  4081  						case objabi.Hlinux,
  4082  							objabi.Hnacl:
  4083  							if ctxt.Flag_shared {
  4084  								// Note that this is not generating the same insns as the other cases.
  4085  								//     MOV TLS, dst
  4086  								// becomes
  4087  								//     call __x86.get_pc_thunk.dst
  4088  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4089  								// which is encoded as
  4090  								//     call __x86.get_pc_thunk.dst
  4091  								//     movq 0(dst), dst
  4092  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4093  								// is g, which we can't check here, but will when we assemble the second
  4094  								// instruction.
  4095  								dst := p.To.Reg
  4096  								asmbuf.Put1(0xe8)
  4097  								r = obj.Addrel(cursym)
  4098  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4099  								r.Type = objabi.R_CALL
  4100  								r.Siz = 4
  4101  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  4102  								asmbuf.PutInt32(0)
  4103  
  4104  								asmbuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4105  								r = obj.Addrel(cursym)
  4106  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4107  								r.Type = objabi.R_TLS_IE
  4108  								r.Siz = 4
  4109  								r.Add = 2
  4110  								asmbuf.PutInt32(0)
  4111  							} else {
  4112  								// ELF TLS base is 0(GS).
  4113  								pp.From = p.From
  4114  
  4115  								pp.From.Type = obj.TYPE_MEM
  4116  								pp.From.Reg = REG_GS
  4117  								pp.From.Offset = 0
  4118  								pp.From.Index = REG_NONE
  4119  								pp.From.Scale = 0
  4120  								asmbuf.Put2(0x65, // GS
  4121  									0x8B)
  4122  								asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4123  							}
  4124  						case objabi.Hplan9:
  4125  							pp.From = obj.Addr{}
  4126  							pp.From.Type = obj.TYPE_MEM
  4127  							pp.From.Name = obj.NAME_EXTERN
  4128  							pp.From.Sym = plan9privates
  4129  							pp.From.Offset = 0
  4130  							pp.From.Index = REG_NONE
  4131  							asmbuf.Put1(0x8B)
  4132  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4133  
  4134  						case objabi.Hwindows:
  4135  							// Windows TLS base is always 0x14(FS).
  4136  							pp.From = p.From
  4137  
  4138  							pp.From.Type = obj.TYPE_MEM
  4139  							pp.From.Reg = REG_FS
  4140  							pp.From.Offset = 0x14
  4141  							pp.From.Index = REG_NONE
  4142  							pp.From.Scale = 0
  4143  							asmbuf.Put2(0x64, // FS
  4144  								0x8B)
  4145  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4146  						}
  4147  						break
  4148  					}
  4149  
  4150  					switch ctxt.Headtype {
  4151  					default:
  4152  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4153  
  4154  					case objabi.Hlinux:
  4155  						if !ctxt.Flag_shared {
  4156  							log.Fatalf("unknown TLS base location for linux without -shared")
  4157  						}
  4158  						// Note that this is not generating the same insn as the other cases.
  4159  						//     MOV TLS, R_to
  4160  						// becomes
  4161  						//     movq g@gottpoff(%rip), R_to
  4162  						// which is encoded as
  4163  						//     movq 0(%rip), R_to
  4164  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4165  						// is g, which we can't check here, but will when we assemble the second
  4166  						// instruction.
  4167  						asmbuf.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4168  
  4169  						asmbuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4170  						r = obj.Addrel(cursym)
  4171  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4172  						r.Type = objabi.R_TLS_IE
  4173  						r.Siz = 4
  4174  						r.Add = -4
  4175  						asmbuf.PutInt32(0)
  4176  
  4177  					case objabi.Hplan9:
  4178  						pp.From = obj.Addr{}
  4179  						pp.From.Type = obj.TYPE_MEM
  4180  						pp.From.Name = obj.NAME_EXTERN
  4181  						pp.From.Sym = plan9privates
  4182  						pp.From.Offset = 0
  4183  						pp.From.Index = REG_NONE
  4184  						asmbuf.rexflag |= Pw
  4185  						asmbuf.Put1(0x8B)
  4186  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4187  
  4188  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4189  						// TLS base is 0(FS).
  4190  						pp.From = p.From
  4191  
  4192  						pp.From.Type = obj.TYPE_MEM
  4193  						pp.From.Name = obj.NAME_NONE
  4194  						pp.From.Reg = REG_NONE
  4195  						pp.From.Offset = 0
  4196  						pp.From.Index = REG_NONE
  4197  						pp.From.Scale = 0
  4198  						asmbuf.rexflag |= Pw
  4199  						asmbuf.Put2(0x64, // FS
  4200  							0x8B)
  4201  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4202  
  4203  					case objabi.Hwindows:
  4204  						// Windows TLS base is always 0x28(GS).
  4205  						pp.From = p.From
  4206  
  4207  						pp.From.Type = obj.TYPE_MEM
  4208  						pp.From.Name = obj.NAME_NONE
  4209  						pp.From.Reg = REG_GS
  4210  						pp.From.Offset = 0x28
  4211  						pp.From.Index = REG_NONE
  4212  						pp.From.Scale = 0
  4213  						asmbuf.rexflag |= Pw
  4214  						asmbuf.Put2(0x65, // GS
  4215  							0x8B)
  4216  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4217  					}
  4218  				}
  4219  				return
  4220  			}
  4221  		}
  4222  	}
  4223  	goto bad
  4224  
  4225  bad:
  4226  	if ctxt.Arch.Family != sys.AMD64 {
  4227  		/*
  4228  		 * here, the assembly has failed.
  4229  		 * if its a byte instruction that has
  4230  		 * unaddressable registers, try to
  4231  		 * exchange registers and reissue the
  4232  		 * instruction with the operands renamed.
  4233  		 */
  4234  		pp := *p
  4235  
  4236  		unbytereg(&pp.From, &pp.Ft)
  4237  		unbytereg(&pp.To, &pp.Tt)
  4238  
  4239  		z := int(p.From.Reg)
  4240  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4241  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4242  			// For now, different to keep bit-for-bit compatibility.
  4243  			if ctxt.Arch.Family == sys.I386 {
  4244  				breg := byteswapreg(ctxt, &p.To)
  4245  				if breg != REG_AX {
  4246  					asmbuf.Put1(0x87) // xchg lhs,bx
  4247  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4248  					subreg(&pp, z, breg)
  4249  					asmbuf.doasm(ctxt, cursym, &pp)
  4250  					asmbuf.Put1(0x87) // xchg lhs,bx
  4251  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4252  				} else {
  4253  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4254  					subreg(&pp, z, REG_AX)
  4255  					asmbuf.doasm(ctxt, cursym, &pp)
  4256  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4257  				}
  4258  				return
  4259  			}
  4260  
  4261  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4262  				// We certainly don't want to exchange
  4263  				// with AX if the op is MUL or DIV.
  4264  				asmbuf.Put1(0x87) // xchg lhs,bx
  4265  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4266  				subreg(&pp, z, REG_BX)
  4267  				asmbuf.doasm(ctxt, cursym, &pp)
  4268  				asmbuf.Put1(0x87) // xchg lhs,bx
  4269  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4270  			} else {
  4271  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4272  				subreg(&pp, z, REG_AX)
  4273  				asmbuf.doasm(ctxt, cursym, &pp)
  4274  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4275  			}
  4276  			return
  4277  		}
  4278  
  4279  		z = int(p.To.Reg)
  4280  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4281  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4282  			// For now, different to keep bit-for-bit compatibility.
  4283  			if ctxt.Arch.Family == sys.I386 {
  4284  				breg := byteswapreg(ctxt, &p.From)
  4285  				if breg != REG_AX {
  4286  					asmbuf.Put1(0x87) //xchg rhs,bx
  4287  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4288  					subreg(&pp, z, breg)
  4289  					asmbuf.doasm(ctxt, cursym, &pp)
  4290  					asmbuf.Put1(0x87) // xchg rhs,bx
  4291  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4292  				} else {
  4293  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4294  					subreg(&pp, z, REG_AX)
  4295  					asmbuf.doasm(ctxt, cursym, &pp)
  4296  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4297  				}
  4298  				return
  4299  			}
  4300  
  4301  			if isax(&p.From) {
  4302  				asmbuf.Put1(0x87) // xchg rhs,bx
  4303  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4304  				subreg(&pp, z, REG_BX)
  4305  				asmbuf.doasm(ctxt, cursym, &pp)
  4306  				asmbuf.Put1(0x87) // xchg rhs,bx
  4307  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4308  			} else {
  4309  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4310  				subreg(&pp, z, REG_AX)
  4311  				asmbuf.doasm(ctxt, cursym, &pp)
  4312  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4313  			}
  4314  			return
  4315  		}
  4316  	}
  4317  
  4318  	ctxt.Diag("invalid instruction: %v", p)
  4319  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4320  	return
  4321  }
  4322  
  4323  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4324  // which is not referenced in a.
  4325  // If a is empty, it returns BX to account for MULB-like instructions
  4326  // that might use DX and AX.
  4327  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4328  	cana, canb, canc, cand := true, true, true, true
  4329  	if a.Type == obj.TYPE_NONE {
  4330  		cana, cand = false, false
  4331  	}
  4332  
  4333  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4334  		switch a.Reg {
  4335  		case REG_NONE:
  4336  			cana, cand = false, false
  4337  		case REG_AX, REG_AL, REG_AH:
  4338  			cana = false
  4339  		case REG_BX, REG_BL, REG_BH:
  4340  			canb = false
  4341  		case REG_CX, REG_CL, REG_CH:
  4342  			canc = false
  4343  		case REG_DX, REG_DL, REG_DH:
  4344  			cand = false
  4345  		}
  4346  	}
  4347  
  4348  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4349  		switch a.Index {
  4350  		case REG_AX:
  4351  			cana = false
  4352  		case REG_BX:
  4353  			canb = false
  4354  		case REG_CX:
  4355  			canc = false
  4356  		case REG_DX:
  4357  			cand = false
  4358  		}
  4359  	}
  4360  
  4361  	switch {
  4362  	case cana:
  4363  		return REG_AX
  4364  	case canb:
  4365  		return REG_BX
  4366  	case canc:
  4367  		return REG_CX
  4368  	case cand:
  4369  		return REG_DX
  4370  	default:
  4371  		ctxt.Diag("impossible byte register")
  4372  		log.Fatalf("bad code")
  4373  		return 0
  4374  	}
  4375  }
  4376  
  4377  func isbadbyte(a *obj.Addr) bool {
  4378  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4379  }
  4380  
  4381  var naclret = []uint8{
  4382  	0x5e, // POPL SI
  4383  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4384  	0x83,
  4385  	0xe6,
  4386  	0xe0, // ANDL $~31, SI
  4387  	0x4c,
  4388  	0x01,
  4389  	0xfe, // ADDQ R15, SI
  4390  	0xff,
  4391  	0xe6, // JMP SI
  4392  }
  4393  
  4394  var naclret8 = []uint8{
  4395  	0x5d, // POPL BP
  4396  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4397  	0x83,
  4398  	0xe5,
  4399  	0xe0, // ANDL $~31, BP
  4400  	0xff,
  4401  	0xe5, // JMP BP
  4402  }
  4403  
  4404  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4405  
  4406  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4407  
  4408  var naclmovs = []uint8{
  4409  	0x89,
  4410  	0xf6, // MOVL SI, SI
  4411  	0x49,
  4412  	0x8d,
  4413  	0x34,
  4414  	0x37, // LEAQ (R15)(SI*1), SI
  4415  	0x89,
  4416  	0xff, // MOVL DI, DI
  4417  	0x49,
  4418  	0x8d,
  4419  	0x3c,
  4420  	0x3f, // LEAQ (R15)(DI*1), DI
  4421  }
  4422  
  4423  var naclstos = []uint8{
  4424  	0x89,
  4425  	0xff, // MOVL DI, DI
  4426  	0x49,
  4427  	0x8d,
  4428  	0x3c,
  4429  	0x3f, // LEAQ (R15)(DI*1), DI
  4430  }
  4431  
  4432  func (asmbuf *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
  4433  	if reg >= REG_R8 {
  4434  		asmbuf.Put1(0x45)
  4435  	}
  4436  	reg = (reg - REG_AX) & 7
  4437  	asmbuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4438  }
  4439  
  4440  func (asmbuf *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4441  	asmbuf.Reset()
  4442  
  4443  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 {
  4444  		switch p.As {
  4445  		case obj.ARET:
  4446  			asmbuf.Put(naclret8)
  4447  			return
  4448  
  4449  		case obj.ACALL,
  4450  			obj.AJMP:
  4451  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4452  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4453  			}
  4454  
  4455  		case AINT:
  4456  			asmbuf.Put1(0xf4)
  4457  			return
  4458  		}
  4459  	}
  4460  
  4461  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
  4462  		if p.As == AREP {
  4463  			asmbuf.rep++
  4464  			return
  4465  		}
  4466  
  4467  		if p.As == AREPN {
  4468  			asmbuf.repn++
  4469  			return
  4470  		}
  4471  
  4472  		if p.As == ALOCK {
  4473  			asmbuf.lock = true
  4474  			return
  4475  		}
  4476  
  4477  		if p.As != ALEAQ && p.As != ALEAL {
  4478  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4479  				asmbuf.nacltrunc(ctxt, int(p.From.Index))
  4480  			}
  4481  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4482  				asmbuf.nacltrunc(ctxt, int(p.To.Index))
  4483  			}
  4484  		}
  4485  
  4486  		switch p.As {
  4487  		case obj.ARET:
  4488  			asmbuf.Put(naclret)
  4489  			return
  4490  
  4491  		case obj.ACALL,
  4492  			obj.AJMP:
  4493  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4494  				// ANDL $~31, reg
  4495  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4496  				// ADDQ R15, reg
  4497  				asmbuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4498  			}
  4499  
  4500  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4501  				// ANDL $~31, reg
  4502  				asmbuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4503  				// ADDQ R15, reg
  4504  				asmbuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4505  			}
  4506  
  4507  		case AINT:
  4508  			asmbuf.Put1(0xf4)
  4509  			return
  4510  
  4511  		case ASCASB,
  4512  			ASCASW,
  4513  			ASCASL,
  4514  			ASCASQ,
  4515  			ASTOSB,
  4516  			ASTOSW,
  4517  			ASTOSL,
  4518  			ASTOSQ:
  4519  			asmbuf.Put(naclstos)
  4520  
  4521  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4522  			asmbuf.Put(naclmovs)
  4523  		}
  4524  
  4525  		if asmbuf.rep != 0 {
  4526  			asmbuf.Put1(0xf3)
  4527  			asmbuf.rep = 0
  4528  		}
  4529  
  4530  		if asmbuf.repn != 0 {
  4531  			asmbuf.Put1(0xf2)
  4532  			asmbuf.repn = 0
  4533  		}
  4534  
  4535  		if asmbuf.lock {
  4536  			asmbuf.Put1(0xf0)
  4537  			asmbuf.lock = false
  4538  		}
  4539  	}
  4540  
  4541  	asmbuf.rexflag = 0
  4542  	asmbuf.vexflag = 0
  4543  	mark := asmbuf.Len()
  4544  	asmbuf.doasm(ctxt, cursym, p)
  4545  	if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4546  		/*
  4547  		 * as befits the whole approach of the architecture,
  4548  		 * the rex prefix must appear before the first opcode byte
  4549  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4550  		 * before the 0f opcode escape!), or it might be ignored.
  4551  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4552  		 */
  4553  		if ctxt.Arch.Family != sys.AMD64 {
  4554  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  4555  		}
  4556  		n := asmbuf.Len()
  4557  		var np int
  4558  		for np = mark; np < n; np++ {
  4559  			c := asmbuf.At(np)
  4560  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4561  				break
  4562  			}
  4563  		}
  4564  		asmbuf.Insert(np, byte(0x40|asmbuf.rexflag))
  4565  	}
  4566  
  4567  	n := asmbuf.Len()
  4568  	for i := len(cursym.R) - 1; i >= 0; i-- {
  4569  		r := &cursym.R[i]
  4570  		if int64(r.Off) < p.Pc {
  4571  			break
  4572  		}
  4573  		if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4574  			r.Off++
  4575  		}
  4576  		if r.Type == objabi.R_PCREL {
  4577  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4578  				// PC-relative addressing is relative to the end of the instruction,
  4579  				// but the relocations applied by the linker are relative to the end
  4580  				// of the relocation. Because immediate instruction
  4581  				// arguments can follow the PC-relative memory reference in the
  4582  				// instruction encoding, the two may not coincide. In this case,
  4583  				// adjust addend so that linker can keep relocating relative to the
  4584  				// end of the relocation.
  4585  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4586  			} else if ctxt.Arch.Family == sys.I386 {
  4587  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4588  				// assumes that the previous instruction loaded the PC of the end
  4589  				// of that instruction into CX, so the adjustment is relative to
  4590  				// that.
  4591  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4592  			}
  4593  		}
  4594  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  4595  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4596  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4597  		}
  4598  
  4599  	}
  4600  
  4601  	if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4602  		switch p.To.Reg {
  4603  		case REG_SP:
  4604  			asmbuf.Put(naclspfix)
  4605  		case REG_BP:
  4606  			asmbuf.Put(naclbpfix)
  4607  		}
  4608  	}
  4609  }