github.com/karrick/go@v0.0.0-20170817181416-d5b0ec858b37/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
    43  var ( // package-level symbol references shared by the assembler
    44  	plan9privates *obj.LSym // NOTE(review): presumably the Plan 9 "privates" symbol; confirm where it is assigned
    45  	deferreturn   *obj.LSym // NOTE(review): presumably the runtime deferreturn symbol; confirm where it is assigned
    46  )
    47  
    48  // Instruction layout.
    49  
    50  const (
    51  	// Loop alignment constants:
    52  	// want to align loop entry to LoopAlign-byte boundary,
    53  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    54  	// We define a loop entry as the target of a backward jump.
    55  	//
    56  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    57  	// and it aligns all jump targets, not just backward jump targets.
    58  	//
    59  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    60  	// is very slight but negative, so the alignment is disabled by
    61  	// setting MaxLoopPad = 0. The code is here for reference and
    62  	// for future experiments.
    63  	//
    64  	LoopAlign  = 16 // boundary, in bytes, that loop entries would be aligned to
    65  	MaxLoopPad = 0 // maximum bytes of NOP padding to insert; 0 disables loop alignment
    66  )
    67  
    68  type Optab struct { // one row of optab: an instruction and the data needed to encode it
    69  	as     obj.As // instruction this row describes; matched against p.As
    70  	ytab   []ytab // accepted operand-class combinations, scanned line by line
    71  	prefix uint8 // one of the P* constants; controls the prefix bytes to emit
    72  	op     [23]uint8 // opcode bytes; each ytab line owns zoffset consecutive bytes of this array
    73  }
    74  
    75  type ytab struct { // one accepted operand combination for an instruction, plus how to encode it
    76  	from    uint8 // operand class (Y* constant) matched against p.From
    77  	from3   uint8 // operand class (Y* constant) for the additional source operand; Ynone in most tables
    78  	to      uint8 // operand class (Y* constant) matched against p.To
    79  	zcase   uint8 // encoding form (Z* constant) used when this line matches
    80  	zoffset uint8 // number of Optab.op bytes z advances by when stepping past this line
    81  }
    82  
    83  type Movtab struct { // NOTE(review): row type for a move-specific table not visible in this part of the file; confirm usage
    84  	as   obj.As // instruction this row applies to
    85  	ft   uint8 // presumably the "from" operand class, by analogy with ytab.from; confirm
    86  	f3t  uint8 // presumably the "from3" operand class, by analogy with ytab.from3; confirm
    87  	tt   uint8 // presumably the "to" operand class, by analogy with ytab.to; confirm
    88  	code uint8 // NOTE(review): meaning not visible here; confirm against the code that reads it
    89  	op   [4]uint8 // up to four bytes of encoding data for the row
    90  }
    91  
    92  const ( // operand classes (Ytypes) used in the from/from3/to columns of ytab lines
    93  	Yxxx = iota // zero value of the enumeration
    94  	Ynone // used in ytab lines where the operand is absent
    95  	Yi0 // $0
    96  	Yi1 // $1
    97  	Yi8 // $x, x fits in int8
    98  	Yu8 // $x, x fits in uint8
    99  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   100  	Ys32 // signed 32-bit constant; Yi0, Yi1 and Yi8 also count as Ys32 via ycover
   101  	Yi32
   102  	Yi64
   103  	Yiauto
   104  	Yal
   105  	Ycl
   106  	Yax
   107  	Ycx
   108  	Yrb
   109  	Yrl
   110  	Yrl32 // Yrl on 32-bit system
   111  	Yrf
   112  	Yf0
   113  	Yrx
   114  	Ymb
   115  	Yml
   116  	Ym
   117  	Ybr
   118  	Ycs
   119  	Yss
   120  	Yds
   121  	Yes
   122  	Yfs
   123  	Ygs
   124  	Ygdtr
   125  	Yidtr
   126  	Yldtr
   127  	Ymsw
   128  	Ytask
   129  	Ycr0
   130  	Ycr1
   131  	Ycr2
   132  	Ycr3
   133  	Ycr4
   134  	Ycr5
   135  	Ycr6
   136  	Ycr7
   137  	Ycr8
   138  	Ydr0
   139  	Ydr1
   140  	Ydr2
   141  	Ydr3
   142  	Ydr4
   143  	Ydr5
   144  	Ydr6
   145  	Ydr7
   146  	Ytr0
   147  	Ytr1
   148  	Ytr2
   149  	Ytr3
   150  	Ytr4
   151  	Ytr5
   152  	Ytr6
   153  	Ytr7
   154  	Ymr
   155  	Ymm
   156  	Yxr
   157  	Yxm
   158  	Yyr
   159  	Yym
   160  	Ytls
   161  	Ytextsize
   162  	Yindir
   163  	Ymax // number of operand classes; ycover is sized Ymax*Ymax and indexed as a*Ymax+b
   164  )
   165  
   166  const ( // encoding cases (Ztypes) stored in ytab.zcase; doasm switches on these
   167  	Zxxx = iota // zero value of the enumeration
   168  	Zlit
   169  	Zlitm_r
   170  	Z_rp
   171  	Zbr
   172  	Zcall
   173  	Zcallcon
   174  	Zcallduff
   175  	Zcallind
   176  	Zcallindreg
   177  	Zib_
   178  	Zib_rp
   179  	Zibo_m // opcode byte (z[0]), then the encoded addressing mode, then a single immediate byte
   180  	Zibo_m_xm
   181  	Zil_
   182  	Zil_rp
   183  	Ziq_rp
   184  	Zilo_m // same as Zibo_m but with a long (32-bit) immediate
   185  	Zjmp
   186  	Zjmpcon
   187  	Zloop
   188  	Zo_iw
   189  	Zm_o
   190  	Zm_r
   191  	Zm2_r
   192  	Zm_r_xm
   193  	Zm_r_i_xm
   194  	Zm_r_xm_nr
   195  	Zr_m_xm_nr
   196  	Zibm_r /* mmx1,mmx2/mem64,imm8 */
   197  	Zibr_m
   198  	Zmb_r
   199  	Zaut_r
   200  	Zo_m
   201  	Zo_m64
   202  	Zpseudo
   203  	Zr_m
   204  	Zr_m_xm
   205  	Zrp_
   206  	Z_ib
   207  	Z_il
   208  	Zm_ibo
   209  	Zm_ilo
   210  	Zib_rr
   211  	Zil_rr
   212  	Zclr
   213  	Zbyte
   214  	Zvex_rm_v_r
   215  	Zvex_r_v_rm
   216  	Zvex_v_rm_r
   217  	Zvex_i_rm_r
   218  	Zvex_i_r_v
   219  	Zvex_i_rm_v_r
   220  	Zmax // last value of the enumeration; not used as a zcase in the tables shown here
   221  )
   222  
   223  const ( // P* values go in Optab.prefix and select the prefix bytes to emit; Rx* are single-bit masks
   224  	Px   = 0 // used by optab rows that carry no P* prefix selection (e.g. AADDL)
   225  	Px1  = 1    // symbolic; exact value doesn't matter
   226  	P32  = 0x32 /* 32-bit only */
   227  	Pe   = 0x66 /* operand escape */
   228  	Pm   = 0x0f /* 2byte opcode escape */
   229  	Pq   = 0xff /* both escapes: 66 0f */
   230  	Pb   = 0xfe /* byte operands */
   231  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   232  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   233  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   234  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   235  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   236  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   237  	Pw   = 0x48 /* Rex.w */
   238  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   239  	Py   = 0x80 /* defaults to 64-bit mode */
   240  	Py1  = 0x81 // symbolic; exact value doesn't matter
   241  	Py3  = 0x83 // symbolic; exact value doesn't matter
   242  	Pvex = 0x84 // symbolic: exact value doesn't matter
   243  
   244  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   245  	Rxr = 1 << 2 /* extend modrm reg */
   246  	Rxx = 1 << 1 /* extend sib index */
   247  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   248  )
   249  
   250  const (
   251  	// Encoding for VEX prefix in tables.
   252  	// The P, L, and W fields are chosen to match
   253  	// their eventual locations in the VEX prefix bytes.
   254  
   255  	// P field - 2 bits
   256  	vex66 = 1 << 0
   257  	vexF3 = 2 << 0
   258  	vexF2 = 3 << 0
   259  	// L field - 1 bit
   260  	vexLZ  = 0 << 2 // same value as vex128 (L bit clear)
   261  	vexLIG = 0 << 2 // same value as vex128 (L bit clear)
   262  	vex128 = 0 << 2
   263  	vex256 = 1 << 2
   264  	// W field - 1 bit
   265  	vexWIG = 0 << 7 // same value as vexW0 (W bit clear)
   266  	vexW0  = 0 << 7
   267  	vexW1  = 1 << 7
   268  	// M field - 5 bits, but mostly reserved; we can store up to 4
   269  	vex0F   = 1 << 3
   270  	vex0F38 = 2 << 3
   271  	vex0F3A = 3 << 3
   272  
   273  	// Combinations used in the manual.
   274  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   275  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   276  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   277  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   278  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   279  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   280  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   281  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   282  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   283  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   284  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   285  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   286  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   287  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   288  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   289  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   290  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   291  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   292  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   293  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   294  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   295  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   296  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   297  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   298  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   299  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   300  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   301  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   302  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   303  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   304  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   305  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   306  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   307  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   308  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   309  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   310  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   311  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   312  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   313  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   314  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   315  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   316  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   317  )
   318  
   319  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means an operand of class a also counts as class b; set up in instinit
   320  
   321  var reg [MAXREG]int // NOTE(review): per-register table sized by MAXREG; contents are filled in elsewhere, confirm meaning
   322  
   323  var regrex [MAXREG + 1]int // NOTE(review): per-register table with one extra slot; presumably REX bits per register, confirm
   324  
   325  var ynone = []ytab{ // no operands; used by optab rows such as AAAA, ACDQ and ACLC
   326  	{Ynone, Ynone, Ynone, Zlit, 1},
   327  }
   328  
   329  var ytext = []ytab{ // both lines use Zpseudo; they differ only in whether from3 is Ynone or Yi32
   330  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   331  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   332  }
   333  
   334  var ynop = []ytab{ // every accepted operand shape maps to Zpseudo
   335  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   336  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   337  	{Ynone, Ynone, Yml, Zpseudo, 0},
   338  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   339  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   340  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   341  	{Yml, Ynone, Ynone, Zpseudo, 0},
   342  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   343  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   344  }
   345  
   346  var yfuncdata = []ytab{ // single Zpseudo form: Yi32 source, Ym destination
   347  	{Yi32, Ynone, Ym, Zpseudo, 0},
   348  }
   349  
   350  var ypcdata = []ytab{ // single Zpseudo form: Yi32 source, Yi32 destination
   351  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   352  }
   353  
   354  var yxorb = []ytab{ // used by the byte ALU rows in optab (AADCB, AADDB, AANDB)
   355  	{Yi32, Ynone, Yal, Zib_, 1},
   356  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   357  	{Yrb, Ynone, Ymb, Zr_m, 1},
   358  	{Ymb, Ynone, Yrb, Zm_r, 1},
   359  }
   360  
   361  var yaddl = []ytab{ // used by AADCL/AADDL/AANDL and their Q and W variants; see the worked AADDL example in the comment preceding optab
   362  	{Yi8, Ynone, Yml, Zibo_m, 2},
   363  	{Yi32, Ynone, Yax, Zil_, 1},
   364  	{Yi32, Ynone, Yml, Zilo_m, 2},
   365  	{Yrl, Ynone, Yml, Zr_m, 1},
   366  	{Yml, Ynone, Yrl, Zm_r, 1},
   367  }
   368  
   369  var yincl = []ytab{
   370  	{Ynone, Ynone, Yrl, Z_rp, 1},
   371  	{Ynone, Ynone, Yml, Zo_m, 2},
   372  }
   373  
   374  var yincq = []ytab{ // like yincl but without the Z_rp line
   375  	{Ynone, Ynone, Yml, Zo_m, 2},
   376  }
   377  
   378  var ycmpb = []ytab{
   379  	{Yal, Ynone, Yi32, Z_ib, 1},
   380  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   381  	{Ymb, Ynone, Yrb, Zm_r, 1},
   382  	{Yrb, Ynone, Ymb, Zr_m, 1},
   383  }
   384  
   385  var ycmpl = []ytab{
   386  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   387  	{Yax, Ynone, Yi32, Z_il, 1},
   388  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   389  	{Yml, Ynone, Yrl, Zm_r, 1},
   390  	{Yrl, Ynone, Yml, Zr_m, 1},
   391  }
   392  
   393  var yshb = []ytab{
   394  	{Yi1, Ynone, Ymb, Zo_m, 2},
   395  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   396  	{Ycx, Ynone, Ymb, Zo_m, 2},
   397  }
   398  
   399  var yshl = []ytab{
   400  	{Yi1, Ynone, Yml, Zo_m, 2},
   401  	{Yi32, Ynone, Yml, Zibo_m, 2},
   402  	{Ycl, Ynone, Yml, Zo_m, 2},
   403  	{Ycx, Ynone, Yml, Zo_m, 2},
   404  }
   405  
   406  var ytestl = []ytab{
   407  	{Yi32, Ynone, Yax, Zil_, 1},
   408  	{Yi32, Ynone, Yml, Zilo_m, 2},
   409  	{Yrl, Ynone, Yml, Zr_m, 1},
   410  	{Yml, Ynone, Yrl, Zm_r, 1},
   411  }
   412  
   413  var ymovb = []ytab{
   414  	{Yrb, Ynone, Ymb, Zr_m, 1},
   415  	{Ymb, Ynone, Yrb, Zm_r, 1},
   416  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   417  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   418  }
   419  
   420  var ybtl = []ytab{ // used by the ABT*, ABTC*, ABTR* and ABTS* rows in optab
   421  	{Yi8, Ynone, Yml, Zibo_m, 2},
   422  	{Yrl, Ynone, Yml, Zr_m, 1},
   423  }
   424  
   425  var ymovw = []ytab{
   426  	{Yrl, Ynone, Yml, Zr_m, 1},
   427  	{Yml, Ynone, Yrl, Zm_r, 1},
   428  	{Yi0, Ynone, Yrl, Zclr, 1},
   429  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   430  	{Yi32, Ynone, Yml, Zilo_m, 2},
   431  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   432  }
   433  
   434  var ymovl = []ytab{ // same leading lines as ymovw, plus the MMX/XMM MOVD forms
   435  	{Yrl, Ynone, Yml, Zr_m, 1},
   436  	{Yml, Ynone, Yrl, Zm_r, 1},
   437  	{Yi0, Ynone, Yrl, Zclr, 1},
   438  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   439  	{Yi32, Ynone, Yml, Zilo_m, 2},
   440  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   441  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   442  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   443  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   444  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   445  }
   446  
   447  var yret = []ytab{ // with or without a Yi32 operand; both lines use Zo_iw
   448  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   449  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   450  }
   451  
   452  var ymovq = []ytab{ // operand forms for 64-bit moves; presumably used by the AMOVQ row in optab, confirm
   453  	// valid in 32-bit mode
   454  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   455  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   456  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   457  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   458  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   459  
   460  	// valid only in 64-bit mode, usually with 64-bit prefix
   461  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   462  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   463  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   464  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   465  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   466  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   467  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   468  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   469  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   470  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   471  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   472  }
   473  
   474  var ym_rl = []ytab{
   475  	{Ym, Ynone, Yrl, Zm_r, 1},
   476  }
   477  
   478  var yrl_m = []ytab{ // used by the ABOUNDL and ABOUNDW rows in optab
   479  	{Yrl, Ynone, Ym, Zr_m, 1},
   480  }
   481  
   482  var ymb_rl = []ytab{
   483  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   484  }
   485  
   486  var yml_rl = []ytab{ // used by the ABSF*, ABSR* and ACMOV* rows in optab
   487  	{Yml, Ynone, Yrl, Zm_r, 1},
   488  }
   489  
   490  var yrl_ml = []ytab{ // used by the AARPL row in optab
   491  	{Yrl, Ynone, Yml, Zr_m, 1},
   492  }
   493  
   494  var yml_mb = []ytab{
   495  	{Yrb, Ynone, Ymb, Zr_m, 1},
   496  	{Ymb, Ynone, Yrb, Zm_r, 1},
   497  }
   498  
   499  var yrb_mb = []ytab{
   500  	{Yrb, Ynone, Ymb, Zr_m, 1},
   501  }
   502  
   503  var yxchg = []ytab{
   504  	{Yax, Ynone, Yrl, Z_rp, 1},
   505  	{Yrl, Ynone, Yax, Zrp_, 1},
   506  	{Yrl, Ynone, Yml, Zr_m, 1},
   507  	{Yml, Ynone, Yrl, Zm_r, 1},
   508  }
   509  
   510  var ydivl = []ytab{
   511  	{Yml, Ynone, Ynone, Zm_o, 2},
   512  }
   513  
   514  var ydivb = []ytab{
   515  	{Ymb, Ynone, Ynone, Zm_o, 2},
   516  }
   517  
   518  var yimul = []ytab{
   519  	{Yml, Ynone, Ynone, Zm_o, 2},
   520  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   521  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   522  	{Yml, Ynone, Yrl, Zm_r, 2},
   523  }
   524  
   525  var yimul3 = []ytab{ // three-operand form: from3 is Yml rather than Ynone
   526  	{Yi8, Yml, Yrl, Zibm_r, 2},
   527  }
   528  
   529  var ybyte = []ytab{ // used by the ABYTE row in optab
   530  	{Yi64, Ynone, Ynone, Zbyte, 1},
   531  }
   532  
   533  var yin = []ytab{
   534  	{Yi32, Ynone, Ynone, Zib_, 1},
   535  	{Ynone, Ynone, Ynone, Zlit, 1},
   536  }
   537  
   538  var yint = []ytab{
   539  	{Yi32, Ynone, Ynone, Zib_, 1},
   540  }
   541  
   542  var ypushl = []ytab{
   543  	{Yrl, Ynone, Ynone, Zrp_, 1},
   544  	{Ym, Ynone, Ynone, Zm_o, 2},
   545  	{Yi8, Ynone, Ynone, Zib_, 1},
   546  	{Yi32, Ynone, Ynone, Zil_, 1},
   547  }
   548  
   549  var ypopl = []ytab{
   550  	{Ynone, Ynone, Yrl, Z_rp, 1},
   551  	{Ynone, Ynone, Ym, Zo_m, 2},
   552  }
   553  
   554  var ybswap = []ytab{ // used by the ABSWAPL and ABSWAPQ rows in optab
   555  	{Ynone, Ynone, Yrl, Z_rp, 2},
   556  }
   557  
   558  var yscond = []ytab{
   559  	{Ynone, Ynone, Ymb, Zo_m, 2},
   560  }
   561  
   562  var yjcond = []ytab{ // all three lines use Zbr; the zoffset 0 lines do not advance z, so they share opcode bytes with the last line
   563  	{Ynone, Ynone, Ybr, Zbr, 0},
   564  	{Yi0, Ynone, Ybr, Zbr, 0},
   565  	{Yi1, Ynone, Ybr, Zbr, 1},
   566  }
   567  
   568  var yloop = []ytab{
   569  	{Ynone, Ynone, Ybr, Zloop, 1},
   570  }
   571  
   572  var ycall = []ytab{ // used by the obj.ACALL row in optab; a zoffset of 0 leaves z in place for the following line
   573  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   574  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   575  	{Ynone, Ynone, Yindir, Zcallind, 2},
   576  	{Ynone, Ynone, Ybr, Zcall, 0},
   577  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   578  }
   579  
   580  var yduff = []ytab{
   581  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   582  }
   583  
   584  var yjmp = []ytab{
   585  	{Ynone, Ynone, Yml, Zo_m64, 2},
   586  	{Ynone, Ynone, Ybr, Zjmp, 0},
   587  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   588  }
   589  
   590  var yfmvd = []ytab{ // NOTE(review): Yf0/Yrf are presumably the x87 stack top and stack registers; confirm in oclass
   591  	{Ym, Ynone, Yf0, Zm_o, 2},
   592  	{Yf0, Ynone, Ym, Zo_m, 2},
   593  	{Yrf, Ynone, Yf0, Zm_o, 2},
   594  	{Yf0, Ynone, Yrf, Zo_m, 2},
   595  }
   596  
   597  var yfmvdp = []ytab{ // the two Yf0-source lines of yfmvd
   598  	{Yf0, Ynone, Ym, Zo_m, 2},
   599  	{Yf0, Ynone, Yrf, Zo_m, 2},
   600  }
   601  
   602  var yfmvf = []ytab{ // the two Ym lines of yfmvd (no Yrf forms)
   603  	{Ym, Ynone, Yf0, Zm_o, 2},
   604  	{Yf0, Ynone, Ym, Zo_m, 2},
   605  }
   606  
   607  var yfmvx = []ytab{
   608  	{Ym, Ynone, Yf0, Zm_o, 2},
   609  }
   610  
   611  var yfmvp = []ytab{
   612  	{Yf0, Ynone, Ym, Zo_m, 2},
   613  }
   614  
   615  var yfcmv = []ytab{
   616  	{Yrf, Ynone, Yf0, Zm_o, 2},
   617  }
   618  
   619  var yfadd = []ytab{
   620  	{Ym, Ynone, Yf0, Zm_o, 2},
   621  	{Yrf, Ynone, Yf0, Zm_o, 2},
   622  	{Yf0, Ynone, Yrf, Zo_m, 2},
   623  }
   624  
   625  var yfxch = []ytab{
   626  	{Yf0, Ynone, Yrf, Zo_m, 2},
   627  	{Yrf, Ynone, Yf0, Zm_o, 2},
   628  }
   629  
   630  var ycompp = []ytab{
   631  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   632  }
   633  
   634  var ystsw = []ytab{
   635  	{Ynone, Ynone, Ym, Zo_m, 2},
   636  	{Ynone, Ynone, Yax, Zlit, 1},
   637  }
   638  
   639  var ysvrs = []ytab{ // a single Ym operand, accepted either as destination (Zo_m) or as source (Zm_o)
   640  	{Ynone, Ynone, Ym, Zo_m, 2},
   641  	{Ym, Ynone, Ynone, Zm_o, 2},
   642  }
   643  
   644  var ymm = []ytab{ // two parallel forms: Ymm->Ymr and Yxm->Yxr, both Zm_r_xm
   645  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   646  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   647  }
   648  
   649  var yxm = []ytab{ // used by the SSE arithmetic rows in optab (e.g. AADDPD, AADDPS, AADDSD, AADDSS, AANDPD)
   650  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   651  }
   652  
   653  var yxm_q4 = []ytab{ // NOTE(review): name suggests rows using the Pq4 prefix; confirm in optab
   654  	{Yxm, Ynone, Yxr, Zm_r, 1},
   655  }
   656  
   657  var yxcvm1 = []ytab{
   658  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   659  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   660  }
   661  
   662  var yxcvm2 = []ytab{
   663  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   664  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   665  }
   666  
   667  var yxr = []ytab{
   668  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   669  }
   670  
   671  var yxr_ml = []ytab{
   672  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   673  }
   674  
   675  var ymr = []ytab{
   676  	{Ymr, Ynone, Ymr, Zm_r, 1},
   677  }
   678  
   679  var ymr_ml = []ytab{
   680  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   681  }
   682  
   683  var yxcmpi = []ytab{ // three-operand form: the destination column holds the Yi8 immediate
   684  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   685  }
   686  
   687  var yxmov = []ytab{
   688  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   689  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   690  }
   691  
   692  var yxcvfl = []ytab{
   693  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   694  }
   695  
   696  var yxcvlf = []ytab{
   697  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   698  }
   699  
   700  var yxcvfq = []ytab{ // same operand classes as yxcvfl, but the line owns 2 opcode bytes
   701  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   702  }
   703  
   704  var yxcvqf = []ytab{ // same operand classes as yxcvlf, but the line owns 2 opcode bytes
   705  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   706  }
   707  
   708  var yps = []ytab{
   709  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   710  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   711  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   712  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   713  }
   714  
   715  var yxrrl = []ytab{
   716  	{Yxr, Ynone, Yrl, Zm_r, 1},
   717  }
   718  
   719  var ymrxr = []ytab{
   720  	{Ymr, Ynone, Yxr, Zm_r, 1},
   721  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   722  }
   723  
   724  var ymshuf = []ytab{
   725  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   726  }
   727  
   728  var ymshufb = []ytab{
   729  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   730  }
   731  
   732  var yxshuf = []ytab{ // immediate is Yu8 (unsigned 8-bit) here, unlike ymshuf which takes Yi8
   733  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   734  }
   735  
   736  var yextrw = []ytab{
   737  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   738  }
   739  
   740  var yextr = []ytab{
   741  	{Yu8, Yxr, Ymm, Zibr_m, 3},
   742  }
   743  
   744  var yinsrw = []ytab{
   745  	{Yu8, Yml, Yxr, Zibm_r, 2},
   746  }
   747  
   748  var yinsr = []ytab{
   749  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   750  }
   751  
   752  var ypsdq = []ytab{
   753  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   754  }
   755  
   756  var ymskb = []ytab{
   757  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   758  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   759  }
   760  
   761  var ycrc32l = []ytab{ // presumably used by the CRC32 rows in optab; confirm
   762  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   763  }
   764  
   765  var yprefetch = []ytab{
   766  	{Ym, Ynone, Ynone, Zm_o, 2},
   767  }
   768  
   769  var yaes = []ytab{
   770  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   771  }
   772  
   773  var yxbegin = []ytab{
   774  	{Ynone, Ynone, Ybr, Zjmp, 1},
   775  }
   776  
   777  var yxabort = []ytab{
   778  	{Yu8, Ynone, Ynone, Zib_, 1},
   779  }
   780  
   781  var ylddqu = []ytab{
   782  	{Ym, Ynone, Yxr, Zm_r, 1},
   783  }
   784  
   785  // VEX instructions that come in two forms:
   786  //	VTHING xmm2/m128, xmmV, xmm1
   787  //	VTHING ymm2/m256, ymmV, ymm1
   788  // The opcode array in the corresponding Optab entry
   789  // should contain the (VEX prefixes, opcode byte) pair
   790  // for each of the two forms.
   791  // For example, the entries for VPXOR are:
   792  //
   793  //	VPXOR xmm2/m128, xmmV, xmm1
   794  //	VEX.NDS.128.66.0F.WIG EF /r
   795  //
   796  //	VPXOR ymm2/m256, ymmV, ymm1
   797  //	VEX.NDS.256.66.0F.WIG EF /r
   798  //
   799  // The NDS/NDD/DDS part can be dropped, producing this
   800  // Optab entry:
   801  //
   802  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
   803  //
   804  var yvex_xy3 = []ytab{
   805  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   806  	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
   807  }
   808  
   809  var yvex_ri3 = []ytab{
   810  	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
   811  }
   812  
   813  var yvex_xyi3 = []ytab{ // xmm and ymm forms, each listed once with a Yu8 and once with a Yi8 immediate
   814  	{Yu8, Yxm, Yxr, Zvex_i_rm_r, 2},
   815  	{Yu8, Yym, Yyr, Zvex_i_rm_r, 2},
   816  	{Yi8, Yxm, Yxr, Zvex_i_rm_r, 2},
   817  	{Yi8, Yym, Yyr, Zvex_i_rm_r, 2},
   818  }
   819  
   820  var yvex_yyi4 = []ytab{ // TODO: don't hide the 4th operand; some versions also have an xmm form
   821  	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   822  }
   823  
   824  var yvex_xyi4 = []ytab{
   825  	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
   826  }
   827  
   828  var yvex_shift = []ytab{ // immediate-count forms (Zvex_i_r_v) followed by forms taking the count from a Yxm operand
   829  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   830  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   831  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   832  	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2},
   833  }
   834  
   835  var yvex_shift_dq = []ytab{ // only the immediate forms of yvex_shift
   836  	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
   837  	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
   838  }
   839  
   840  var yvex_r3 = []ytab{
   841  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   842  }
   843  
   844  var yvex_vmr3 = []ytab{
   845  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   846  }
   847  
   848  var yvex_xy2 = []ytab{ // two-operand counterpart of yvex_xy3: from3 is Ynone
   849  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   850  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   851  }
   852  
   853  var yvex_xyr2 = []ytab{
   854  	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
   855  	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
   856  }
   857  
   858  var yvex_vmovdqa = []ytab{ // load (Zvex_rm_v_r) and store (Zvex_r_v_rm) lines for xmm, then for ymm
   859  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   860  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   861  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   862  	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
   863  }
   864  
   865  var yvex_vmovntdq = []ytab{
   866  	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
   867  	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
   868  }
   869  
   870  var yvex_vpbroadcast = []ytab{
   871  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   872  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   873  }
   874  
   875  var yvex_vpbroadcast_sd = []ytab{ // only the Yyr-destination line of yvex_vpbroadcast
   876  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   877  }
   878  
   879  var ymmxmm0f38 = []ytab{ // Ymm->Ymr form owns 3 opcode bytes, Yxm->Yxr form owns 5
   880  	{Ymm, Ynone, Ymr, Zlitm_r, 3},
   881  	{Yxm, Ynone, Yxr, Zlitm_r, 5},
   882  }
   883  
   884  /*
   885   * You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   886   * ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   887   * to find the entry with the given p.As and then looks through the ytable for
   888   * that instruction (the second field in the optab struct) for a line whose
   889   * first three values (from, from3, to) match the Ytypes of the operands.  The
   890   * function oclass computes the specific Ytype of an operand and then the set
   891   * of more general Ytypes that it satisfies is implied by the ycover table, set
   892   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   893   * from the more general 8-bit constants, but instinit says
   894   *
   895   *        ycover[Yi0*Ymax+Ys32] = 1
   896   *        ycover[Yi1*Ymax+Ys32] = 1
   897   *        ycover[Yi8*Ymax+Ys32] = 1
   898   *
   899   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   900   * if that's what an instruction can handle.
   901   *
   902   * In parallel with the scan through the ytable for the appropriate line, there
   903   * is a z pointer that starts out pointing at the strange magic byte list in
   904   * the Optab struct.  With each step past a non-matching ytable line, z
   905   * advances by the 5th entry in the line.  When a matching line is found, that
   906   * z pointer has the extra data to use in laying down the instruction bytes.
   907   * The actual bytes laid down are a function of the 4th entry in the line (that
   908   * is, the Ztype) and the z bytes.
   909   *
   910   * For example, let's look at AADDL.  The optab line says:
   911   *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   912   *
   913   * and yaddl says
   914   *        var yaddl = []ytab{
   915   *                {Yi8, Ynone, Yml, Zibo_m, 2},
   916   *                {Yi32, Ynone, Yax, Zil_, 1},
   917   *                {Yi32, Ynone, Yml, Zilo_m, 2},
   918   *                {Yrl, Ynone, Yml, Zr_m, 1},
   919   *                {Yml, Ynone, Yrl, Zm_r, 1},
   920   *        }
   921   *
   922   * so there are 5 possible types of ADDL instruction that can be laid down, and
   923   * possible states used to lay them down (Ztype and z pointer, assuming z
   924   * points at [23]uint8{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   925   *
   926   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   927   *        Yi32, Yax -> Zil_, z+2 (0x05)
   928   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   929   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   930   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   931   *
   932   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   933   * relatively straightforward as this program goes.
   934   *
   935   * The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   936   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   937   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   938   * Zilo_m is the same but a long (32-bit) immediate.
   939   */
   940  var optab =
   941  /*	as, ytab, andproto, opcode */
   942  []Optab{
   943  	{obj.AXXX, nil, 0, [23]uint8{}},
   944  	{AAAA, ynone, P32, [23]uint8{0x37}},
   945  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   946  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   947  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   948  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x12}},
   949  	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   950  	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   951  	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   952  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   953  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   954  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   955  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   956  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   957  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   958  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   959  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   960  	{AADJSP, nil, 0, [23]uint8{}},
   961  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   962  	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   963  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   964  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   965  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   966  	{AANDPS, yxm, Pm, [23]uint8{0x54}},
   967  	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   968  	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   969  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   970  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   971  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   972  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   973  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   974  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   975  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   976  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   977  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   978  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   979  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   980  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   981  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   982  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   983  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   984  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   985  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   986  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   987  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   988  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   989  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   990  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   991  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   992  	{ABYTE, ybyte, Px, [23]uint8{1}},
   993  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   994  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   995  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   996  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   997  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   998  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   999  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1000  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1001  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1002  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1003  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1004  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1005  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1006  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1007  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1008  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1009  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1010  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1011  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1012  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1013  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1014  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1015  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1016  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1017  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1018  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1019  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1020  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1021  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1022  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1023  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1024  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1025  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1026  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1027  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1028  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1029  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1030  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1031  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1032  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1033  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1034  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1035  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1036  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1037  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1038  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1039  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1040  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1041  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1042  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1043  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1044  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1045  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1046  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1047  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1048  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1049  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1050  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1051  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1052  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1053  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1054  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1055  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1056  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1057  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1058  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1059  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1060  	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
  1061  	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
  1062  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1063  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1064  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1065  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1066  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1067  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1068  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1069  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1070  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1071  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1072  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1073  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1074  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1075  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1076  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1077  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1078  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1079  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1080  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1081  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1082  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1083  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1084  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1085  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1086  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1087  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1088  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1089  	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
  1090  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1091  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1092  	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
  1093  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1094  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1095  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1096  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1097  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1098  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1099  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1100  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1101  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1102  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1103  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1104  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1105  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1106  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1107  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1108  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1109  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1110  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1111  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1112  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1113  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1115  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1116  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1117  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1118  	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
  1119  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1120  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1121  	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
  1122  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1123  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1124  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1125  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1126  	{AINT, yint, Px, [23]uint8{0xcd}},
  1127  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1128  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1129  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1130  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1131  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1132  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1133  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1134  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1135  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1136  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1137  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1138  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1139  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1140  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1141  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1142  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1143  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1144  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1145  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1146  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1147  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1148  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1149  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1150  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1151  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1152  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1153  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1154  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1155  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1156  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1157  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1158  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1159  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1160  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1161  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1162  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1163  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1164  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1165  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1166  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1167  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1168  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1169  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1170  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1171  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1172  	{ALONG, ybyte, Px, [23]uint8{4}},
  1173  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1174  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1175  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1176  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1177  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1178  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1179  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1180  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1181  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1182  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1183  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1184  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1185  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1186  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1187  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1188  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1189  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1190  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1191  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1192  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1193  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1194  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1195  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1196  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1197  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1198  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1199  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1200  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1201  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1202  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1203  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1204  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1205  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1206  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1207  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1208  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1209  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1210  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1211  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1212  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1213  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1214  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1215  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1216  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1217  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1218  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1219  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1220  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1221  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1222  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1223  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1224  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1225  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1226  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1227  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1228  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1229  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1230  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1231  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1232  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1233  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1234  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1235  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1236  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1237  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1238  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1239  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1240  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1241  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1242  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1243  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1244  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1245  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1246  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1247  	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1248  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1249  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1250  	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1251  	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1252  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1253  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1254  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1255  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1256  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1257  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1258  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1259  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1260  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1261  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1262  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1263  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1264  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1265  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1266  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1267  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1268  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1269  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1270  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1271  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1272  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1273  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1274  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1275  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1276  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1277  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1278  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1279  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1280  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1281  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1282  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1283  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1284  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1285  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1286  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1287  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1288  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1289  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1290  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1291  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1292  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1293  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1294  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1295  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1296  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1297  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1298  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1299  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1300  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1301  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1302  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1303  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1304  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1305  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1306  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1307  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1308  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1309  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1310  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1311  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1312  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1313  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1314  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1315  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1316  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1317  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1318  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1319  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1320  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1321  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1322  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1323  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1324  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1325  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1326  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1327  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1328  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1329  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1330  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1331  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1332  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1333  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1334  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1335  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1336  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1337  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1338  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1339  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1340  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1341  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1342  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1343  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1344  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1345  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1346  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1347  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1348  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1349  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1350  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1351  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1352  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1353  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1354  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1355  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1356  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1357  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1358  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1359  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1360  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1361  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1362  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1363  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1364  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1365  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1366  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1367  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1368  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1369  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1370  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1371  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1372  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1373  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1374  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1375  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1376  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1377  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1378  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1379  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1380  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1381  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1382  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1383  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1384  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1385  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1386  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1387  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1388  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1389  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1390  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1391  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1392  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1393  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1394  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1395  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1396  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1397  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1398  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1399  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1400  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1401  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1402  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1403  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1404  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1405  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1406  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1407  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1408  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1409  	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1410  	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1411  	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1412  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1413  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1414  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1415  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1416  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1417  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1418  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1419  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1420  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1421  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1422  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1423  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1424  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1425  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1426  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1427  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1428  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1429  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1430  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1431  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1432  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1433  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1434  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1435  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1436  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1437  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1438  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1439  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1440  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1441  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1442  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1443  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1444  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1445  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1446  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1447  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1448  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1449  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1450  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1451  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1452  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1453  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1454  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1455  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1456  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1457  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1458  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1459  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1460  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1461  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1462  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1463  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1464  	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1465  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1466  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1467  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1468  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1469  	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
  1470  	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
  1471  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1472  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1473  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1474  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1475  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1476  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1477  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1478  	{AWORD, ybyte, Px, [23]uint8{2}},
  1479  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1480  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1481  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1482  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1483  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1484  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1485  	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1486  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1487  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1488  	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1489  	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1490  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1491  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1492  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1493  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1494  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1495  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1496  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1497  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1498  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1499  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1500  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1501  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1502  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1503  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1504  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1505  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1506  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1507  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1508  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1509  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1510  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1511  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1512  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1513  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1514  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1515  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1516  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1517  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1518  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1519  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1520  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1521  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1522  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1523  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1524  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1525  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1526  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1527  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1528  	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
  1529  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1530  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1531  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1532  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1533  	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
  1534  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1535  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1536  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1537  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1538  	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
  1539  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1540  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1541  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1542  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1543  	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
  1544  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1545  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1546  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1547  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1548  	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
  1549  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1550  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1551  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1552  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1553  	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
  1554  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1555  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1556  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1557  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1558  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1559  	{AFFREE, nil, 0, [23]uint8{}},
  1560  	{AFLDCW, ysvrs, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1561  	{AFLDENV, ysvrs, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1562  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1563  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1564  	{AFSTCW, ysvrs, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1565  	{AFSTENV, ysvrs, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1566  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1567  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1568  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1569  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1570  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1571  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1572  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1573  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1574  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1575  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1576  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1577  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1578  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1579  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1580  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1581  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1582  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1583  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1584  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1585  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1586  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1587  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1588  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1589  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1590  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1591  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1592  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1593  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1594  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1595  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1596  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1597  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1598  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1599  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1600  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1601  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1602  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1603  	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
  1604  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1605  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1606  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1607  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1608  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1609  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1610  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1611  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1612  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1613  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1614  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1615  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1616  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1617  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1618  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1619  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1620  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1621  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1622  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1623  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1624  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1625  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1626  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1627  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1628  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1629  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1630  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1631  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1632  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1633  	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1634  	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
  1635  	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
  1636  	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1637  	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1638  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1639  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1640  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1641  	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
  1642  	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
  1643  	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
  1644  
  1645  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1646  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1647  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1648  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1649  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1650  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1651  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1652  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1653  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1654  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1655  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1656  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1657  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1658  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1659  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1660  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1661  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1662  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1663  
  1664  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1665  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1666  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1667  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1668  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1669  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1670  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1671  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1672  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1673  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1674  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
  1675  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70, VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
  1676  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
  1677  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
  1678  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
  1679  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
  1680  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
  1681  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
  1682  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
  1683  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
  1684  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
  1685  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
  1686  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
  1687  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
  1688  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
  1689  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
  1690  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
  1691  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
  1692  	{AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_256_66_0F38_W0, 0x19}},
  1693  	{AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x18, VEX_256_66_0F38_W0, 0x18}},
  1694  	{AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x12, VEX_256_F2_0F_WIG, 0x12}},
  1695  	{AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x16, VEX_256_F3_0F_WIG, 0x16}},
  1696  	{AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x12, VEX_256_F3_0F_WIG, 0x12}},
  1697  
  1698  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1699  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1700  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1701  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1702  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1703  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1704  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1705  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1706  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1707  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1708  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1709  	{obj.AEND, nil, 0, [23]uint8{}},
  1710  	{0, nil, 0, [23]uint8{}},
  1711  }
  1712  
// opindex maps an instruction opcode, masked with obj.AMask, to its entry
// in optab. It is filled in by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1714  
  1715  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1716  // This happens on systems like Solaris that call .so functions instead of system calls.
  1717  // It does not seem to be necessary for any other systems. This is probably working
  1718  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1719  // what that bug is. And this does fix it.
  1720  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1721  	if ctxt.Headtype == objabi.Hsolaris {
  1722  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1723  		return strings.HasPrefix(s.Name, "libc_")
  1724  	}
  1725  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1726  }
  1727  
  1728  // single-instruction no-ops of various lengths.
  1729  // constructed by hand and disassembled with gdb to verify.
  1730  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1731  var nop = [][16]uint8{
  1732  	{0x90},
  1733  	{0x66, 0x90},
  1734  	{0x0F, 0x1F, 0x00},
  1735  	{0x0F, 0x1F, 0x40, 0x00},
  1736  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1737  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1738  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1739  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1740  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1741  }
  1742  
  1743  // Native Client rejects the repeated 0x66 prefix.
  1744  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1745  func fillnop(p []byte, n int) {
  1746  	var m int
  1747  
  1748  	for n > 0 {
  1749  		m = n
  1750  		if m > len(nop) {
  1751  			m = len(nop)
  1752  		}
  1753  		copy(p[:m], nop[m-1][:m])
  1754  		p = p[m:]
  1755  		n -= m
  1756  	}
  1757  }
  1758  
  1759  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1760  	s.Grow(int64(c) + int64(pad))
  1761  	fillnop(s.P[c:], int(pad))
  1762  	return c + pad
  1763  }
  1764  
  1765  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1766  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1767  		return l
  1768  	}
  1769  	return q
  1770  }
  1771  
  1772  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  1773  	if s.P != nil {
  1774  		return
  1775  	}
  1776  
  1777  	if ycover[0] == 0 {
  1778  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  1779  	}
  1780  
  1781  	var asmbuf AsmBuf
  1782  
  1783  	for p := s.Func.Text; p != nil; p = p.Link {
  1784  		if p.To.Type == obj.TYPE_BRANCH {
  1785  			if p.Pcond == nil {
  1786  				p.Pcond = p
  1787  			}
  1788  		}
  1789  		if p.As == AADJSP {
  1790  			p.To.Type = obj.TYPE_REG
  1791  			p.To.Reg = REG_SP
  1792  			v := int32(-p.From.Offset)
  1793  			p.From.Offset = int64(v)
  1794  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1795  			if v < 0 {
  1796  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1797  				v = -v
  1798  				p.From.Offset = int64(v)
  1799  			}
  1800  
  1801  			if v == 0 {
  1802  				p.As = obj.ANOP
  1803  			}
  1804  		}
  1805  	}
  1806  
  1807  	var q *obj.Prog
  1808  	var count int64 // rough count of number of instructions
  1809  	for p := s.Func.Text; p != nil; p = p.Link {
  1810  		count++
  1811  		p.Back = 2 // use short branches first time through
  1812  		q = p.Pcond
  1813  		if q != nil && (q.Back&2 != 0) {
  1814  			p.Back |= 1 // backward jump
  1815  			q.Back |= 4 // loop head
  1816  		}
  1817  
  1818  		if p.As == AADJSP {
  1819  			p.To.Type = obj.TYPE_REG
  1820  			p.To.Reg = REG_SP
  1821  			v := int32(-p.From.Offset)
  1822  			p.From.Offset = int64(v)
  1823  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1824  			if v < 0 {
  1825  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1826  				v = -v
  1827  				p.From.Offset = int64(v)
  1828  			}
  1829  
  1830  			if v == 0 {
  1831  				p.As = obj.ANOP
  1832  			}
  1833  		}
  1834  	}
  1835  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1836  
  1837  	n := 0
  1838  	var c int32
  1839  	errors := ctxt.Errors
  1840  	for {
  1841  		loop := int32(0)
  1842  		for i := range s.R {
  1843  			s.R[i] = obj.Reloc{}
  1844  		}
  1845  		s.R = s.R[:0]
  1846  		s.P = s.P[:0]
  1847  		c = 0
  1848  		for p := s.Func.Text; p != nil; p = p.Link {
  1849  			if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 {
  1850  
  1851  				// pad everything to avoid crossing 32-byte boundary
  1852  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1853  					c = naclpad(ctxt, s, c, -c&31)
  1854  				}
  1855  
  1856  				// pad call deferreturn to start at 32-byte boundary
  1857  				// so that subtracting 5 in jmpdefer will jump back
  1858  				// to that boundary and rerun the call.
  1859  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1860  					c = naclpad(ctxt, s, c, -c&31)
  1861  				}
  1862  
  1863  				// pad call to end at 32-byte boundary
  1864  				if p.As == obj.ACALL {
  1865  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1866  				}
  1867  
  1868  				// the linker treats REP and STOSQ as different instructions
  1869  				// but in fact the REP is a prefix on the STOSQ.
  1870  				// make sure REP has room for 2 more bytes, so that
  1871  				// padding will not be inserted before the next instruction.
  1872  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1873  					c = naclpad(ctxt, s, c, -c&31)
  1874  				}
  1875  
  1876  				// same for LOCK.
  1877  				// various instructions follow; the longest is 4 bytes.
  1878  				// give ourselves 8 bytes so as to avoid surprises.
  1879  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1880  					c = naclpad(ctxt, s, c, -c&31)
  1881  				}
  1882  			}
  1883  
  1884  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1885  				// pad with NOPs
  1886  				v := -c & (LoopAlign - 1)
  1887  
  1888  				if v <= MaxLoopPad {
  1889  					s.Grow(int64(c) + int64(v))
  1890  					fillnop(s.P[c:], int(v))
  1891  					c += v
  1892  				}
  1893  			}
  1894  
  1895  			p.Pc = int64(c)
  1896  
  1897  			// process forward jumps to p
  1898  			for q = p.Rel; q != nil; q = q.Forwd {
  1899  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1900  				if q.Back&2 != 0 { // short
  1901  					if v > 127 {
  1902  						loop++
  1903  						q.Back ^= 2
  1904  					}
  1905  
  1906  					if q.As == AJCXZL || q.As == AXBEGIN {
  1907  						s.P[q.Pc+2] = byte(v)
  1908  					} else {
  1909  						s.P[q.Pc+1] = byte(v)
  1910  					}
  1911  				} else {
  1912  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1913  				}
  1914  			}
  1915  
  1916  			p.Rel = nil
  1917  
  1918  			p.Pc = int64(c)
  1919  			asmbuf.asmins(ctxt, s, p)
  1920  			m := asmbuf.Len()
  1921  			if int(p.Isize) != m {
  1922  				p.Isize = uint8(m)
  1923  				loop++
  1924  			}
  1925  
  1926  			s.Grow(p.Pc + int64(m))
  1927  			copy(s.P[p.Pc:], asmbuf.Bytes())
  1928  			c += int32(m)
  1929  		}
  1930  
  1931  		n++
  1932  		if n > 20 {
  1933  			ctxt.Diag("span must be looping")
  1934  			log.Fatalf("loop")
  1935  		}
  1936  		if loop == 0 {
  1937  			break
  1938  		}
  1939  		if ctxt.Errors > errors {
  1940  			return
  1941  		}
  1942  	}
  1943  
  1944  	if ctxt.Headtype == objabi.Hnacl {
  1945  		c = naclpad(ctxt, s, c, -c&31)
  1946  	}
  1947  
  1948  	s.Size = int64(c)
  1949  
  1950  	if false { /* debug['a'] > 1 */
  1951  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1952  		var i int
  1953  		for i = 0; i < len(s.P); i++ {
  1954  			fmt.Printf(" %.2x", s.P[i])
  1955  			if i%16 == 15 {
  1956  				fmt.Printf("\n  %.6x", uint(i+1))
  1957  			}
  1958  		}
  1959  
  1960  		if i%16 != 0 {
  1961  			fmt.Printf("\n")
  1962  		}
  1963  
  1964  		for i := 0; i < len(s.R); i++ {
  1965  			r := &s.R[i]
  1966  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1967  		}
  1968  	}
  1969  }
  1970  
  1971  func instinit(ctxt *obj.Link) {
  1972  	if ycover[0] != 0 {
  1973  		// Already initialized; stop now.
  1974  		// This happens in the cmd/asm tests,
  1975  		// each of which re-initializes the arch.
  1976  		return
  1977  	}
  1978  
  1979  	switch ctxt.Headtype {
  1980  	case objabi.Hplan9:
  1981  		plan9privates = ctxt.Lookup("_privates")
  1982  	case objabi.Hnacl:
  1983  		deferreturn = ctxt.Lookup("runtime.deferreturn")
  1984  	}
  1985  
  1986  	for i := 1; optab[i].as != 0; i++ {
  1987  		c := optab[i].as
  1988  		if opindex[c&obj.AMask] != nil {
  1989  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  1990  		}
  1991  		opindex[c&obj.AMask] = &optab[i]
  1992  	}
  1993  
  1994  	for i := 0; i < Ymax; i++ {
  1995  		ycover[i*Ymax+i] = 1
  1996  	}
  1997  
  1998  	ycover[Yi0*Ymax+Yi8] = 1
  1999  	ycover[Yi1*Ymax+Yi8] = 1
  2000  	ycover[Yu7*Ymax+Yi8] = 1
  2001  
  2002  	ycover[Yi0*Ymax+Yu7] = 1
  2003  	ycover[Yi1*Ymax+Yu7] = 1
  2004  
  2005  	ycover[Yi0*Ymax+Yu8] = 1
  2006  	ycover[Yi1*Ymax+Yu8] = 1
  2007  	ycover[Yu7*Ymax+Yu8] = 1
  2008  
  2009  	ycover[Yi0*Ymax+Ys32] = 1
  2010  	ycover[Yi1*Ymax+Ys32] = 1
  2011  	ycover[Yu7*Ymax+Ys32] = 1
  2012  	ycover[Yu8*Ymax+Ys32] = 1
  2013  	ycover[Yi8*Ymax+Ys32] = 1
  2014  
  2015  	ycover[Yi0*Ymax+Yi32] = 1
  2016  	ycover[Yi1*Ymax+Yi32] = 1
  2017  	ycover[Yu7*Ymax+Yi32] = 1
  2018  	ycover[Yu8*Ymax+Yi32] = 1
  2019  	ycover[Yi8*Ymax+Yi32] = 1
  2020  	ycover[Ys32*Ymax+Yi32] = 1
  2021  
  2022  	ycover[Yi0*Ymax+Yi64] = 1
  2023  	ycover[Yi1*Ymax+Yi64] = 1
  2024  	ycover[Yu7*Ymax+Yi64] = 1
  2025  	ycover[Yu8*Ymax+Yi64] = 1
  2026  	ycover[Yi8*Ymax+Yi64] = 1
  2027  	ycover[Ys32*Ymax+Yi64] = 1
  2028  	ycover[Yi32*Ymax+Yi64] = 1
  2029  
  2030  	ycover[Yal*Ymax+Yrb] = 1
  2031  	ycover[Ycl*Ymax+Yrb] = 1
  2032  	ycover[Yax*Ymax+Yrb] = 1
  2033  	ycover[Ycx*Ymax+Yrb] = 1
  2034  	ycover[Yrx*Ymax+Yrb] = 1
  2035  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2036  
  2037  	ycover[Ycl*Ymax+Ycx] = 1
  2038  
  2039  	ycover[Yax*Ymax+Yrx] = 1
  2040  	ycover[Ycx*Ymax+Yrx] = 1
  2041  
  2042  	ycover[Yax*Ymax+Yrl] = 1
  2043  	ycover[Ycx*Ymax+Yrl] = 1
  2044  	ycover[Yrx*Ymax+Yrl] = 1
  2045  	ycover[Yrl32*Ymax+Yrl] = 1
  2046  
  2047  	ycover[Yf0*Ymax+Yrf] = 1
  2048  
  2049  	ycover[Yal*Ymax+Ymb] = 1
  2050  	ycover[Ycl*Ymax+Ymb] = 1
  2051  	ycover[Yax*Ymax+Ymb] = 1
  2052  	ycover[Ycx*Ymax+Ymb] = 1
  2053  	ycover[Yrx*Ymax+Ymb] = 1
  2054  	ycover[Yrb*Ymax+Ymb] = 1
  2055  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2056  	ycover[Ym*Ymax+Ymb] = 1
  2057  
  2058  	ycover[Yax*Ymax+Yml] = 1
  2059  	ycover[Ycx*Ymax+Yml] = 1
  2060  	ycover[Yrx*Ymax+Yml] = 1
  2061  	ycover[Yrl*Ymax+Yml] = 1
  2062  	ycover[Yrl32*Ymax+Yml] = 1
  2063  	ycover[Ym*Ymax+Yml] = 1
  2064  
  2065  	ycover[Yax*Ymax+Ymm] = 1
  2066  	ycover[Ycx*Ymax+Ymm] = 1
  2067  	ycover[Yrx*Ymax+Ymm] = 1
  2068  	ycover[Yrl*Ymax+Ymm] = 1
  2069  	ycover[Yrl32*Ymax+Ymm] = 1
  2070  	ycover[Ym*Ymax+Ymm] = 1
  2071  	ycover[Ymr*Ymax+Ymm] = 1
  2072  
  2073  	ycover[Ym*Ymax+Yxm] = 1
  2074  	ycover[Yxr*Ymax+Yxm] = 1
  2075  
  2076  	ycover[Ym*Ymax+Yym] = 1
  2077  	ycover[Yyr*Ymax+Yym] = 1
  2078  
  2079  	for i := 0; i < MAXREG; i++ {
  2080  		reg[i] = -1
  2081  		if i >= REG_AL && i <= REG_R15B {
  2082  			reg[i] = (i - REG_AL) & 7
  2083  			if i >= REG_SPB && i <= REG_DIB {
  2084  				regrex[i] = 0x40
  2085  			}
  2086  			if i >= REG_R8B && i <= REG_R15B {
  2087  				regrex[i] = Rxr | Rxx | Rxb
  2088  			}
  2089  		}
  2090  
  2091  		if i >= REG_AH && i <= REG_BH {
  2092  			reg[i] = 4 + ((i - REG_AH) & 7)
  2093  		}
  2094  		if i >= REG_AX && i <= REG_R15 {
  2095  			reg[i] = (i - REG_AX) & 7
  2096  			if i >= REG_R8 {
  2097  				regrex[i] = Rxr | Rxx | Rxb
  2098  			}
  2099  		}
  2100  
  2101  		if i >= REG_F0 && i <= REG_F0+7 {
  2102  			reg[i] = (i - REG_F0) & 7
  2103  		}
  2104  		if i >= REG_M0 && i <= REG_M0+7 {
  2105  			reg[i] = (i - REG_M0) & 7
  2106  		}
  2107  		if i >= REG_X0 && i <= REG_X0+15 {
  2108  			reg[i] = (i - REG_X0) & 7
  2109  			if i >= REG_X0+8 {
  2110  				regrex[i] = Rxr | Rxx | Rxb
  2111  			}
  2112  		}
  2113  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2114  			reg[i] = (i - REG_Y0) & 7
  2115  			if i >= REG_Y0+8 {
  2116  				regrex[i] = Rxr | Rxx | Rxb
  2117  			}
  2118  		}
  2119  
  2120  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2121  			regrex[i] = Rxr
  2122  		}
  2123  	}
  2124  }
  2125  
// isAndroid records whether objabi.GOOS is "android". prefixof consults it
// when choosing the segment prefix for TLS references.
var isAndroid = (objabi.GOOS == "android")
  2127  
  2128  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2129  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2130  		return 0
  2131  	}
  2132  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2133  		switch a.Reg {
  2134  		case REG_CS:
  2135  			return 0x2e
  2136  
  2137  		case REG_DS:
  2138  			return 0x3e
  2139  
  2140  		case REG_ES:
  2141  			return 0x26
  2142  
  2143  		case REG_FS:
  2144  			return 0x64
  2145  
  2146  		case REG_GS:
  2147  			return 0x65
  2148  
  2149  		case REG_TLS:
  2150  			// NOTE: Systems listed here should be only systems that
  2151  			// support direct TLS references like 8(TLS) implemented as
  2152  			// direct references from FS or GS. Systems that require
  2153  			// the initial-exec model, where you load the TLS base into
  2154  			// a register and then index from that register, do not reach
  2155  			// this code and should not be listed.
  2156  			if ctxt.Arch.Family == sys.I386 {
  2157  				switch ctxt.Headtype {
  2158  				default:
  2159  					if isAndroid {
  2160  						return 0x65 // GS
  2161  					}
  2162  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2163  
  2164  				case objabi.Hdarwin,
  2165  					objabi.Hdragonfly,
  2166  					objabi.Hfreebsd,
  2167  					objabi.Hnetbsd,
  2168  					objabi.Hopenbsd:
  2169  					return 0x65 // GS
  2170  				}
  2171  			}
  2172  
  2173  			switch ctxt.Headtype {
  2174  			default:
  2175  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2176  
  2177  			case objabi.Hlinux:
  2178  				if isAndroid {
  2179  					return 0x64 // FS
  2180  				}
  2181  
  2182  				if ctxt.Flag_shared {
  2183  					log.Fatalf("unknown TLS base register for linux with -shared")
  2184  				} else {
  2185  					return 0x64 // FS
  2186  				}
  2187  
  2188  			case objabi.Hdragonfly,
  2189  				objabi.Hfreebsd,
  2190  				objabi.Hnetbsd,
  2191  				objabi.Hopenbsd,
  2192  				objabi.Hsolaris:
  2193  				return 0x64 // FS
  2194  
  2195  			case objabi.Hdarwin:
  2196  				return 0x65 // GS
  2197  			}
  2198  		}
  2199  	}
  2200  
  2201  	if ctxt.Arch.Family == sys.I386 {
  2202  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2203  			// When building for inclusion into a shared library, an instruction of the form
  2204  			//     MOVL 0(CX)(TLS*1), AX
  2205  			// becomes
  2206  			//     mov %gs:(%ecx), %eax
  2207  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2208  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2209  			// a shared library the instruction it becomes
  2210  			//     mov 0x0(%ecx), $eax
  2211  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2212  			if a.Offset != 0 {
  2213  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2214  			}
  2215  			return 0x65 // GS
  2216  		}
  2217  		return 0
  2218  	}
  2219  
  2220  	switch a.Index {
  2221  	case REG_CS:
  2222  		return 0x2e
  2223  
  2224  	case REG_DS:
  2225  		return 0x3e
  2226  
  2227  	case REG_ES:
  2228  		return 0x26
  2229  
  2230  	case REG_TLS:
  2231  		if ctxt.Flag_shared {
  2232  			// When building for inclusion into a shared library, an instruction of the form
  2233  			//     MOV 0(CX)(TLS*1), AX
  2234  			// becomes
  2235  			//     mov %fs:(%rcx), %rax
  2236  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2237  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2238  			// a shared library the instruction does not require a prefix.
  2239  			if a.Offset != 0 {
  2240  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2241  			}
  2242  			return 0x64
  2243  		}
  2244  
  2245  	case REG_FS:
  2246  		return 0x64
  2247  
  2248  	case REG_GS:
  2249  		return 0x65
  2250  	}
  2251  
  2252  	return 0
  2253  }
  2254  
  2255  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2256  	switch a.Type {
  2257  	case obj.TYPE_NONE:
  2258  		return Ynone
  2259  
  2260  	case obj.TYPE_BRANCH:
  2261  		return Ybr
  2262  
  2263  	case obj.TYPE_INDIR:
  2264  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2265  			return Yindir
  2266  		}
  2267  		return Yxxx
  2268  
  2269  	case obj.TYPE_MEM:
  2270  		if a.Index == REG_SP {
  2271  			// Can't use SP as the index register
  2272  			return Yxxx
  2273  		}
  2274  		if ctxt.Arch.Family == sys.AMD64 {
  2275  			switch a.Name {
  2276  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2277  				// Global variables can't use index registers and their
  2278  				// base register is %rip (%rip is encoded as REG_NONE).
  2279  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2280  					return Yxxx
  2281  				}
  2282  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2283  				// These names must have a base of SP.  The old compiler
  2284  				// uses 0 for the base register. SSA uses REG_SP.
  2285  				if a.Reg != REG_SP && a.Reg != 0 {
  2286  					return Yxxx
  2287  				}
  2288  			case obj.NAME_NONE:
  2289  				// everything is ok
  2290  			default:
  2291  				// unknown name
  2292  				return Yxxx
  2293  			}
  2294  		}
  2295  		return Ym
  2296  
  2297  	case obj.TYPE_ADDR:
  2298  		switch a.Name {
  2299  		case obj.NAME_GOTREF:
  2300  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2301  			return Yxxx
  2302  
  2303  		case obj.NAME_EXTERN,
  2304  			obj.NAME_STATIC:
  2305  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2306  				return Yi32
  2307  			}
  2308  			return Yiauto // use pc-relative addressing
  2309  
  2310  		case obj.NAME_AUTO,
  2311  			obj.NAME_PARAM:
  2312  			return Yiauto
  2313  		}
  2314  
  2315  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2316  		// and got Yi32 in an earlier version of this code.
  2317  		// Keep doing that until we fix yduff etc.
  2318  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2319  			return Yi32
  2320  		}
  2321  
  2322  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2323  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2324  		}
  2325  		fallthrough
  2326  
  2327  		// fall through
  2328  
  2329  	case obj.TYPE_CONST:
  2330  		if a.Sym != nil {
  2331  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2332  		}
  2333  
  2334  		v := a.Offset
  2335  		if ctxt.Arch.Family == sys.I386 {
  2336  			v = int64(int32(v))
  2337  		}
  2338  		if v == 0 {
  2339  			if p.Mark&PRESERVEFLAGS != 0 {
  2340  				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
  2341  				return Yu7
  2342  			}
  2343  			return Yi0
  2344  		}
  2345  		if v == 1 {
  2346  			return Yi1
  2347  		}
  2348  		if v >= 0 && v <= 127 {
  2349  			return Yu7
  2350  		}
  2351  		if v >= 0 && v <= 255 {
  2352  			return Yu8
  2353  		}
  2354  		if v >= -128 && v <= 127 {
  2355  			return Yi8
  2356  		}
  2357  		if ctxt.Arch.Family == sys.I386 {
  2358  			return Yi32
  2359  		}
  2360  		l := int32(v)
  2361  		if int64(l) == v {
  2362  			return Ys32 /* can sign extend */
  2363  		}
  2364  		if v>>32 == 0 {
  2365  			return Yi32 /* unsigned */
  2366  		}
  2367  		return Yi64
  2368  
  2369  	case obj.TYPE_TEXTSIZE:
  2370  		return Ytextsize
  2371  	}
  2372  
  2373  	if a.Type != obj.TYPE_REG {
  2374  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2375  		return Yxxx
  2376  	}
  2377  
  2378  	switch a.Reg {
  2379  	case REG_AL:
  2380  		return Yal
  2381  
  2382  	case REG_AX:
  2383  		return Yax
  2384  
  2385  		/*
  2386  			case REG_SPB:
  2387  		*/
  2388  	case REG_BPB,
  2389  		REG_SIB,
  2390  		REG_DIB,
  2391  		REG_R8B,
  2392  		REG_R9B,
  2393  		REG_R10B,
  2394  		REG_R11B,
  2395  		REG_R12B,
  2396  		REG_R13B,
  2397  		REG_R14B,
  2398  		REG_R15B:
  2399  		if ctxt.Arch.Family == sys.I386 {
  2400  			return Yxxx
  2401  		}
  2402  		fallthrough
  2403  
  2404  	case REG_DL,
  2405  		REG_BL,
  2406  		REG_AH,
  2407  		REG_CH,
  2408  		REG_DH,
  2409  		REG_BH:
  2410  		return Yrb
  2411  
  2412  	case REG_CL:
  2413  		return Ycl
  2414  
  2415  	case REG_CX:
  2416  		return Ycx
  2417  
  2418  	case REG_DX, REG_BX:
  2419  		return Yrx
  2420  
  2421  	case REG_R8, /* not really Yrl */
  2422  		REG_R9,
  2423  		REG_R10,
  2424  		REG_R11,
  2425  		REG_R12,
  2426  		REG_R13,
  2427  		REG_R14,
  2428  		REG_R15:
  2429  		if ctxt.Arch.Family == sys.I386 {
  2430  			return Yxxx
  2431  		}
  2432  		fallthrough
  2433  
  2434  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2435  		if ctxt.Arch.Family == sys.I386 {
  2436  			return Yrl32
  2437  		}
  2438  		return Yrl
  2439  
  2440  	case REG_F0 + 0:
  2441  		return Yf0
  2442  
  2443  	case REG_F0 + 1,
  2444  		REG_F0 + 2,
  2445  		REG_F0 + 3,
  2446  		REG_F0 + 4,
  2447  		REG_F0 + 5,
  2448  		REG_F0 + 6,
  2449  		REG_F0 + 7:
  2450  		return Yrf
  2451  
  2452  	case REG_M0 + 0,
  2453  		REG_M0 + 1,
  2454  		REG_M0 + 2,
  2455  		REG_M0 + 3,
  2456  		REG_M0 + 4,
  2457  		REG_M0 + 5,
  2458  		REG_M0 + 6,
  2459  		REG_M0 + 7:
  2460  		return Ymr
  2461  
  2462  	case REG_X0 + 0,
  2463  		REG_X0 + 1,
  2464  		REG_X0 + 2,
  2465  		REG_X0 + 3,
  2466  		REG_X0 + 4,
  2467  		REG_X0 + 5,
  2468  		REG_X0 + 6,
  2469  		REG_X0 + 7,
  2470  		REG_X0 + 8,
  2471  		REG_X0 + 9,
  2472  		REG_X0 + 10,
  2473  		REG_X0 + 11,
  2474  		REG_X0 + 12,
  2475  		REG_X0 + 13,
  2476  		REG_X0 + 14,
  2477  		REG_X0 + 15:
  2478  		return Yxr
  2479  
  2480  	case REG_Y0 + 0,
  2481  		REG_Y0 + 1,
  2482  		REG_Y0 + 2,
  2483  		REG_Y0 + 3,
  2484  		REG_Y0 + 4,
  2485  		REG_Y0 + 5,
  2486  		REG_Y0 + 6,
  2487  		REG_Y0 + 7,
  2488  		REG_Y0 + 8,
  2489  		REG_Y0 + 9,
  2490  		REG_Y0 + 10,
  2491  		REG_Y0 + 11,
  2492  		REG_Y0 + 12,
  2493  		REG_Y0 + 13,
  2494  		REG_Y0 + 14,
  2495  		REG_Y0 + 15:
  2496  		return Yyr
  2497  
  2498  	case REG_CS:
  2499  		return Ycs
  2500  	case REG_SS:
  2501  		return Yss
  2502  	case REG_DS:
  2503  		return Yds
  2504  	case REG_ES:
  2505  		return Yes
  2506  	case REG_FS:
  2507  		return Yfs
  2508  	case REG_GS:
  2509  		return Ygs
  2510  	case REG_TLS:
  2511  		return Ytls
  2512  
  2513  	case REG_GDTR:
  2514  		return Ygdtr
  2515  	case REG_IDTR:
  2516  		return Yidtr
  2517  	case REG_LDTR:
  2518  		return Yldtr
  2519  	case REG_MSW:
  2520  		return Ymsw
  2521  	case REG_TASK:
  2522  		return Ytask
  2523  
  2524  	case REG_CR + 0:
  2525  		return Ycr0
  2526  	case REG_CR + 1:
  2527  		return Ycr1
  2528  	case REG_CR + 2:
  2529  		return Ycr2
  2530  	case REG_CR + 3:
  2531  		return Ycr3
  2532  	case REG_CR + 4:
  2533  		return Ycr4
  2534  	case REG_CR + 5:
  2535  		return Ycr5
  2536  	case REG_CR + 6:
  2537  		return Ycr6
  2538  	case REG_CR + 7:
  2539  		return Ycr7
  2540  	case REG_CR + 8:
  2541  		return Ycr8
  2542  
  2543  	case REG_DR + 0:
  2544  		return Ydr0
  2545  	case REG_DR + 1:
  2546  		return Ydr1
  2547  	case REG_DR + 2:
  2548  		return Ydr2
  2549  	case REG_DR + 3:
  2550  		return Ydr3
  2551  	case REG_DR + 4:
  2552  		return Ydr4
  2553  	case REG_DR + 5:
  2554  		return Ydr5
  2555  	case REG_DR + 6:
  2556  		return Ydr6
  2557  	case REG_DR + 7:
  2558  		return Ydr7
  2559  
  2560  	case REG_TR + 0:
  2561  		return Ytr0
  2562  	case REG_TR + 1:
  2563  		return Ytr1
  2564  	case REG_TR + 2:
  2565  		return Ytr2
  2566  	case REG_TR + 3:
  2567  		return Ytr3
  2568  	case REG_TR + 4:
  2569  		return Ytr4
  2570  	case REG_TR + 5:
  2571  		return Ytr5
  2572  	case REG_TR + 6:
  2573  		return Ytr6
  2574  	case REG_TR + 7:
  2575  		return Ytr7
  2576  	}
  2577  
  2578  	return Yxxx
  2579  }
  2580  
  2581  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  2582  // and hold assembly state.
  2583  type AsmBuf struct {
  2584  	buf     [100]byte
  2585  	off     int
  2586  	rexflag int
  2587  	vexflag int
  2588  	rep     int
  2589  	repn    int
  2590  	lock    bool
  2591  }
  2592  
  2593  // Put1 appends one byte to the end of the buffer.
  2594  func (a *AsmBuf) Put1(x byte) {
  2595  	a.buf[a.off] = x
  2596  	a.off++
  2597  }
  2598  
  2599  // Put2 appends two bytes to the end of the buffer.
  2600  func (a *AsmBuf) Put2(x, y byte) {
  2601  	a.buf[a.off+0] = x
  2602  	a.buf[a.off+1] = y
  2603  	a.off += 2
  2604  }
  2605  
  2606  // Put3 appends three bytes to the end of the buffer.
  2607  func (a *AsmBuf) Put3(x, y, z byte) {
  2608  	a.buf[a.off+0] = x
  2609  	a.buf[a.off+1] = y
  2610  	a.buf[a.off+2] = z
  2611  	a.off += 3
  2612  }
  2613  
  2614  // Put4 appends four bytes to the end of the buffer.
  2615  func (a *AsmBuf) Put4(x, y, z, w byte) {
  2616  	a.buf[a.off+0] = x
  2617  	a.buf[a.off+1] = y
  2618  	a.buf[a.off+2] = z
  2619  	a.buf[a.off+3] = w
  2620  	a.off += 4
  2621  }
  2622  
  2623  // PutInt16 writes v into the buffer using little-endian encoding.
  2624  func (a *AsmBuf) PutInt16(v int16) {
  2625  	a.buf[a.off+0] = byte(v)
  2626  	a.buf[a.off+1] = byte(v >> 8)
  2627  	a.off += 2
  2628  }
  2629  
  2630  // PutInt32 writes v into the buffer using little-endian encoding.
  2631  func (a *AsmBuf) PutInt32(v int32) {
  2632  	a.buf[a.off+0] = byte(v)
  2633  	a.buf[a.off+1] = byte(v >> 8)
  2634  	a.buf[a.off+2] = byte(v >> 16)
  2635  	a.buf[a.off+3] = byte(v >> 24)
  2636  	a.off += 4
  2637  }
  2638  
  2639  // PutInt64 writes v into the buffer using little-endian encoding.
  2640  func (a *AsmBuf) PutInt64(v int64) {
  2641  	a.buf[a.off+0] = byte(v)
  2642  	a.buf[a.off+1] = byte(v >> 8)
  2643  	a.buf[a.off+2] = byte(v >> 16)
  2644  	a.buf[a.off+3] = byte(v >> 24)
  2645  	a.buf[a.off+4] = byte(v >> 32)
  2646  	a.buf[a.off+5] = byte(v >> 40)
  2647  	a.buf[a.off+6] = byte(v >> 48)
  2648  	a.buf[a.off+7] = byte(v >> 56)
  2649  	a.off += 8
  2650  }
  2651  
  2652  // Put copies b into the buffer.
  2653  func (a *AsmBuf) Put(b []byte) {
  2654  	copy(a.buf[a.off:], b)
  2655  	a.off += len(b)
  2656  }
  2657  
  2658  // Insert inserts b at offset i.
  2659  func (a *AsmBuf) Insert(i int, b byte) {
  2660  	a.off++
  2661  	copy(a.buf[i+1:a.off], a.buf[i:a.off-1])
  2662  	a.buf[i] = b
  2663  }
  2664  
  2665  // Last returns the byte at the end of the buffer.
  2666  func (a *AsmBuf) Last() byte { return a.buf[a.off-1] }
  2667  
  2668  // Len returns the length of the buffer.
  2669  func (a *AsmBuf) Len() int { return a.off }
  2670  
  2671  // Bytes returns the contents of the buffer.
  2672  func (a *AsmBuf) Bytes() []byte { return a.buf[:a.off] }
  2673  
  2674  // Reset empties the buffer.
  2675  func (a *AsmBuf) Reset() { a.off = 0 }
  2676  
  2677  // At returns the byte at offset i.
  2678  func (a *AsmBuf) At(i int) byte { return a.buf[i] }
  2679  
  2680  func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2681  	var i int
  2682  
  2683  	switch index {
  2684  	default:
  2685  		goto bad
  2686  
  2687  	case REG_NONE:
  2688  		i = 4 << 3
  2689  		goto bas
  2690  
  2691  	case REG_R8,
  2692  		REG_R9,
  2693  		REG_R10,
  2694  		REG_R11,
  2695  		REG_R12,
  2696  		REG_R13,
  2697  		REG_R14,
  2698  		REG_R15:
  2699  		if ctxt.Arch.Family == sys.I386 {
  2700  			goto bad
  2701  		}
  2702  		fallthrough
  2703  
  2704  	case REG_AX,
  2705  		REG_CX,
  2706  		REG_DX,
  2707  		REG_BX,
  2708  		REG_BP,
  2709  		REG_SI,
  2710  		REG_DI:
  2711  		i = reg[index] << 3
  2712  	}
  2713  
  2714  	switch scale {
  2715  	default:
  2716  		goto bad
  2717  
  2718  	case 1:
  2719  		break
  2720  
  2721  	case 2:
  2722  		i |= 1 << 6
  2723  
  2724  	case 4:
  2725  		i |= 2 << 6
  2726  
  2727  	case 8:
  2728  		i |= 3 << 6
  2729  	}
  2730  
  2731  bas:
  2732  	switch base {
  2733  	default:
  2734  		goto bad
  2735  
  2736  	case REG_NONE: /* must be mod=00 */
  2737  		i |= 5
  2738  
  2739  	case REG_R8,
  2740  		REG_R9,
  2741  		REG_R10,
  2742  		REG_R11,
  2743  		REG_R12,
  2744  		REG_R13,
  2745  		REG_R14,
  2746  		REG_R15:
  2747  		if ctxt.Arch.Family == sys.I386 {
  2748  			goto bad
  2749  		}
  2750  		fallthrough
  2751  
  2752  	case REG_AX,
  2753  		REG_CX,
  2754  		REG_DX,
  2755  		REG_BX,
  2756  		REG_SP,
  2757  		REG_BP,
  2758  		REG_SI,
  2759  		REG_DI:
  2760  		i |= reg[base]
  2761  	}
  2762  
  2763  	asmbuf.Put1(byte(i))
  2764  	return
  2765  
  2766  bad:
  2767  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2768  	asmbuf.Put1(0)
  2769  	return
  2770  }
  2771  
  2772  func (asmbuf *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  2773  	var rel obj.Reloc
  2774  
  2775  	v := vaddr(ctxt, p, a, &rel)
  2776  	if rel.Siz != 0 {
  2777  		if rel.Siz != 4 {
  2778  			ctxt.Diag("bad reloc")
  2779  		}
  2780  		r := obj.Addrel(cursym)
  2781  		*r = rel
  2782  		r.Off = int32(p.Pc + int64(asmbuf.Len()))
  2783  	}
  2784  
  2785  	asmbuf.PutInt32(int32(v))
  2786  }
  2787  
  2788  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2789  	if r != nil {
  2790  		*r = obj.Reloc{}
  2791  	}
  2792  
  2793  	switch a.Name {
  2794  	case obj.NAME_STATIC,
  2795  		obj.NAME_GOTREF,
  2796  		obj.NAME_EXTERN:
  2797  		s := a.Sym
  2798  		if r == nil {
  2799  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2800  			log.Fatalf("reloc")
  2801  		}
  2802  
  2803  		if a.Name == obj.NAME_GOTREF {
  2804  			r.Siz = 4
  2805  			r.Type = objabi.R_GOTPCREL
  2806  		} else if useAbs(ctxt, s) {
  2807  			r.Siz = 4
  2808  			r.Type = objabi.R_ADDR
  2809  		} else {
  2810  			r.Siz = 4
  2811  			r.Type = objabi.R_PCREL
  2812  		}
  2813  
  2814  		r.Off = -1 // caller must fill in
  2815  		r.Sym = s
  2816  		r.Add = a.Offset
  2817  
  2818  		return 0
  2819  	}
  2820  
  2821  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2822  		if r == nil {
  2823  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2824  			log.Fatalf("reloc")
  2825  		}
  2826  
  2827  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  2828  			r.Type = objabi.R_TLS_LE
  2829  			r.Siz = 4
  2830  			r.Off = -1 // caller must fill in
  2831  			r.Add = a.Offset
  2832  		}
  2833  		return 0
  2834  	}
  2835  
  2836  	return a.Offset
  2837  }
  2838  
// asmandsz appends the ModR/M byte for operand a, followed by whatever SIB
// byte and displacement that operand needs.
//
// r is placed in bits 3-5 of the ModR/M byte. rex is masked to 0x40|Rxr and
// merged, together with the extension bits of the registers used by a, into
// asmbuf.rexflag. m64 is not used by this function.
//
// Register operands are encoded with mod=3. Memory operands use no
// displacement, an 8-bit displacement or a 32-bit displacement depending on
// the offset and on whether a relocation is needed; a needed relocation is
// added to cursym at the position of the 32-bit displacement. Operands that
// cannot be encoded are reported through ctxt.Diag.
func (asmbuf *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && asmbuf.rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR operands are never encodable here; the two checks
		// only add a more specific diagnostic before the generic one.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, no displacement allowed.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		asmbuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		asmbuf.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: ModR/M r/m is 4 and the
	// byte produced by asmidx follows.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// This base is a new variable local to this block; it shadows the
		// function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		asmbuf.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 followed by a 32-bit displacement.
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// mod=0: no displacement. Not used when the base is BP or R13.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// mod=1: displacement fits in a signed byte.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			asmbuf.Put1(byte(v))
			return
		}

		// mod=2: 32-bit displacement, possibly with a relocation.
		asmbuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without a real index register.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	asmbuf.rexflag |= regrex[base]&Rxb | rex
	// No base, a segment register as base, or TLS as base: the operand is
	// a bare 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Note that && binds tighter than ||, so the second alternative
		// (any non-AMD64 target) applies regardless of the symbol checks.
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			// mod=0, r/m=5 followed by the 32-bit displacement.
			asmbuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		asmbuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base are always followed by the byte from asmidx,
	// called with no index register.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			asmbuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			asmbuf.Put1(byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general register as base: plain ModR/M, no extra byte.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// Turn the offset into the addend of an R_TLS_LE relocation
			// and encode a zero displacement for it to be applied to.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// mod=0: no displacement. Not used when the base is BP or R13.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		// mod=1: displacement fits in a signed byte.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		// mod=2: 32-bit displacement, possibly with a relocation.
		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Record the pending relocation, if any, at the position of the
	// displacement, then emit the 32-bit displacement itself.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(asmbuf.Len()))
	}

	asmbuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  3039  
  3040  func (asmbuf *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3041  	asmbuf.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3042  }
  3043  
  3044  func (asmbuf *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3045  	asmbuf.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3046  }
  3047  
  3048  func bytereg(a *obj.Addr, t *uint8) {
  3049  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3050  		a.Reg += REG_AL - REG_AX
  3051  		*t = 0
  3052  	}
  3053  }
  3054  
  3055  func unbytereg(a *obj.Addr, t *uint8) {
  3056  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3057  		a.Reg += REG_AX - REG_AL
  3058  		*t = 0
  3059  	}
  3060  }
  3061  
const (
	// E is the byte value 0xff.
	// NOTE(review): in ymovtab it appears directly after the last opcode
	// byte of some entries, so it presumably marks the end of the opcode
	// bytes; confirm against the code that reads Movtab.op.
	E = 0xff
)
  3065  
// ymovtab lists special-case encodings, grouped below as: segment-register
// push and pop, moves to and from segment, control, debug and test
// registers, descriptor-table / machine-status / task register loads and
// stores, double shifts, and loading the TLS base.
//
// Read positionally, each entry holds an instruction, three operand classes,
// a small integer code and four bytes of opcode data.
// NOTE(review): the field names, the meaning of the integer code and the
// interpretation of the opcode bytes are defined by Movtab and its consumer,
// which are outside this section; confirm there.
//
// The table is terminated by an all-zero entry.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3205  
  3206  func isax(a *obj.Addr) bool {
  3207  	switch a.Reg {
  3208  	case REG_AX, REG_AL, REG_AH:
  3209  		return true
  3210  	}
  3211  
  3212  	if a.Index == REG_AX {
  3213  		return true
  3214  	}
  3215  	return false
  3216  }
  3217  
  3218  func subreg(p *obj.Prog, from int, to int) {
  3219  	if false { /* debug['Q'] */
  3220  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3221  	}
  3222  
  3223  	if int(p.From.Reg) == from {
  3224  		p.From.Reg = int16(to)
  3225  		p.Ft = 0
  3226  	}
  3227  
  3228  	if int(p.To.Reg) == from {
  3229  		p.To.Reg = int16(to)
  3230  		p.Tt = 0
  3231  	}
  3232  
  3233  	if int(p.From.Index) == from {
  3234  		p.From.Index = int16(to)
  3235  		p.Ft = 0
  3236  	}
  3237  
  3238  	if int(p.To.Index) == from {
  3239  		p.To.Index = int16(to)
  3240  		p.Tt = 0
  3241  	}
  3242  
  3243  	if false { /* debug['Q'] */
  3244  		fmt.Printf("%v\n", p)
  3245  	}
  3246  }
  3247  
  3248  func (asmbuf *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3249  	switch op {
  3250  	case Pm, Pe, Pf2, Pf3:
  3251  		if osize != 1 {
  3252  			if op != Pm {
  3253  				asmbuf.Put1(byte(op))
  3254  			}
  3255  			asmbuf.Put1(Pm)
  3256  			z++
  3257  			op = int(o.op[z])
  3258  			break
  3259  		}
  3260  		fallthrough
  3261  
  3262  	default:
  3263  		if asmbuf.Len() == 0 || asmbuf.Last() != Pm {
  3264  			asmbuf.Put1(Pm)
  3265  		}
  3266  	}
  3267  
  3268  	asmbuf.Put1(byte(op))
  3269  	return z
  3270  }
  3271  
// bpduff1 holds the pre-encoded machine code for the two instructions named
// on its lines: store BP at -16(SP), then point BP at that slot.
// NOTE(review): judging by the name, this is presumably emitted ahead of a
// Duff's-device call to set up the frame pointer; confirm at the use site.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
  3276  
// bpduff2 holds the pre-encoded machine code for the instruction named on
// its line: reload BP from the memory it currently points at.
// NOTE(review): presumably the counterpart of bpduff1, emitted after the
// call to restore the saved BP; confirm at the use site.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
  3280  
  3281  // Emit VEX prefix and opcode byte.
  3282  // The three addresses are the r/m, vvvv, and reg fields.
  3283  // The reg and rm arguments appear in the same order as the
  3284  // arguments to asmand, which typically follows the call to asmvex.
  3285  // The final two arguments are the VEX prefix (see encoding above)
  3286  // and the opcode byte.
  3287  // For details about vex prefix see:
  3288  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3289  func (asmbuf *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3290  	asmbuf.vexflag = 1
  3291  	rexR := 0
  3292  	if r != nil {
  3293  		rexR = regrex[r.Reg] & Rxr
  3294  	}
  3295  	rexB := 0
  3296  	rexX := 0
  3297  	if rm != nil {
  3298  		rexB = regrex[rm.Reg] & Rxb
  3299  		rexX = regrex[rm.Index] & Rxx
  3300  	}
  3301  	vexM := (vex >> 3) & 0xF
  3302  	vexWLP := vex & 0x87
  3303  	vexV := byte(0)
  3304  	if v != nil {
  3305  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3306  	}
  3307  	vexV ^= 0xF
  3308  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3309  		// Can use 2-byte encoding.
  3310  		asmbuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3311  	} else {
  3312  		// Must use 3-byte encoding.
  3313  		asmbuf.Put3(0xc4,
  3314  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3315  			vexV<<3|vexWLP,
  3316  		)
  3317  	}
  3318  	asmbuf.Put1(opcode)
  3319  }
  3320  
  3321  func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  3322  	o := opindex[p.As&obj.AMask]
  3323  
  3324  	if o == nil {
  3325  		ctxt.Diag("asmins: missing op %v", p)
  3326  		return
  3327  	}
  3328  
  3329  	pre := prefixof(ctxt, p, &p.From)
  3330  	if pre != 0 {
  3331  		asmbuf.Put1(byte(pre))
  3332  	}
  3333  	pre = prefixof(ctxt, p, &p.To)
  3334  	if pre != 0 {
  3335  		asmbuf.Put1(byte(pre))
  3336  	}
  3337  
  3338  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3339  	// which encodes as SHRQ $32(DX*0), AX.
  3340  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3341  	// Change encoding generated by assemblers and compilers and remove.
  3342  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3343  		p.From3 = new(obj.Addr)
  3344  		p.From3.Type = obj.TYPE_REG
  3345  		p.From3.Reg = p.From.Index
  3346  		p.From.Index = 0
  3347  	}
  3348  
  3349  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3350  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3351  	switch p.As {
  3352  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3353  		if p.From3Type() == obj.TYPE_NONE {
  3354  			p.From3 = new(obj.Addr)
  3355  			*p.From3 = p.From
  3356  			p.From = obj.Addr{}
  3357  			p.From.Type = obj.TYPE_CONST
  3358  			p.From.Offset = p.To.Offset
  3359  			p.To.Offset = 0
  3360  		}
  3361  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3362  		if p.From3Type() == obj.TYPE_NONE {
  3363  			p.From3 = new(obj.Addr)
  3364  			*p.From3 = p.To
  3365  			p.To = obj.Addr{}
  3366  			p.To.Type = obj.TYPE_CONST
  3367  			p.To.Offset = p.From3.Offset
  3368  			p.From3.Offset = 0
  3369  		}
  3370  	}
  3371  
  3372  	if p.Ft == 0 {
  3373  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3374  	}
  3375  	if p.Tt == 0 {
  3376  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3377  	}
  3378  
  3379  	ft := int(p.Ft) * Ymax
  3380  	f3t := Ynone * Ymax
  3381  	if p.From3 != nil {
  3382  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3383  	}
  3384  	tt := int(p.Tt) * Ymax
  3385  
  3386  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3387  	z := 0
  3388  	var a *obj.Addr
  3389  	var l int
  3390  	var op int
  3391  	var q *obj.Prog
  3392  	var r *obj.Reloc
  3393  	var rel obj.Reloc
  3394  	var v int64
  3395  	for i := range o.ytab {
  3396  		yt := &o.ytab[i]
  3397  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3398  			switch o.prefix {
  3399  			case Px1: /* first option valid only in 32-bit mode */
  3400  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  3401  					z += int(yt.zoffset) + xo
  3402  					continue
  3403  				}
  3404  			case Pq: /* 16 bit escape and opcode escape */
  3405  				asmbuf.Put2(Pe, Pm)
  3406  
  3407  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3408  				asmbuf.rexflag |= Pw
  3409  				asmbuf.Put2(Pe, Pm)
  3410  
  3411  			case Pq4: /*  66 0F 38 */
  3412  				asmbuf.Put3(0x66, 0x0F, 0x38)
  3413  
  3414  			case Pf2, /* xmm opcode escape */
  3415  				Pf3:
  3416  				asmbuf.Put2(o.prefix, Pm)
  3417  
  3418  			case Pef3:
  3419  				asmbuf.Put3(Pe, Pf3, Pm)
  3420  
  3421  			case Pfw: /* xmm opcode escape + REX.W */
  3422  				asmbuf.rexflag |= Pw
  3423  				asmbuf.Put2(Pf3, Pm)
  3424  
  3425  			case Pm: /* opcode escape */
  3426  				asmbuf.Put1(Pm)
  3427  
  3428  			case Pe: /* 16 bit escape */
  3429  				asmbuf.Put1(Pe)
  3430  
  3431  			case Pw: /* 64-bit escape */
  3432  				if ctxt.Arch.Family != sys.AMD64 {
  3433  					ctxt.Diag("asmins: illegal 64: %v", p)
  3434  				}
  3435  				asmbuf.rexflag |= Pw
  3436  
  3437  			case Pw8: /* 64-bit escape if z >= 8 */
  3438  				if z >= 8 {
  3439  					if ctxt.Arch.Family != sys.AMD64 {
  3440  						ctxt.Diag("asmins: illegal 64: %v", p)
  3441  					}
  3442  					asmbuf.rexflag |= Pw
  3443  				}
  3444  
  3445  			case Pb: /* botch */
  3446  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3447  					goto bad
  3448  				}
  3449  				// NOTE(rsc): This is probably safe to do always,
  3450  				// but when enabled it chooses different encodings
  3451  				// than the old cmd/internal/obj/i386 code did,
  3452  				// which breaks our "same bits out" checks.
  3453  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3454  				// in the original obj/i386, and it would encode
  3455  				// (using a valid, shorter form) as 3c 00 if we enabled
  3456  				// the call to bytereg here.
  3457  				if ctxt.Arch.Family == sys.AMD64 {
  3458  					bytereg(&p.From, &p.Ft)
  3459  					bytereg(&p.To, &p.Tt)
  3460  				}
  3461  
  3462  			case P32: /* 32 bit but illegal if 64-bit mode */
  3463  				if ctxt.Arch.Family == sys.AMD64 {
  3464  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3465  				}
  3466  
  3467  			case Py: /* 64-bit only, no prefix */
  3468  				if ctxt.Arch.Family != sys.AMD64 {
  3469  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3470  				}
  3471  
  3472  			case Py1: /* 64-bit only if z < 1, no prefix */
  3473  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  3474  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3475  				}
  3476  
  3477  			case Py3: /* 64-bit only if z < 3, no prefix */
  3478  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  3479  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3480  				}
  3481  			}
  3482  
  3483  			if z >= len(o.op) {
  3484  				log.Fatalf("asmins bad table %v", p)
  3485  			}
  3486  			op = int(o.op[z])
  3487  			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
  3488  			if op == 0x0f && o.prefix != Pvex {
  3489  				asmbuf.Put1(byte(op))
  3490  				z++
  3491  				op = int(o.op[z])
  3492  			}
  3493  
  3494  			switch yt.zcase {
  3495  			default:
  3496  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3497  				return
  3498  
  3499  			case Zpseudo:
  3500  				break
  3501  
  3502  			case Zlit:
  3503  				for ; ; z++ {
  3504  					op = int(o.op[z])
  3505  					if op == 0 {
  3506  						break
  3507  					}
  3508  					asmbuf.Put1(byte(op))
  3509  				}
  3510  
  3511  			case Zlitm_r:
  3512  				for ; ; z++ {
  3513  					op = int(o.op[z])
  3514  					if op == 0 {
  3515  						break
  3516  					}
  3517  					asmbuf.Put1(byte(op))
  3518  				}
  3519  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3520  
  3521  			case Zmb_r:
  3522  				bytereg(&p.From, &p.Ft)
  3523  				fallthrough
  3524  
  3525  			case Zm_r:
  3526  				asmbuf.Put1(byte(op))
  3527  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3528  
  3529  			case Zm2_r:
  3530  				asmbuf.Put2(byte(op), o.op[z+1])
  3531  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3532  
  3533  			case Zm_r_xm:
  3534  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3535  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3536  
  3537  			case Zm_r_xm_nr:
  3538  				asmbuf.rexflag = 0
  3539  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3540  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3541  
  3542  			case Zm_r_i_xm:
  3543  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3544  				asmbuf.asmand(ctxt, cursym, p, &p.From, p.From3)
  3545  				asmbuf.Put1(byte(p.To.Offset))
  3546  
  3547  			case Zibm_r, Zibr_m:
  3548  				for {
  3549  					tmp1 := z
  3550  					z++
  3551  					op = int(o.op[tmp1])
  3552  					if op == 0 {
  3553  						break
  3554  					}
  3555  					asmbuf.Put1(byte(op))
  3556  				}
  3557  				if yt.zcase == Zibr_m {
  3558  					asmbuf.asmand(ctxt, cursym, p, &p.To, p.From3)
  3559  				} else {
  3560  					asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3561  				}
  3562  				asmbuf.Put1(byte(p.From.Offset))
  3563  
  3564  			case Zaut_r:
  3565  				asmbuf.Put1(0x8d) // leal
  3566  				if p.From.Type != obj.TYPE_ADDR {
  3567  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3568  				}
  3569  				p.From.Type = obj.TYPE_MEM
  3570  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3571  				p.From.Type = obj.TYPE_ADDR
  3572  
  3573  			case Zm_o:
  3574  				asmbuf.Put1(byte(op))
  3575  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3576  
  3577  			case Zr_m:
  3578  				asmbuf.Put1(byte(op))
  3579  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3580  
  3581  			case Zvex_rm_v_r:
  3582  				asmbuf.asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3583  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3584  
  3585  			case Zvex_i_r_v:
  3586  				asmbuf.asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
  3587  				regnum := byte(0x7)
  3588  				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
  3589  					regnum &= byte(p.From3.Reg - REG_X0)
  3590  				} else {
  3591  					regnum &= byte(p.From3.Reg - REG_Y0)
  3592  				}
  3593  				asmbuf.Put1(byte(o.op[z+2]) | regnum)
  3594  				asmbuf.Put1(byte(p.From.Offset))
  3595  
  3596  			case Zvex_i_rm_v_r:
  3597  				asmbuf.asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3598  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3599  				asmbuf.Put1(byte(p.From3.Offset))
  3600  
  3601  			case Zvex_i_rm_r:
  3602  				asmbuf.asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
  3603  				asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3604  				asmbuf.Put1(byte(p.From.Offset))
  3605  
  3606  			case Zvex_v_rm_r:
  3607  				asmbuf.asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3608  				asmbuf.asmand(ctxt, cursym, p, p.From3, &p.To)
  3609  
  3610  			case Zvex_r_v_rm:
  3611  				asmbuf.asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3612  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3613  
  3614  			case Zr_m_xm:
  3615  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3616  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3617  
  3618  			case Zr_m_xm_nr:
  3619  				asmbuf.rexflag = 0
  3620  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3621  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3622  
  3623  			case Zo_m:
  3624  				asmbuf.Put1(byte(op))
  3625  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3626  
  3627  			case Zcallindreg:
  3628  				r = obj.Addrel(cursym)
  3629  				r.Off = int32(p.Pc)
  3630  				r.Type = objabi.R_CALLIND
  3631  				r.Siz = 0
  3632  				fallthrough
  3633  
  3634  			case Zo_m64:
  3635  				asmbuf.Put1(byte(op))
  3636  				asmbuf.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  3637  
  3638  			case Zm_ibo:
  3639  				asmbuf.Put1(byte(op))
  3640  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3641  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3642  
  3643  			case Zibo_m:
  3644  				asmbuf.Put1(byte(op))
  3645  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3646  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3647  
  3648  			case Zibo_m_xm:
  3649  				z = asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3650  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3651  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3652  
  3653  			case Z_ib, Zib_:
  3654  				if yt.zcase == Zib_ {
  3655  					a = &p.From
  3656  				} else {
  3657  					a = &p.To
  3658  				}
  3659  				asmbuf.Put1(byte(op))
  3660  				if p.As == AXABORT {
  3661  					asmbuf.Put1(o.op[z+1])
  3662  				}
  3663  				asmbuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3664  
  3665  			case Zib_rp:
  3666  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3667  				asmbuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3668  
  3669  			case Zil_rp:
  3670  				asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3671  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3672  				if o.prefix == Pe {
  3673  					v = vaddr(ctxt, p, &p.From, nil)
  3674  					asmbuf.PutInt16(int16(v))
  3675  				} else {
  3676  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3677  				}
  3678  
  3679  			case Zo_iw:
  3680  				asmbuf.Put1(byte(op))
  3681  				if p.From.Type != obj.TYPE_NONE {
  3682  					v = vaddr(ctxt, p, &p.From, nil)
  3683  					asmbuf.PutInt16(int16(v))
  3684  				}
  3685  
  3686  			case Ziq_rp:
  3687  				v = vaddr(ctxt, p, &p.From, &rel)
  3688  				l = int(v >> 32)
  3689  				if l == 0 && rel.Siz != 8 {
  3690  					//p->mark |= 0100;
  3691  					//print("zero: %llux %v\n", v, p);
  3692  					asmbuf.rexflag &^= (0x40 | Rxw)
  3693  
  3694  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3695  					asmbuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3696  					if rel.Type != 0 {
  3697  						r = obj.Addrel(cursym)
  3698  						*r = rel
  3699  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3700  					}
  3701  
  3702  					asmbuf.PutInt32(int32(v))
  3703  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3704  
  3705  					//p->mark |= 0100;
  3706  					//print("sign: %llux %v\n", v, p);
  3707  					asmbuf.Put1(0xc7)
  3708  					asmbuf.asmando(ctxt, cursym, p, &p.To, 0)
  3709  
  3710  					asmbuf.PutInt32(int32(v)) // need all 8
  3711  				} else {
  3712  					//print("all: %llux %v\n", v, p);
  3713  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3714  					asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3715  					if rel.Type != 0 {
  3716  						r = obj.Addrel(cursym)
  3717  						*r = rel
  3718  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3719  					}
  3720  
  3721  					asmbuf.PutInt64(v)
  3722  				}
  3723  
  3724  			case Zib_rr:
  3725  				asmbuf.Put1(byte(op))
  3726  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3727  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3728  
  3729  			case Z_il, Zil_:
  3730  				if yt.zcase == Zil_ {
  3731  					a = &p.From
  3732  				} else {
  3733  					a = &p.To
  3734  				}
  3735  				asmbuf.Put1(byte(op))
  3736  				if o.prefix == Pe {
  3737  					v = vaddr(ctxt, p, a, nil)
  3738  					asmbuf.PutInt16(int16(v))
  3739  				} else {
  3740  					asmbuf.relput4(ctxt, cursym, p, a)
  3741  				}
  3742  
  3743  			case Zm_ilo, Zilo_m:
  3744  				asmbuf.Put1(byte(op))
  3745  				if yt.zcase == Zilo_m {
  3746  					a = &p.From
  3747  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3748  				} else {
  3749  					a = &p.To
  3750  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3751  				}
  3752  
  3753  				if o.prefix == Pe {
  3754  					v = vaddr(ctxt, p, a, nil)
  3755  					asmbuf.PutInt16(int16(v))
  3756  				} else {
  3757  					asmbuf.relput4(ctxt, cursym, p, a)
  3758  				}
  3759  
  3760  			case Zil_rr:
  3761  				asmbuf.Put1(byte(op))
  3762  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3763  				if o.prefix == Pe {
  3764  					v = vaddr(ctxt, p, &p.From, nil)
  3765  					asmbuf.PutInt16(int16(v))
  3766  				} else {
  3767  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3768  				}
  3769  
  3770  			case Z_rp:
  3771  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3772  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3773  
  3774  			case Zrp_:
  3775  				asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3776  				asmbuf.Put1(byte(op + reg[p.From.Reg]))
  3777  
  3778  			case Zclr:
  3779  				asmbuf.rexflag &^= Pw
  3780  				asmbuf.Put1(byte(op))
  3781  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3782  
  3783  			case Zcallcon, Zjmpcon:
  3784  				if yt.zcase == Zcallcon {
  3785  					asmbuf.Put1(byte(op))
  3786  				} else {
  3787  					asmbuf.Put1(o.op[z+1])
  3788  				}
  3789  				r = obj.Addrel(cursym)
  3790  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3791  				r.Type = objabi.R_PCREL
  3792  				r.Siz = 4
  3793  				r.Add = p.To.Offset
  3794  				asmbuf.PutInt32(0)
  3795  
  3796  			case Zcallind:
  3797  				asmbuf.Put2(byte(op), o.op[z+1])
  3798  				r = obj.Addrel(cursym)
  3799  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3800  				if ctxt.Arch.Family == sys.AMD64 {
  3801  					r.Type = objabi.R_PCREL
  3802  				} else {
  3803  					r.Type = objabi.R_ADDR
  3804  				}
  3805  				r.Siz = 4
  3806  				r.Add = p.To.Offset
  3807  				r.Sym = p.To.Sym
  3808  				asmbuf.PutInt32(0)
  3809  
  3810  			case Zcall, Zcallduff:
  3811  				if p.To.Sym == nil {
  3812  					ctxt.Diag("call without target")
  3813  					log.Fatalf("bad code")
  3814  				}
  3815  
  3816  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3817  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3818  				}
  3819  
  3820  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3821  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3822  					// (the call jumps into the middle of the function).
  3823  					// This makes it possible to see call sites for duffcopy/duffzero in
  3824  					// BP-based profiling tools like Linux perf (which is the
  3825  					// whole point of obj.Framepointer_enabled).
  3826  					// MOVQ BP, -16(SP)
  3827  					// LEAQ -16(SP), BP
  3828  					asmbuf.Put(bpduff1)
  3829  				}
  3830  				asmbuf.Put1(byte(op))
  3831  				r = obj.Addrel(cursym)
  3832  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3833  				r.Sym = p.To.Sym
  3834  				r.Add = p.To.Offset
  3835  				r.Type = objabi.R_CALL
  3836  				r.Siz = 4
  3837  				asmbuf.PutInt32(0)
  3838  
  3839  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3840  					// Pop BP pushed above.
  3841  					// MOVQ 0(BP), BP
  3842  					asmbuf.Put(bpduff2)
  3843  				}
  3844  
  3845  			// TODO: jump across functions needs reloc
  3846  			case Zbr, Zjmp, Zloop:
  3847  				if p.As == AXBEGIN {
  3848  					asmbuf.Put1(byte(op))
  3849  				}
  3850  				if p.To.Sym != nil {
  3851  					if yt.zcase != Zjmp {
  3852  						ctxt.Diag("branch to ATEXT")
  3853  						log.Fatalf("bad code")
  3854  					}
  3855  
  3856  					asmbuf.Put1(o.op[z+1])
  3857  					r = obj.Addrel(cursym)
  3858  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3859  					r.Sym = p.To.Sym
  3860  					r.Type = objabi.R_PCREL
  3861  					r.Siz = 4
  3862  					asmbuf.PutInt32(0)
  3863  					break
  3864  				}
  3865  
  3866  				// Assumes q is in this function.
  3867  				// TODO: Check in input, preserve in brchain.
  3868  
  3869  				// Fill in backward jump now.
  3870  				q = p.Pcond
  3871  
  3872  				if q == nil {
  3873  					ctxt.Diag("jmp/branch/loop without target")
  3874  					log.Fatalf("bad code")
  3875  				}
  3876  
  3877  				if p.Back&1 != 0 {
  3878  					v = q.Pc - (p.Pc + 2)
  3879  					if v >= -128 && p.As != AXBEGIN {
  3880  						if p.As == AJCXZL {
  3881  							asmbuf.Put1(0x67)
  3882  						}
  3883  						asmbuf.Put2(byte(op), byte(v))
  3884  					} else if yt.zcase == Zloop {
  3885  						ctxt.Diag("loop too far: %v", p)
  3886  					} else {
  3887  						v -= 5 - 2
  3888  						if p.As == AXBEGIN {
  3889  							v--
  3890  						}
  3891  						if yt.zcase == Zbr {
  3892  							asmbuf.Put1(0x0f)
  3893  							v--
  3894  						}
  3895  
  3896  						asmbuf.Put1(o.op[z+1])
  3897  						asmbuf.PutInt32(int32(v))
  3898  					}
  3899  
  3900  					break
  3901  				}
  3902  
  3903  				// Annotate target; will fill in later.
  3904  				p.Forwd = q.Rel
  3905  
  3906  				q.Rel = p
  3907  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3908  					if p.As == AJCXZL {
  3909  						asmbuf.Put1(0x67)
  3910  					}
  3911  					asmbuf.Put2(byte(op), 0)
  3912  				} else if yt.zcase == Zloop {
  3913  					ctxt.Diag("loop too far: %v", p)
  3914  				} else {
  3915  					if yt.zcase == Zbr {
  3916  						asmbuf.Put1(0x0f)
  3917  					}
  3918  					asmbuf.Put1(o.op[z+1])
  3919  					asmbuf.PutInt32(0)
  3920  				}
  3921  
  3922  				break
  3923  
  3924  			/*
  3925  				v = q->pc - p->pc - 2;
  3926  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3927  					*ctxt->andptr++ = op;
  3928  					*ctxt->andptr++ = v;
  3929  				} else {
  3930  					v -= 5-2;
  3931  					if(yt.zcase == Zbr) {
  3932  						*ctxt->andptr++ = 0x0f;
  3933  						v--;
  3934  					}
  3935  					*ctxt->andptr++ = o->op[z+1];
  3936  					*ctxt->andptr++ = v;
  3937  					*ctxt->andptr++ = v>>8;
  3938  					*ctxt->andptr++ = v>>16;
  3939  					*ctxt->andptr++ = v>>24;
  3940  				}
  3941  			*/
  3942  
  3943  			case Zbyte:
  3944  				v = vaddr(ctxt, p, &p.From, &rel)
  3945  				if rel.Siz != 0 {
  3946  					rel.Siz = uint8(op)
  3947  					r = obj.Addrel(cursym)
  3948  					*r = rel
  3949  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3950  				}
  3951  
  3952  				asmbuf.Put1(byte(v))
  3953  				if op > 1 {
  3954  					asmbuf.Put1(byte(v >> 8))
  3955  					if op > 2 {
  3956  						asmbuf.PutInt16(int16(v >> 16))
  3957  						if op > 4 {
  3958  							asmbuf.PutInt32(int32(v >> 32))
  3959  						}
  3960  					}
  3961  				}
  3962  			}
  3963  
  3964  			return
  3965  		}
  3966  		z += int(yt.zoffset) + xo
  3967  	}
  3968  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3969  		var pp obj.Prog
  3970  		var t []byte
  3971  		if p.As == mo[0].as {
  3972  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3973  				t = mo[0].op[:]
  3974  				switch mo[0].code {
  3975  				default:
  3976  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3977  
  3978  				case 0: /* lit */
  3979  					for z = 0; t[z] != E; z++ {
  3980  						asmbuf.Put1(t[z])
  3981  					}
  3982  
  3983  				case 1: /* r,m */
  3984  					asmbuf.Put1(t[0])
  3985  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  3986  
  3987  				case 2: /* m,r */
  3988  					asmbuf.Put1(t[0])
  3989  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  3990  
  3991  				case 3: /* r,m - 2op */
  3992  					asmbuf.Put2(t[0], t[1])
  3993  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  3994  					asmbuf.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3995  
  3996  				case 4: /* m,r - 2op */
  3997  					asmbuf.Put2(t[0], t[1])
  3998  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  3999  					asmbuf.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4000  
  4001  				case 5: /* load full pointer, trash heap */
  4002  					if t[0] != 0 {
  4003  						asmbuf.Put1(t[0])
  4004  					}
  4005  					switch p.To.Index {
  4006  					default:
  4007  						goto bad
  4008  
  4009  					case REG_DS:
  4010  						asmbuf.Put1(0xc5)
  4011  
  4012  					case REG_SS:
  4013  						asmbuf.Put2(0x0f, 0xb2)
  4014  
  4015  					case REG_ES:
  4016  						asmbuf.Put1(0xc4)
  4017  
  4018  					case REG_FS:
  4019  						asmbuf.Put2(0x0f, 0xb4)
  4020  
  4021  					case REG_GS:
  4022  						asmbuf.Put2(0x0f, 0xb5)
  4023  					}
  4024  
  4025  					asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  4026  
  4027  				case 6: /* double shift */
  4028  					if t[0] == Pw {
  4029  						if ctxt.Arch.Family != sys.AMD64 {
  4030  							ctxt.Diag("asmins: illegal 64: %v", p)
  4031  						}
  4032  						asmbuf.rexflag |= Pw
  4033  						t = t[1:]
  4034  					} else if t[0] == Pe {
  4035  						asmbuf.Put1(Pe)
  4036  						t = t[1:]
  4037  					}
  4038  
  4039  					switch p.From.Type {
  4040  					default:
  4041  						goto bad
  4042  
  4043  					case obj.TYPE_CONST:
  4044  						asmbuf.Put2(0x0f, t[0])
  4045  						asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4046  						asmbuf.Put1(byte(p.From.Offset))
  4047  
  4048  					case obj.TYPE_REG:
  4049  						switch p.From.Reg {
  4050  						default:
  4051  							goto bad
  4052  
  4053  						case REG_CL, REG_CX:
  4054  							asmbuf.Put2(0x0f, t[1])
  4055  							asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4056  						}
  4057  					}
  4058  
  4059  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4060  				// where you load the TLS base register into a register and then index off that
  4061  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4062  				// are handled in prefixof above and should not be listed here.
  4063  				case 7: /* mov tls, r */
  4064  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4065  						ctxt.Diag("invalid load of TLS: %v", p)
  4066  					}
  4067  
  4068  					if ctxt.Arch.Family == sys.I386 {
  4069  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4070  						// where you load the TLS base register into a register and then index off that
  4071  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4072  						// are handled in prefixof above and should not be listed here.
  4073  						switch ctxt.Headtype {
  4074  						default:
  4075  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4076  
  4077  						case objabi.Hlinux,
  4078  							objabi.Hnacl:
  4079  							if ctxt.Flag_shared {
  4080  								// Note that this is not generating the same insns as the other cases.
  4081  								//     MOV TLS, dst
  4082  								// becomes
  4083  								//     call __x86.get_pc_thunk.dst
  4084  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4085  								// which is encoded as
  4086  								//     call __x86.get_pc_thunk.dst
  4087  								//     movq 0(dst), dst
  4088  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4089  								// is g, which we can't check here, but will when we assemble the second
  4090  								// instruction.
  4091  								dst := p.To.Reg
  4092  								asmbuf.Put1(0xe8)
  4093  								r = obj.Addrel(cursym)
  4094  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4095  								r.Type = objabi.R_CALL
  4096  								r.Siz = 4
  4097  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  4098  								asmbuf.PutInt32(0)
  4099  
  4100  								asmbuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4101  								r = obj.Addrel(cursym)
  4102  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4103  								r.Type = objabi.R_TLS_IE
  4104  								r.Siz = 4
  4105  								r.Add = 2
  4106  								asmbuf.PutInt32(0)
  4107  							} else {
  4108  								// ELF TLS base is 0(GS).
  4109  								pp.From = p.From
  4110  
  4111  								pp.From.Type = obj.TYPE_MEM
  4112  								pp.From.Reg = REG_GS
  4113  								pp.From.Offset = 0
  4114  								pp.From.Index = REG_NONE
  4115  								pp.From.Scale = 0
  4116  								asmbuf.Put2(0x65, // GS
  4117  									0x8B)
  4118  								asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4119  							}
  4120  						case objabi.Hplan9:
  4121  							pp.From = obj.Addr{}
  4122  							pp.From.Type = obj.TYPE_MEM
  4123  							pp.From.Name = obj.NAME_EXTERN
  4124  							pp.From.Sym = plan9privates
  4125  							pp.From.Offset = 0
  4126  							pp.From.Index = REG_NONE
  4127  							asmbuf.Put1(0x8B)
  4128  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4129  
  4130  						case objabi.Hwindows:
  4131  							// Windows TLS base is always 0x14(FS).
  4132  							pp.From = p.From
  4133  
  4134  							pp.From.Type = obj.TYPE_MEM
  4135  							pp.From.Reg = REG_FS
  4136  							pp.From.Offset = 0x14
  4137  							pp.From.Index = REG_NONE
  4138  							pp.From.Scale = 0
  4139  							asmbuf.Put2(0x64, // FS
  4140  								0x8B)
  4141  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4142  						}
  4143  						break
  4144  					}
  4145  
  4146  					switch ctxt.Headtype {
  4147  					default:
  4148  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4149  
  4150  					case objabi.Hlinux:
  4151  						if !ctxt.Flag_shared {
  4152  							log.Fatalf("unknown TLS base location for linux without -shared")
  4153  						}
  4154  						// Note that this is not generating the same insn as the other cases.
  4155  						//     MOV TLS, R_to
  4156  						// becomes
  4157  						//     movq g@gottpoff(%rip), R_to
  4158  						// which is encoded as
  4159  						//     movq 0(%rip), R_to
  4160  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4161  						// is g, which we can't check here, but will when we assemble the second
  4162  						// instruction.
  4163  						asmbuf.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4164  
  4165  						asmbuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4166  						r = obj.Addrel(cursym)
  4167  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4168  						r.Type = objabi.R_TLS_IE
  4169  						r.Siz = 4
  4170  						r.Add = -4
  4171  						asmbuf.PutInt32(0)
  4172  
  4173  					case objabi.Hplan9:
  4174  						pp.From = obj.Addr{}
  4175  						pp.From.Type = obj.TYPE_MEM
  4176  						pp.From.Name = obj.NAME_EXTERN
  4177  						pp.From.Sym = plan9privates
  4178  						pp.From.Offset = 0
  4179  						pp.From.Index = REG_NONE
  4180  						asmbuf.rexflag |= Pw
  4181  						asmbuf.Put1(0x8B)
  4182  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4183  
  4184  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4185  						// TLS base is 0(FS).
  4186  						pp.From = p.From
  4187  
  4188  						pp.From.Type = obj.TYPE_MEM
  4189  						pp.From.Name = obj.NAME_NONE
  4190  						pp.From.Reg = REG_NONE
  4191  						pp.From.Offset = 0
  4192  						pp.From.Index = REG_NONE
  4193  						pp.From.Scale = 0
  4194  						asmbuf.rexflag |= Pw
  4195  						asmbuf.Put2(0x64, // FS
  4196  							0x8B)
  4197  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4198  
  4199  					case objabi.Hwindows:
  4200  						// Windows TLS base is always 0x28(GS).
  4201  						pp.From = p.From
  4202  
  4203  						pp.From.Type = obj.TYPE_MEM
  4204  						pp.From.Name = obj.NAME_NONE
  4205  						pp.From.Reg = REG_GS
  4206  						pp.From.Offset = 0x28
  4207  						pp.From.Index = REG_NONE
  4208  						pp.From.Scale = 0
  4209  						asmbuf.rexflag |= Pw
  4210  						asmbuf.Put2(0x65, // GS
  4211  							0x8B)
  4212  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4213  					}
  4214  				}
  4215  				return
  4216  			}
  4217  		}
  4218  	}
  4219  	goto bad
  4220  
  4221  bad:
  4222  	if ctxt.Arch.Family != sys.AMD64 {
  4223  		/*
  4224  		 * here, the assembly has failed.
  4225  		 * if its a byte instruction that has
  4226  		 * unaddressable registers, try to
  4227  		 * exchange registers and reissue the
  4228  		 * instruction with the operands renamed.
  4229  		 */
  4230  		pp := *p
  4231  
  4232  		unbytereg(&pp.From, &pp.Ft)
  4233  		unbytereg(&pp.To, &pp.Tt)
  4234  
  4235  		z := int(p.From.Reg)
  4236  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4237  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4238  			// For now, different to keep bit-for-bit compatibility.
  4239  			if ctxt.Arch.Family == sys.I386 {
  4240  				breg := byteswapreg(ctxt, &p.To)
  4241  				if breg != REG_AX {
  4242  					asmbuf.Put1(0x87) // xchg lhs,bx
  4243  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4244  					subreg(&pp, z, breg)
  4245  					asmbuf.doasm(ctxt, cursym, &pp)
  4246  					asmbuf.Put1(0x87) // xchg lhs,bx
  4247  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4248  				} else {
  4249  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4250  					subreg(&pp, z, REG_AX)
  4251  					asmbuf.doasm(ctxt, cursym, &pp)
  4252  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4253  				}
  4254  				return
  4255  			}
  4256  
  4257  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4258  				// We certainly don't want to exchange
  4259  				// with AX if the op is MUL or DIV.
  4260  				asmbuf.Put1(0x87) // xchg lhs,bx
  4261  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4262  				subreg(&pp, z, REG_BX)
  4263  				asmbuf.doasm(ctxt, cursym, &pp)
  4264  				asmbuf.Put1(0x87) // xchg lhs,bx
  4265  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4266  			} else {
  4267  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4268  				subreg(&pp, z, REG_AX)
  4269  				asmbuf.doasm(ctxt, cursym, &pp)
  4270  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4271  			}
  4272  			return
  4273  		}
  4274  
  4275  		z = int(p.To.Reg)
  4276  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4277  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4278  			// For now, different to keep bit-for-bit compatibility.
  4279  			if ctxt.Arch.Family == sys.I386 {
  4280  				breg := byteswapreg(ctxt, &p.From)
  4281  				if breg != REG_AX {
  4282  					asmbuf.Put1(0x87) //xchg rhs,bx
  4283  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4284  					subreg(&pp, z, breg)
  4285  					asmbuf.doasm(ctxt, cursym, &pp)
  4286  					asmbuf.Put1(0x87) // xchg rhs,bx
  4287  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4288  				} else {
  4289  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4290  					subreg(&pp, z, REG_AX)
  4291  					asmbuf.doasm(ctxt, cursym, &pp)
  4292  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4293  				}
  4294  				return
  4295  			}
  4296  
  4297  			if isax(&p.From) {
  4298  				asmbuf.Put1(0x87) // xchg rhs,bx
  4299  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4300  				subreg(&pp, z, REG_BX)
  4301  				asmbuf.doasm(ctxt, cursym, &pp)
  4302  				asmbuf.Put1(0x87) // xchg rhs,bx
  4303  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4304  			} else {
  4305  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4306  				subreg(&pp, z, REG_AX)
  4307  				asmbuf.doasm(ctxt, cursym, &pp)
  4308  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4309  			}
  4310  			return
  4311  		}
  4312  	}
  4313  
  4314  	ctxt.Diag("invalid instruction: %v", p)
  4315  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4316  	return
  4317  }
  4318  
  4319  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4320  // which is not referenced in a.
  4321  // If a is empty, it returns BX to account for MULB-like instructions
  4322  // that might use DX and AX.
  4323  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4324  	cana, canb, canc, cand := true, true, true, true
  4325  	if a.Type == obj.TYPE_NONE {
  4326  		cana, cand = false, false
  4327  	}
  4328  
  4329  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4330  		switch a.Reg {
  4331  		case REG_NONE:
  4332  			cana, cand = false, false
  4333  		case REG_AX, REG_AL, REG_AH:
  4334  			cana = false
  4335  		case REG_BX, REG_BL, REG_BH:
  4336  			canb = false
  4337  		case REG_CX, REG_CL, REG_CH:
  4338  			canc = false
  4339  		case REG_DX, REG_DL, REG_DH:
  4340  			cand = false
  4341  		}
  4342  	}
  4343  
  4344  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4345  		switch a.Index {
  4346  		case REG_AX:
  4347  			cana = false
  4348  		case REG_BX:
  4349  			canb = false
  4350  		case REG_CX:
  4351  			canc = false
  4352  		case REG_DX:
  4353  			cand = false
  4354  		}
  4355  	}
  4356  
  4357  	switch {
  4358  	case cana:
  4359  		return REG_AX
  4360  	case canb:
  4361  		return REG_BX
  4362  	case canc:
  4363  		return REG_CX
  4364  	case cand:
  4365  		return REG_DX
  4366  	default:
  4367  		ctxt.Diag("impossible byte register")
  4368  		log.Fatalf("bad code")
  4369  		return 0
  4370  	}
  4371  }
  4372  
  4373  func isbadbyte(a *obj.Addr) bool {
  4374  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4375  }
  4376  
  4377  var naclret = []uint8{
  4378  	0x5e, // POPL SI
  4379  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4380  	0x83,
  4381  	0xe6,
  4382  	0xe0, // ANDL $~31, SI
  4383  	0x4c,
  4384  	0x01,
  4385  	0xfe, // ADDQ R15, SI
  4386  	0xff,
  4387  	0xe6, // JMP SI
  4388  }
  4389  
  4390  var naclret8 = []uint8{
  4391  	0x5d, // POPL BP
  4392  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4393  	0x83,
  4394  	0xe5,
  4395  	0xe0, // ANDL $~31, BP
  4396  	0xff,
  4397  	0xe5, // JMP BP
  4398  }
  4399  
  4400  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4401  
  4402  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4403  
  4404  var naclmovs = []uint8{
  4405  	0x89,
  4406  	0xf6, // MOVL SI, SI
  4407  	0x49,
  4408  	0x8d,
  4409  	0x34,
  4410  	0x37, // LEAQ (R15)(SI*1), SI
  4411  	0x89,
  4412  	0xff, // MOVL DI, DI
  4413  	0x49,
  4414  	0x8d,
  4415  	0x3c,
  4416  	0x3f, // LEAQ (R15)(DI*1), DI
  4417  }
  4418  
  4419  var naclstos = []uint8{
  4420  	0x89,
  4421  	0xff, // MOVL DI, DI
  4422  	0x49,
  4423  	0x8d,
  4424  	0x3c,
  4425  	0x3f, // LEAQ (R15)(DI*1), DI
  4426  }
  4427  
  4428  func (asmbuf *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
  4429  	if reg >= REG_R8 {
  4430  		asmbuf.Put1(0x45)
  4431  	}
  4432  	reg = (reg - REG_AX) & 7
  4433  	asmbuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4434  }
  4435  
  4436  func (asmbuf *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4437  	asmbuf.Reset()
  4438  
  4439  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 {
  4440  		switch p.As {
  4441  		case obj.ARET:
  4442  			asmbuf.Put(naclret8)
  4443  			return
  4444  
  4445  		case obj.ACALL,
  4446  			obj.AJMP:
  4447  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4448  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4449  			}
  4450  
  4451  		case AINT:
  4452  			asmbuf.Put1(0xf4)
  4453  			return
  4454  		}
  4455  	}
  4456  
  4457  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
  4458  		if p.As == AREP {
  4459  			asmbuf.rep++
  4460  			return
  4461  		}
  4462  
  4463  		if p.As == AREPN {
  4464  			asmbuf.repn++
  4465  			return
  4466  		}
  4467  
  4468  		if p.As == ALOCK {
  4469  			asmbuf.lock = true
  4470  			return
  4471  		}
  4472  
  4473  		if p.As != ALEAQ && p.As != ALEAL {
  4474  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4475  				asmbuf.nacltrunc(ctxt, int(p.From.Index))
  4476  			}
  4477  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4478  				asmbuf.nacltrunc(ctxt, int(p.To.Index))
  4479  			}
  4480  		}
  4481  
  4482  		switch p.As {
  4483  		case obj.ARET:
  4484  			asmbuf.Put(naclret)
  4485  			return
  4486  
  4487  		case obj.ACALL,
  4488  			obj.AJMP:
  4489  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4490  				// ANDL $~31, reg
  4491  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4492  				// ADDQ R15, reg
  4493  				asmbuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4494  			}
  4495  
  4496  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4497  				// ANDL $~31, reg
  4498  				asmbuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4499  				// ADDQ R15, reg
  4500  				asmbuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4501  			}
  4502  
  4503  		case AINT:
  4504  			asmbuf.Put1(0xf4)
  4505  			return
  4506  
  4507  		case ASCASB,
  4508  			ASCASW,
  4509  			ASCASL,
  4510  			ASCASQ,
  4511  			ASTOSB,
  4512  			ASTOSW,
  4513  			ASTOSL,
  4514  			ASTOSQ:
  4515  			asmbuf.Put(naclstos)
  4516  
  4517  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4518  			asmbuf.Put(naclmovs)
  4519  		}
  4520  
  4521  		if asmbuf.rep != 0 {
  4522  			asmbuf.Put1(0xf3)
  4523  			asmbuf.rep = 0
  4524  		}
  4525  
  4526  		if asmbuf.repn != 0 {
  4527  			asmbuf.Put1(0xf2)
  4528  			asmbuf.repn = 0
  4529  		}
  4530  
  4531  		if asmbuf.lock {
  4532  			asmbuf.Put1(0xf0)
  4533  			asmbuf.lock = false
  4534  		}
  4535  	}
  4536  
  4537  	asmbuf.rexflag = 0
  4538  	asmbuf.vexflag = 0
  4539  	mark := asmbuf.Len()
  4540  	asmbuf.doasm(ctxt, cursym, p)
  4541  	if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4542  		/*
  4543  		 * as befits the whole approach of the architecture,
  4544  		 * the rex prefix must appear before the first opcode byte
  4545  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4546  		 * before the 0f opcode escape!), or it might be ignored.
  4547  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4548  		 */
  4549  		if ctxt.Arch.Family != sys.AMD64 {
  4550  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  4551  		}
  4552  		n := asmbuf.Len()
  4553  		var np int
  4554  		for np = mark; np < n; np++ {
  4555  			c := asmbuf.At(np)
  4556  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4557  				break
  4558  			}
  4559  		}
  4560  		asmbuf.Insert(np, byte(0x40|asmbuf.rexflag))
  4561  	}
  4562  
  4563  	n := asmbuf.Len()
  4564  	for i := len(cursym.R) - 1; i >= 0; i-- {
  4565  		r := &cursym.R[i]
  4566  		if int64(r.Off) < p.Pc {
  4567  			break
  4568  		}
  4569  		if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4570  			r.Off++
  4571  		}
  4572  		if r.Type == objabi.R_PCREL {
  4573  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4574  				// PC-relative addressing is relative to the end of the instruction,
  4575  				// but the relocations applied by the linker are relative to the end
  4576  				// of the relocation. Because immediate instruction
  4577  				// arguments can follow the PC-relative memory reference in the
  4578  				// instruction encoding, the two may not coincide. In this case,
  4579  				// adjust addend so that linker can keep relocating relative to the
  4580  				// end of the relocation.
  4581  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4582  			} else if ctxt.Arch.Family == sys.I386 {
  4583  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4584  				// assumes that the previous instruction loaded the PC of the end
  4585  				// of that instruction into CX, so the adjustment is relative to
  4586  				// that.
  4587  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4588  			}
  4589  		}
  4590  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  4591  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4592  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4593  		}
  4594  
  4595  	}
  4596  
  4597  	if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4598  		switch p.To.Reg {
  4599  		case REG_SP:
  4600  			asmbuf.Put(naclspfix)
  4601  		case REG_BP:
  4602  			asmbuf.Put(naclbpfix)
  4603  		}
  4604  	}
  4605  }