github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
        // plan9privates and deferreturn are package-level *obj.LSym handles.
        // Nothing in this part of the file assigns them.
        // NOTE(review): presumably each is looked up once and then reused while
        // assembling -- confirm at the sites that set and read them.
    43  var (
    44  	plan9privates *obj.LSym
    45  	deferreturn   *obj.LSym
    46  )
    47  
    48  // Instruction layout.
    49  
    50  const (
    51  	// Loop alignment constants:
    52  	// want to align loop entry to LoopAlign-byte boundary,
    53  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    54  	// We define a loop entry as the target of a backward jump.
    55  	//
    56  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    57  	// and it aligns all jump targets, not just backward jump targets.
    58  	//
    59  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    60  	// is very slight but negative, so the alignment is disabled by
    61  	// setting MaxLoopPad = 0. The code is here for reference and
    62  	// for future experiments.
    63  	//
    64  	LoopAlign  = 16 // boundary, in bytes, that a loop entry would be aligned to
    65  	MaxLoopPad = 0  // upper bound on NOP padding bytes; 0 turns loop alignment off
    66  )
    67  
        // Optab is one row of the optab instruction table. As the comment above
        // optab describes, doasm finds the row whose as field equals p.As, scans
        // ytab for the line that matches the operands, and then uses prefix and op
        // to lay down the instruction bytes.
    68  type Optab struct {
    69  	as     obj.As // instruction this row describes
    70  	ytab   []ytab // accepted operand forms, scanned line by line
    71  	prefix uint8 // one of the P* constants; controls the prefix bytes to emit
    72  	op     [23]uint8 // opcode bytes; the z pointer steps through them as ytab lines are skipped
    73  }
    74  
        // Movtab is a table row keyed by instruction (as). The table built from it
        // and the code that reads it are outside this part of the file.
        // NOTE(review): ft, f3t and tt look like operand classes for the from,
        // third and to operands, and code/op like an encoding selector plus its
        // bytes -- confirm against the table and its consumer.
    75  type Movtab struct {
    76  	as   obj.As
    77  	ft   uint8
    78  	f3t  uint8
    79  	tt   uint8
    80  	code uint8
    81  	op   [4]uint8
    82  }
    83  
        // Operand classes ("Ytypes"). Per the comment above optab, oclass computes
        // the most specific class of an operand and ycover records which more
        // general classes that class also satisfies. The values come from iota, so
        // the order of this list is significant: Ymax is the number of classes
        // and is used both to size ycover (Ymax*Ymax) and as its row stride
        // (ycover[from*Ymax+to]).
    84  const (
    85  	Yxxx = iota
    86  	Ynone
    87  	Yi0 // $0
    88  	Yi1 // $1
    89  	Yu2 // $x, x fits in uint2
    90  	Yi8 // $x, x fits in int8
    91  	Yu8 // $x, x fits in uint8
    92  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
    93  	Ys32
    94  	Yi32
    95  	Yi64
    96  	Yiauto
    97  	Yal
    98  	Ycl
    99  	Yax
   100  	Ycx
   101  	Yrb
   102  	Yrl
   103  	Yrl32 // Yrl on 32-bit system
   104  	Yrf
   105  	Yf0
   106  	Yrx
   107  	Ymb
   108  	Yml
   109  	Ym
   110  	Ybr
   111  	Ycs
   112  	Yss
   113  	Yds
   114  	Yes
   115  	Yfs
   116  	Ygs
   117  	Ygdtr
   118  	Yidtr
   119  	Yldtr
   120  	Ymsw
   121  	Ytask
   122  	Ycr0
   123  	Ycr1
   124  	Ycr2
   125  	Ycr3
   126  	Ycr4
   127  	Ycr5
   128  	Ycr6
   129  	Ycr7
   130  	Ycr8
   131  	Ydr0
   132  	Ydr1
   133  	Ydr2
   134  	Ydr3
   135  	Ydr4
   136  	Ydr5
   137  	Ydr6
   138  	Ydr7
   139  	Ytr0
   140  	Ytr1
   141  	Ytr2
   142  	Ytr3
   143  	Ytr4
   144  	Ytr5
   145  	Ytr6
   146  	Ytr7
   147  	Ymr
   148  	Ymm
   149  	Yxr
   150  	Yxm
   151  	Yyr
   152  	Yym
   153  	Ytls
   154  	Ytextsize
   155  	Yindir
   156  	Ymax
   157  )
   158  
        // Encoding cases ("Ztypes"). Every ytab line starts with one of these; per
        // the comment above optab, the switch on yt.zcase in doasm implements each
        // case, combining it with the opcode bytes the z pointer currently points
        // at. Values come from iota, so Zmax equals the number of cases.
   159  const (
   160  	Zxxx = iota
   161  	Zlit
   162  	Zlitm_r
   163  	Z_rp
   164  	Zbr
   165  	Zcall
   166  	Zcallcon
   167  	Zcallduff
   168  	Zcallind
   169  	Zcallindreg
   170  	Zib_
   171  	Zib_rp
   172  	Zibo_m
   173  	Zibo_m_xm
   174  	Zil_
   175  	Zil_rp
   176  	Ziq_rp
   177  	Zilo_m
   178  	Zjmp
   179  	Zjmpcon
   180  	Zloop
   181  	Zo_iw
   182  	Zm_o
   183  	Zm_r
   184  	Zm2_r
   185  	Zm_r_xm
   186  	Zm_r_i_xm
   187  	Zm_r_xm_nr
   188  	Zr_m_xm_nr
   189  	Zibm_r /* mmx1,mmx2/mem64,imm8 */
   190  	Zibr_m
   191  	Zmb_r
   192  	Zaut_r
   193  	Zo_m
   194  	Zo_m64
   195  	Zpseudo
   196  	Zr_m
   197  	Zr_m_xm
   198  	Zrp_
   199  	Z_ib
   200  	Z_il
   201  	Zm_ibo
   202  	Zm_ilo
   203  	Zib_rr
   204  	Zil_rr
   205  	Zclr
   206  	Zbyte
   207  	Zvex_rm_v_r
   208  	Zvex_r_v_rm
   209  	Zvex_v_rm_r
   210  	Zvex_i_rm_r
   211  	Zvex_i_r_v
   212  	Zvex_i_rm_v_r
   213  	Zmax
   214  )
   215  
        // Prefix selectors used in the third column of optab rows (Optab.prefix),
        // followed by the Rx* bit masks. A selector is a tag, not necessarily the
        // byte that gets emitted: the per-constant comments give the actual prefix
        // sequence, and the ones marked "symbolic" carry no meaning in their value.
        // NOTE(review): Rxw/Rxr/Rxx/Rxb occupy bits 3..0 and Pw is 0x48, so they
        // are presumably the W/R/X/B bits of a REX prefix (0x40|bits) -- confirm
        // where they are combined.
   216  const (
   217  	Px   = 0
   218  	Px1  = 1    // symbolic; exact value doesn't matter
   219  	P32  = 0x32 /* 32-bit only */
   220  	Pe   = 0x66 /* operand escape */
   221  	Pm   = 0x0f /* 2byte opcode escape */
   222  	Pq   = 0xff /* both escapes: 66 0f */
   223  	Pb   = 0xfe /* byte operands */
   224  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   225  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   226  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   227  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   228  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   229  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   230  	Pw   = 0x48 /* Rex.w */
   231  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   232  	Py   = 0x80 /* defaults to 64-bit mode */
   233  	Py1  = 0x81 // symbolic; exact value doesn't matter
   234  	Py3  = 0x83 // symbolic; exact value doesn't matter
   235  	Pvex = 0x84 // symbolic; exact value doesn't matter
   236  
   237  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   238  	Rxr = 1 << 2 /* extend modrm reg */
   239  	Rxx = 1 << 1 /* extend sib index */
   240  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   241  )
   242  
        // VEX prefix descriptors stored as the first byte of each (prefix, opcode)
        // pair in the op array of Pvex optab rows. One byte packs four fields:
        // bits 0-1 hold P, bit 2 holds L, bits 3-4 hold M and bit 7 holds W. The
        // V-field names are all zero, so they contribute nothing to the value and
        // exist only so the combined names below read like the manual's notation.
   243  const (
   244  	// Encoding for VEX prefix in tables.
   245  	// The P, L, and W fields are chosen to match
   246  	// their eventual locations in the VEX prefix bytes.
   247  
   248  	// V field - 4 bits; ignored by encoder
   249  	vexNOVSR = 0 // No VEX-SPECIFIED-REGISTER
   250  	vexNDS   = 0
   251  	vexNDD   = 0
   252  	vexDDS   = 0
   253  	// P field - 2 bits
   254  	vex66 = 1 << 0
   255  	vexF3 = 2 << 0
   256  	vexF2 = 3 << 0
   257  	// L field - 1 bit
   258  	vexLZ  = 0 << 2
   259  	vexLIG = 0 << 2
   260  	vex128 = 0 << 2
   261  	vex256 = 1 << 2
   262  	// W field - 1 bit
   263  	vexWIG = 0 << 7
   264  	vexW0  = 0 << 7
   265  	vexW1  = 1 << 7
   266  	// M field - 5 bits, but mostly reserved; we can store up to 4
   267  	vex0F   = 1 << 3
   268  	vex0F38 = 2 << 3
   269  	vex0F3A = 3 << 3
   270  
   271  	// Combinations used in the manual.
   272  	VEX_DDS_LIG_66_0F38_W1    = vexDDS | vexLIG | vex66 | vex0F38 | vexW1
   273  	VEX_NDD_128_66_0F_WIG     = vexNDD | vex128 | vex66 | vex0F | vexWIG
   274  	VEX_NDD_256_66_0F_WIG     = vexNDD | vex256 | vex66 | vex0F | vexWIG
   275  	VEX_NDD_LZ_F2_0F38_W0     = vexNDD | vexLZ | vexF2 | vex0F38 | vexW0
   276  	VEX_NDD_LZ_F2_0F38_W1     = vexNDD | vexLZ | vexF2 | vex0F38 | vexW1
   277  	VEX_NDS_128_66_0F_WIG     = vexNDS | vex128 | vex66 | vex0F | vexWIG
   278  	VEX_NDS_128_66_0F38_WIG   = vexNDS | vex128 | vex66 | vex0F38 | vexWIG
   279  	VEX_NDS_128_F2_0F_WIG     = vexNDS | vex128 | vexF2 | vex0F | vexWIG
   280  	VEX_NDS_256_66_0F_WIG     = vexNDS | vex256 | vex66 | vex0F | vexWIG
   281  	VEX_NDS_256_66_0F38_WIG   = vexNDS | vex256 | vex66 | vex0F38 | vexWIG
   282  	VEX_NDS_256_66_0F3A_W0    = vexNDS | vex256 | vex66 | vex0F3A | vexW0
   283  	VEX_NDS_256_66_0F3A_WIG   = vexNDS | vex256 | vex66 | vex0F3A | vexWIG
   284  	VEX_NDS_LZ_0F38_W0        = vexNDS | vexLZ | vex0F38 | vexW0
   285  	VEX_NDS_LZ_0F38_W1        = vexNDS | vexLZ | vex0F38 | vexW1
   286  	VEX_NDS_LZ_66_0F38_W0     = vexNDS | vexLZ | vex66 | vex0F38 | vexW0
   287  	VEX_NDS_LZ_66_0F38_W1     = vexNDS | vexLZ | vex66 | vex0F38 | vexW1
   288  	VEX_NDS_LZ_F2_0F38_W0     = vexNDS | vexLZ | vexF2 | vex0F38 | vexW0
   289  	VEX_NDS_LZ_F2_0F38_W1     = vexNDS | vexLZ | vexF2 | vex0F38 | vexW1
   290  	VEX_NDS_LZ_F3_0F38_W0     = vexNDS | vexLZ | vexF3 | vex0F38 | vexW0
   291  	VEX_NDS_LZ_F3_0F38_W1     = vexNDS | vexLZ | vexF3 | vex0F38 | vexW1
   292  	VEX_NOVSR_128_66_0F_WIG   = vexNOVSR | vex128 | vex66 | vex0F | vexWIG
   293  	VEX_NOVSR_128_66_0F38_W0  = vexNOVSR | vex128 | vex66 | vex0F38 | vexW0
   294  	VEX_NOVSR_128_66_0F38_WIG = vexNOVSR | vex128 | vex66 | vex0F38 | vexWIG
   295  	VEX_NOVSR_128_F2_0F_WIG   = vexNOVSR | vex128 | vexF2 | vex0F | vexWIG
   296  	VEX_NOVSR_128_F3_0F_WIG   = vexNOVSR | vex128 | vexF3 | vex0F | vexWIG
   297  	VEX_NOVSR_256_66_0F_WIG   = vexNOVSR | vex256 | vex66 | vex0F | vexWIG
   298  	VEX_NOVSR_256_66_0F38_W0  = vexNOVSR | vex256 | vex66 | vex0F38 | vexW0
   299  	VEX_NOVSR_256_66_0F38_WIG = vexNOVSR | vex256 | vex66 | vex0F38 | vexWIG
   300  	VEX_NOVSR_256_F2_0F_WIG   = vexNOVSR | vex256 | vexF2 | vex0F | vexWIG
   301  	VEX_NOVSR_256_F3_0F_WIG   = vexNOVSR | vex256 | vexF3 | vex0F | vexWIG
   302  	VEX_NOVSR_LZ_F2_0F3A_W0   = vexNOVSR | vexLZ | vexF2 | vex0F3A | vexW0
   303  	VEX_NOVSR_LZ_F2_0F3A_W1   = vexNOVSR | vexLZ | vexF2 | vex0F3A | vexW1
   304  )
   305  
        // ycover is a Ymax-by-Ymax matrix stored flat. Per the comment above optab,
        // instinit fills it in, and ycover[from*Ymax+to] = 1 means an operand whose
        // specific class is from also counts as class to.
   306  var ycover [Ymax * Ymax]uint8
   307  
        // reg has one int per register number below MAXREG. It is populated outside
        // this part of the file.
        // NOTE(review): presumably the hardware encoding of each register --
        // confirm where it is initialized.
   308  var reg [MAXREG]int
   309  
        // regrex has MAXREG+1 entries, one more than reg. It is populated outside
        // this part of the file.
        // NOTE(review): presumably the Rx* bits each register requires, with the
        // extra slot covering index MAXREG itself -- confirm at the initializer.
   310  var regrex [MAXREG + 1]int
   311  
        // Operand-form tables. The ytab type is declared elsewhere; going by the
        // comments in this file, each line is {zcase (a Ztype), zoffset, argList of
        // operand classes}. doasm scans a table line by line, advancing z by
        // zoffset for every line it skips (see the comment above optab).
        //
        // ynone accepts only the operand-less form.
   312  var ynone = []ytab{
   313  	{Zlit, 1, argList{}},
   314  }
   315  
        // ytext lists the two Zpseudo forms that end in a Ytextsize operand.
   316  var ytext = []ytab{
   317  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   318  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   319  }
   320  
        // ynop lists Zpseudo forms with zero or one operand. The Yiauto, Yml, Yrf
        // and Yxr lines each appear twice.
        // NOTE(review): the second copies look unreachable if the scan stops at the
        // first matching line; only the final line has a non-zero zoffset. Confirm
        // before pruning.
   321  var ynop = []ytab{
   322  	{Zpseudo, 0, argList{}},
   323  	{Zpseudo, 0, argList{Yiauto}},
   324  	{Zpseudo, 0, argList{Yml}},
   325  	{Zpseudo, 0, argList{Yrf}},
   326  	{Zpseudo, 0, argList{Yxr}},
   327  	{Zpseudo, 0, argList{Yiauto}},
   328  	{Zpseudo, 0, argList{Yml}},
   329  	{Zpseudo, 0, argList{Yrf}},
   330  	{Zpseudo, 1, argList{Yxr}},
   331  }
   332  
        // yfuncdata accepts a single Zpseudo form: (Yi32, Ym).
   333  var yfuncdata = []ytab{
   334  	{Zpseudo, 0, argList{Yi32, Ym}},
   335  }
   336  
        // ypcdata accepts a single Zpseudo form: (Yi32, Yi32).
   337  var ypcdata = []ytab{
   338  	{Zpseudo, 0, argList{Yi32, Yi32}},
   339  }
   340  
        // Two-operand and one-operand integer tables. Each line is a Ztype, the
        // number of opcode bytes the line consumes (zoffset), and the operand
        // classes it accepts. Line order matters because z advances by zoffset
        // past every line that does not match.
        //
        // yxorb is the byte-operand table used by the optab rows AADCB, AADDB and
        // AANDB, among others.
   341  var yxorb = []ytab{
   342  	{Zib_, 1, argList{Yi32, Yal}},
   343  	{Zibo_m, 2, argList{Yi32, Ymb}},
   344  	{Zr_m, 1, argList{Yrb, Ymb}},
   345  	{Zm_r, 1, argList{Ymb, Yrb}},
   346  }
   347  
        // yaddl is the table the comment above optab walks through for AADDL. The
        // Yi8 line comes first, so a constant that fits in int8 gets that line's
        // form rather than one of the Yi32 forms.
   348  var yaddl = []ytab{
   349  	{Zibo_m, 2, argList{Yi8, Yml}},
   350  	{Zil_, 1, argList{Yi32, Yax}},
   351  	{Zilo_m, 2, argList{Yi32, Yml}},
   352  	{Zr_m, 1, argList{Yrl, Yml}},
   353  	{Zm_r, 1, argList{Yml, Yrl}},
   354  }
   355  
        // yincl has a register form (Z_rp) ahead of the general Zo_m form.
   356  var yincl = []ytab{
   357  	{Z_rp, 1, argList{Yrl}},
   358  	{Zo_m, 2, argList{Yml}},
   359  }
   360  
        // yincq has only the Zo_m form of yincl.
   361  var yincq = []ytab{
   362  	{Zo_m, 2, argList{Yml}},
   363  }
   364  
        // ycmpb has the same four shapes as yxorb, but its immediate forms take the
        // constant as the second operand.
   365  var ycmpb = []ytab{
   366  	{Z_ib, 1, argList{Yal, Yi32}},
   367  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   368  	{Zm_r, 1, argList{Ymb, Yrb}},
   369  	{Zr_m, 1, argList{Yrb, Ymb}},
   370  }
   371  
        // ycmpl has the same five shapes as yaddl, but its immediate forms take the
        // constant as the second operand.
   372  var ycmpl = []ytab{
   373  	{Zm_ibo, 2, argList{Yml, Yi8}},
   374  	{Z_il, 1, argList{Yax, Yi32}},
   375  	{Zm_ilo, 2, argList{Yml, Yi32}},
   376  	{Zm_r, 1, argList{Yml, Yrl}},
   377  	{Zr_m, 1, argList{Yrl, Yml}},
   378  }
   379  
        // yshb takes a first operand of $1, an unsigned 8-bit constant, or Ycx.
        // NOTE(review): yshl below also has a Ycl line and this table does not --
        // confirm whether that difference is intentional.
   380  var yshb = []ytab{
   381  	{Zo_m, 2, argList{Yi1, Ymb}},
   382  	{Zibo_m, 2, argList{Yu8, Ymb}},
   383  	{Zo_m, 2, argList{Ycx, Ymb}},
   384  }
   385  
        // yshl takes a first operand of $1, an unsigned 8-bit constant, Ycl or Ycx.
   386  var yshl = []ytab{
   387  	{Zo_m, 2, argList{Yi1, Yml}},
   388  	{Zibo_m, 2, argList{Yu8, Yml}},
   389  	{Zo_m, 2, argList{Ycl, Yml}},
   390  	{Zo_m, 2, argList{Ycx, Yml}},
   391  }
   392  
        // ytestl is yaddl without the leading Yi8 line.
   393  var ytestl = []ytab{
   394  	{Zil_, 1, argList{Yi32, Yax}},
   395  	{Zilo_m, 2, argList{Yi32, Yml}},
   396  	{Zr_m, 1, argList{Yrl, Yml}},
   397  	{Zm_r, 1, argList{Yml, Yrl}},
   398  }
   399  
        // Move-style tables and a few neighbours. Each line is a Ztype, the number
        // of opcode bytes the line consumes (zoffset), and the operand classes it
        // accepts; z advances by zoffset past each non-matching line, so the order
        // of lines must stay in step with the opcode bytes of the optab rows that
        // use the table.
        //
        // ymovb: register/memory forms in both directions, then two immediate forms.
   400  var ymovb = []ytab{
   401  	{Zr_m, 1, argList{Yrb, Ymb}},
   402  	{Zm_r, 1, argList{Ymb, Yrb}},
   403  	{Zib_rp, 1, argList{Yi32, Yrb}},
   404  	{Zibo_m, 2, argList{Yi8, Yml}},
   405  }
   406  
        // ybtl: an int8 constant or a Yrl register as the first operand.
   407  var ybtl = []ytab{
   408  	{Zibo_m, 2, argList{Yi8, Yml}},
   409  	{Zr_m, 1, argList{Yrl, Yml}},
   410  }
   411  
        // ymovw: the Yi0 line (Zclr) precedes the general Yi32 lines, so a $0 source
        // with a register destination selects Zclr.
   412  var ymovw = []ytab{
   413  	{Zr_m, 1, argList{Yrl, Yml}},
   414  	{Zm_r, 1, argList{Yml, Yrl}},
   415  	{Zclr, 1, argList{Yi0, Yrl}},
   416  	{Zil_rp, 1, argList{Yi32, Yrl}},
   417  	{Zilo_m, 2, argList{Yi32, Yml}},
   418  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   419  }
   420  
        // ymovl: the first five lines equal ymovw's; four MOVD lines are inserted
        // before the final Zaut_r line.
   421  var ymovl = []ytab{
   422  	{Zr_m, 1, argList{Yrl, Yml}},
   423  	{Zm_r, 1, argList{Yml, Yrl}},
   424  	{Zclr, 1, argList{Yi0, Yrl}},
   425  	{Zil_rp, 1, argList{Yi32, Yrl}},
   426  	{Zilo_m, 2, argList{Yi32, Yml}},
   427  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   428  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   429  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   430  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   431  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   432  }
   433  
        // yret: no operand, or a single constant operand; both use Zo_iw.
   434  var yret = []ytab{
   435  	{Zo_iw, 1, argList{}},
   436  	{Zo_iw, 1, argList{Yi32}},
   437  }
   438  
        // ymovq: the per-line comments give the prefix and opcode byte each line
        // is paired with.
   439  var ymovq = []ytab{
   440  	// valid in 32-bit mode
   441  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   442  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   443  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   444  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   445  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   446  
   447  	// valid only in 64-bit mode, usually with 64-bit prefix
   448  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   449  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   450  	{Zclr, 1, argList{Yi0, Yrl}},      // 0x31
   451  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   452  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   453  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   454  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   455  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   456  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   457  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   458  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   459  }
   460  
        // Small register/memory tables. Each line is a Ztype, the number of opcode
        // bytes it consumes (zoffset), and the operand classes it accepts. The
        // table names spell out the operand classes in order (source, then
        // destination), with one exception noted on yml_mb.
        //
        // ym_rl: Ym source, Yrl destination.
   461  var ym_rl = []ytab{
   462  	{Zm_r, 1, argList{Ym, Yrl}},
   463  }
   464  
        // yrl_m: Yrl source, Ym destination. Used by the ABOUNDL and ABOUNDW rows.
   465  var yrl_m = []ytab{
   466  	{Zr_m, 1, argList{Yrl, Ym}},
   467  }
   468  
        // ymb_rl: Ymb source, Yrl destination, encoded with Zmb_r.
   469  var ymb_rl = []ytab{
   470  	{Zmb_r, 1, argList{Ymb, Yrl}},
   471  }
   472  
        // yml_rl: Yml source, Yrl destination. Used by the ABSF*, ABSR* and ACMOV*
        // rows visible at the start of optab.
   473  var yml_rl = []ytab{
   474  	{Zm_r, 1, argList{Yml, Yrl}},
   475  }
   476  
        // yrl_ml: Yrl source, Yml destination. Used by the AARPL row.
   477  var yrl_ml = []ytab{
   478  	{Zr_m, 1, argList{Yrl, Yml}},
   479  }
   480  
        // yml_mb: both directions between Yrb and Ymb.
        // NOTE(review): the name suggests an Yml operand, but only Yrb and Ymb
        // appear in the lines.
   481  var yml_mb = []ytab{
   482  	{Zr_m, 1, argList{Yrb, Ymb}},
   483  	{Zm_r, 1, argList{Ymb, Yrb}},
   484  }
   485  
        // yrb_mb: Yrb source, Ymb destination only.
   486  var yrb_mb = []ytab{
   487  	{Zr_m, 1, argList{Yrb, Ymb}},
   488  }
   489  
        // yxchg: the two lines involving Yax come first and use the Z_rp/Zrp_
        // forms; the general register/memory lines follow.
   490  var yxchg = []ytab{
   491  	{Z_rp, 1, argList{Yax, Yrl}},
   492  	{Zrp_, 1, argList{Yrl, Yax}},
   493  	{Zr_m, 1, argList{Yrl, Yml}},
   494  	{Zm_r, 1, argList{Yml, Yrl}},
   495  }
   496  
        // Mostly single-operand tables. Each line is a Ztype, the number of opcode
        // bytes it consumes (zoffset), and the operand classes it accepts; z
        // advances by zoffset past each line that does not match.
        //
        // ydivl: one Yml operand.
   497  var ydivl = []ytab{
   498  	{Zm_o, 2, argList{Yml}},
   499  }
   500  
        // ydivb: one Ymb operand.
   501  var ydivb = []ytab{
   502  	{Zm_o, 2, argList{Ymb}},
   503  }
   504  
        // yimul: a one-operand form, two immediate-and-register forms (int8 before
        // the 32-bit one), and a two-operand Yml, Yrl form.
   505  var yimul = []ytab{
   506  	{Zm_o, 2, argList{Yml}},
   507  	{Zib_rr, 1, argList{Yi8, Yrl}},
   508  	{Zil_rr, 1, argList{Yi32, Yrl}},
   509  	{Zm_r, 2, argList{Yml, Yrl}},
   510  }
   511  
        // yimul3: the three-operand form with an int8 constant first.
   512  var yimul3 = []ytab{
   513  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   514  }
   515  
        // ybyte: one constant operand, encoded with Zbyte. Used by the ABYTE row.
   516  var ybyte = []ytab{
   517  	{Zbyte, 1, argList{Yi64}},
   518  }
   519  
        // yin: a constant operand (Zib_) or no operand (Zlit).
   520  var yin = []ytab{
   521  	{Zib_, 1, argList{Yi32}},
   522  	{Zlit, 1, argList{}},
   523  }
   524  
        // yint: a constant operand only.
   525  var yint = []ytab{
   526  	{Zib_, 1, argList{Yi32}},
   527  }
   528  
        // ypushl: register, memory, int8 constant and 32-bit constant forms, in
        // that order.
   529  var ypushl = []ytab{
   530  	{Zrp_, 1, argList{Yrl}},
   531  	{Zm_o, 2, argList{Ym}},
   532  	{Zib_, 1, argList{Yi8}},
   533  	{Zil_, 1, argList{Yi32}},
   534  }
   535  
        // ypopl: register form, then memory form.
   536  var ypopl = []ytab{
   537  	{Z_rp, 1, argList{Yrl}},
   538  	{Zo_m, 2, argList{Ym}},
   539  }
   540  
        // yclflush: one Ym operand. Used by the ACLFLUSH row.
   541  var yclflush = []ytab{
   542  	{Zo_m, 2, argList{Ym}},
   543  }
   544  
        // ybswap: one Yrl operand; the line consumes two opcode bytes. Used by the
        // ABSWAPL and ABSWAPQ rows.
   545  var ybswap = []ytab{
   546  	{Z_rp, 2, argList{Yrl}},
   547  }
   548  
        // yscond: one Ymb operand.
   549  var yscond = []ytab{
   550  	{Zo_m, 2, argList{Ymb}},
   551  }
   552  
        // Branch, loop, call and jump tables. Each line is a Ztype, a zoffset, and
        // the operand classes it accepts. A zoffset of 0 means z does not move when
        // the line is skipped, so the following line sees the same opcode bytes.
        //
        // yjcond: a Ybr target, optionally preceded by a $0 or $1 constant. All
        // three lines use Zbr and, because the first two have zoffset 0, the same
        // opcode bytes.
   553  var yjcond = []ytab{
   554  	{Zbr, 0, argList{Ybr}},
   555  	{Zbr, 0, argList{Yi0, Ybr}},
   556  	{Zbr, 1, argList{Yi1, Ybr}},
   557  }
   558  
        // yloop: one Ybr target, encoded with Zloop.
   559  var yloop = []ytab{
   560  	{Zloop, 1, argList{Ybr}},
   561  }
   562  
        // ycall is used by the obj.ACALL row, whose opcode bytes are
        // {0xff, 02, 0xff, 0x15, 0xe8}. Following the zoffsets: both Zcallindreg
        // lines see (0xff, 02), Zcallind sees (0xff, 0x15), and Zcall and Zcallcon
        // both see 0xe8.
   563  var ycall = []ytab{
   564  	{Zcallindreg, 0, argList{Yml}},
   565  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   566  	{Zcallind, 2, argList{Yindir}},
   567  	{Zcall, 0, argList{Ybr}},
   568  	{Zcallcon, 1, argList{Yi32}},
   569  }
   570  
        // yduff: one constant operand, encoded with Zcallduff.
   571  var yduff = []ytab{
   572  	{Zcallduff, 1, argList{Yi32}},
   573  }
   574  
        // yjmp: an Yml operand (Zo_m64), a Ybr target (Zjmp) or a constant
        // (Zjmpcon). Zjmp has zoffset 0, so Zjmpcon sees the same opcode bytes.
   575  var yjmp = []ytab{
   576  	{Zo_m64, 2, argList{Yml}},
   577  	{Zjmp, 0, argList{Ybr}},
   578  	{Zjmpcon, 1, argList{Yi32}},
   579  }
   580  
        // Tables built from the Ym, Yrf, Yf0 and Yax operand classes. Each line is
        // a Ztype (Zm_o, Zo_m or Zlit), a zoffset, and the operand classes.
        // Within this group every line that has Yf0 as destination uses Zm_o and
        // every line that has Yf0 as source uses Zo_m.
        //
        // yfmvd: Ym or Yrf to Yf0, and Yf0 to Ym or Yrf.
   581  var yfmvd = []ytab{
   582  	{Zm_o, 2, argList{Ym, Yf0}},
   583  	{Zo_m, 2, argList{Yf0, Ym}},
   584  	{Zm_o, 2, argList{Yrf, Yf0}},
   585  	{Zo_m, 2, argList{Yf0, Yrf}},
   586  }
   587  
        // yfmvdp: only the two Yf0-source lines of yfmvd.
   588  var yfmvdp = []ytab{
   589  	{Zo_m, 2, argList{Yf0, Ym}},
   590  	{Zo_m, 2, argList{Yf0, Yrf}},
   591  }
   592  
        // yfmvf: only the two Ym lines of yfmvd.
   593  var yfmvf = []ytab{
   594  	{Zm_o, 2, argList{Ym, Yf0}},
   595  	{Zo_m, 2, argList{Yf0, Ym}},
   596  }
   597  
        // yfmvx: Ym to Yf0 only.
   598  var yfmvx = []ytab{
   599  	{Zm_o, 2, argList{Ym, Yf0}},
   600  }
   601  
        // yfmvp: Yf0 to Ym only.
   602  var yfmvp = []ytab{
   603  	{Zo_m, 2, argList{Yf0, Ym}},
   604  }
   605  
        // yfcmv: Yrf to Yf0 only.
   606  var yfcmv = []ytab{
   607  	{Zm_o, 2, argList{Yrf, Yf0}},
   608  }
   609  
        // yfadd: Ym or Yrf to Yf0, and Yf0 to Yrf.
   610  var yfadd = []ytab{
   611  	{Zm_o, 2, argList{Ym, Yf0}},
   612  	{Zm_o, 2, argList{Yrf, Yf0}},
   613  	{Zo_m, 2, argList{Yf0, Yrf}},
   614  }
   615  
        // yfxch: Yf0 and Yrf in either order.
   616  var yfxch = []ytab{
   617  	{Zo_m, 2, argList{Yf0, Yrf}},
   618  	{Zm_o, 2, argList{Yrf, Yf0}},
   619  }
   620  
        // ycompp: a single Yf0, Yrf line; see the inline caveat.
   621  var ycompp = []ytab{
   622  	{Zo_m, 2, argList{Yf0, Yrf}}, /* botch is really f0,f1 */
   623  }
   624  
        // ystsw: a Ym operand (Zo_m), or Yax with a one-byte literal form (Zlit).
   625  var ystsw = []ytab{
   626  	{Zo_m, 2, argList{Ym}},
   627  	{Zlit, 1, argList{Yax}},
   628  }
   629  
        // ysvrs_mo: one Ym operand, encoded with Zm_o.
   630  var ysvrs_mo = []ytab{
   631  	{Zm_o, 2, argList{Ym}},
   632  }
   633  
   634  // unaryDst version of "ysvrs_mo".
   635  var ysvrs_om = []ytab{
   636  	{Zo_m, 2, argList{Ym}},
   637  }
   638  
        // Tables over the Ymr/Ymm, Yxr/Yxm, Yml and Yrl operand classes. Each line
        // is a Ztype, the number of opcode bytes it consumes (zoffset), and the
        // operand classes it accepts; z advances by zoffset past each line that
        // does not match, so a table with several lines needs matching opcode
        // bytes, in the same order, in every optab row that uses it.
        //
        // ymm: the Ymm, Ymr form first, then the Yxm, Yxr form.
   639  var ymm = []ytab{
   640  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   641  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   642  }
   643  
        // yxm: Yxm source, Yxr destination. Used by AADDPD, AADDPS, AADDSD, AADDSS
        // and the other yxm rows visible at the start of optab.
   644  var yxm = []ytab{
   645  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   646  }
   647  
        // yxm_q4: same operands as yxm, but encoded with Zm_r instead of Zm_r_xm.
   648  var yxm_q4 = []ytab{
   649  	{Zm_r, 1, argList{Yxm, Yxr}},
   650  }
   651  
        // yxcvm1: Yxm source; destination is Yxr or Ymr.
   652  var yxcvm1 = []ytab{
   653  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   654  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   655  }
   656  
        // yxcvm2: Yxr destination; source is Yxm or Ymm.
   657  var yxcvm2 = []ytab{
   658  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   659  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   660  }
   661  
        // yxr: both operands must be Yxr.
   662  var yxr = []ytab{
   663  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   664  }
   665  
        // yxr_ml: Yxr source, Yml destination.
   666  var yxr_ml = []ytab{
   667  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   668  }
   669  
        // ymr: both operands must be Ymr.
   670  var ymr = []ytab{
   671  	{Zm_r, 1, argList{Ymr, Ymr}},
   672  }
   673  
        // ymr_ml: Ymr source, Yml destination.
   674  var ymr_ml = []ytab{
   675  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   676  }
   677  
        // yxcmpi: Yxm, Yxr, then an int8 constant as the last operand.
   678  var yxcmpi = []ytab{
   679  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   680  }
   681  
        // yxmov: Yxm to Yxr, and Yxr to Yxm.
   682  var yxmov = []ytab{
   683  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   684  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   685  }
   686  
        // yxcvfl: Yxm source, Yrl destination.
   687  var yxcvfl = []ytab{
   688  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   689  }
   690  
        // yxcvlf: Yml source, Yxr destination.
   691  var yxcvlf = []ytab{
   692  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   693  }
   694  
        // yxcvfq: same operands as yxcvfl, consuming two opcode bytes.
   695  var yxcvfq = []ytab{
   696  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   697  }
   698  
        // yxcvqf: same operands as yxcvlf, consuming two opcode bytes.
   699  var yxcvqf = []ytab{
   700  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   701  }
   702  
        // yps: for each of the Ymr and Yxr register files, a register/memory source
        // form followed by an int8-constant form.
   703  var yps = []ytab{
   704  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   705  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   706  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   707  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   708  }
   709  
        // yxrrl: Yxr source, Yrl destination, encoded with Zm_r.
   710  var yxrrl = []ytab{
   711  	{Zm_r, 1, argList{Yxr, Yrl}},
   712  }
   713  
        // ymrxr: Ymr or Yxm source, Yxr destination.
   714  var ymrxr = []ytab{
   715  	{Zm_r, 1, argList{Ymr, Yxr}},
   716  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   717  }
   718  
        // ymshuf: int8 constant, Ymm, Ymr.
   719  var ymshuf = []ytab{
   720  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   721  }
   722  
        // ymshufb: Yxm, Yxr, encoded with Zm2_r.
   723  var ymshufb = []ytab{
   724  	{Zm2_r, 2, argList{Yxm, Yxr}},
   725  }
   726  
   727  // yxshuf should never have more than 1 entry,
   728  // because some optab entries use opcode sequences that
   729  // are longer than 2 bytes (zoffset=2 here);
   730  // ROUNDPD and ROUNDPS and the recently added BLENDPD,
   731  // to name a few.
   732  var yxshuf = []ytab{
   733  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   734  }
   735  
        // yextrw: unsigned 8-bit constant, Yxr, Yrl.
   736  var yextrw = []ytab{
   737  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   738  }
   739  
        // yextr: unsigned 8-bit constant, Yxr, Ymm, encoded with Zibr_m; the line
        // consumes three opcode bytes.
   740  var yextr = []ytab{
   741  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   742  }
   743  
        // yinsrw: unsigned 8-bit constant, Yml, Yxr.
   744  var yinsrw = []ytab{
   745  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   746  }
   747  
        // yinsr: unsigned 8-bit constant, Ymm, Yxr; the line consumes three opcode
        // bytes.
   748  var yinsr = []ytab{
   749  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   750  }
   751  
        // ypsdq: int8 constant, Yxr.
   752  var ypsdq = []ytab{
   753  	{Zibo_m, 2, argList{Yi8, Yxr}},
   754  }
   755  
        // ymskb: Yxr or Ymr source, Yrl destination; the Yxr line comes first.
   756  var ymskb = []ytab{
   757  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   758  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   759  }
   760  
        // ycrc32l: Yml, Yrl, encoded with Zlitm_r and a zoffset of 0.
   761  var ycrc32l = []ytab{
   762  	{Zlitm_r, 0, argList{Yml, Yrl}},
   763  }
   764  
        // yprefetch: one Ym operand.
   765  var yprefetch = []ytab{
   766  	{Zm_o, 2, argList{Ym}},
   767  }
   768  
        // yaes: Yxm, Yxr, encoded with Zlitm_r.
   769  var yaes = []ytab{
   770  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   771  }
   772  
        // yxbegin: one Ybr target, encoded with Zjmp.
   773  var yxbegin = []ytab{
   774  	{Zjmp, 1, argList{Ybr}},
   775  }
   776  
        // yxabort: one unsigned 8-bit constant.
   777  var yxabort = []ytab{
   778  	{Zib_, 1, argList{Yu8}},
   779  }
   780  
        // ylddqu: Ym source, Yxr destination.
   781  var ylddqu = []ytab{
   782  	{Zm_r, 1, argList{Ym, Yxr}},
   783  }
   784  
        // ypalignr: unsigned 8-bit constant, Yxm, Yxr.
   785  var ypalignr = []ytab{
   786  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   787  }
   788  
   789  // VEX instructions that come in two forms:
   790  //	VTHING xmm2/m128, xmmV, xmm1
   791  //	VTHING ymm2/m256, ymmV, ymm1
   792  // The opcode array in the corresponding Optab entry
   793  // should contain the (VEX prefixes, opcode byte) pair
   794  // for each of the two forms.
   795  // For example, the entries for VPXOR are:
   796  //
   797  //	VPXOR xmm2/m128, xmmV, xmm1
   798  //	VEX.NDS.128.66.0F.WIG EF /r
   799  //
   800  //	VPXOR ymm2/m256, ymmV, ymm1
   801  //	VEX.NDS.256.66.0F.WIG EF /r
   802  //
   803  // Produce this Optab entry:
   804  //
   805  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0xEF, VEX_NDS_256_66_0F_WIG, 0xEF}}
   806  //
   807  var yvex_xy3 = []ytab{
   808  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   809  	{Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}},
   810  }
   811  
        // yvex_x3: only the Yxm, Yxr, Yxr line of yvex_xy3.
   812  var yvex_x3 = []ytab{
   813  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   814  }
   815  
        // yvex_ri3: int8 constant, Ymb, Yrl.
   816  var yvex_ri3 = []ytab{
   817  	{Zvex_i_rm_r, 2, argList{Yi8, Ymb, Yrl}},
   818  }
   819  
        // yvex_xyi3: a constant followed by a Yxm, Yxr or Yym, Yyr pair. The
        // unsigned 8-bit lines come first and the int8 lines repeat the same pairs.
   820  var yvex_xyi3 = []ytab{
   821  	{Zvex_i_rm_r, 2, argList{Yu8, Yxm, Yxr}},
   822  	{Zvex_i_rm_r, 2, argList{Yu8, Yym, Yyr}},
   823  	{Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}},
   824  	{Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}},
   825  }
   826  
        // yvex_yyi4: unsigned 8-bit constant, Yym, Yyr, Yyr.
   827  var yvex_yyi4 = []ytab{
   828  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yym, Yyr, Yyr}},
   829  }
   830  
        // yvex_xyi4: like yvex_yyi4, but the second operand is Yxm.
   831  var yvex_xyi4 = []ytab{
   832  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yyr, Yyr}},
   833  }
   834  
        // yvex_shift: two constant-count lines (three opcode bytes each) followed
        // by two lines whose first operand is Yxm. The first operand stays Yxm
        // even in the line whose other operands are Yyr.
   835  var yvex_shift = []ytab{
   836  	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
   837  	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
   838  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   839  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr, Yyr}},
   840  }
   841  
        // yvex_shift_dq: only the two constant-count lines of yvex_shift.
   842  var yvex_shift_dq = []ytab{
   843  	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
   844  	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
   845  }
   846  
        // yvex_r3: Yml, Yrl, Yrl, encoded with Zvex_rm_v_r.
   847  var yvex_r3 = []ytab{
   848  	{Zvex_rm_v_r, 2, argList{Yml, Yrl, Yrl}},
   849  }
   850  
        // yvex_vmr3: Yrl, Yml, Yrl, encoded with Zvex_v_rm_r.
   851  var yvex_vmr3 = []ytab{
   852  	{Zvex_v_rm_r, 2, argList{Yrl, Yml, Yrl}},
   853  }
   854  
        // yvex_xy2: two-operand lines, Yxm to Yxr and Yym to Yyr.
   855  var yvex_xy2 = []ytab{
   856  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   857  	{Zvex_rm_v_r, 2, argList{Yym, Yyr}},
   858  }
   859  
        // yvex_xyr2: Yxr or Yyr source, Yrl destination.
   860  var yvex_xyr2 = []ytab{
   861  	{Zvex_rm_v_r, 2, argList{Yxr, Yrl}},
   862  	{Zvex_rm_v_r, 2, argList{Yyr, Yrl}},
   863  }
   864  
        // yvex_vmovdqa: for each of the Yxr and Yyr register files, a line into the
        // register (Zvex_rm_v_r) followed by a line out of it (Zvex_r_v_rm).
   865  var yvex_vmovdqa = []ytab{
   866  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   867  	{Zvex_r_v_rm, 2, argList{Yxr, Yxm}},
   868  	{Zvex_rm_v_r, 2, argList{Yym, Yyr}},
   869  	{Zvex_r_v_rm, 2, argList{Yyr, Yym}},
   870  }
   871  
        // yvex_vmovntdq: Yxr or Yyr source, Ym destination.
   872  var yvex_vmovntdq = []ytab{
   873  	{Zvex_r_v_rm, 2, argList{Yxr, Ym}},
   874  	{Zvex_r_v_rm, 2, argList{Yyr, Ym}},
   875  }
   876  
        // yvex_vpbroadcast: Yxm source in both lines; destination is Yxr or Yyr.
   877  var yvex_vpbroadcast = []ytab{
   878  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   879  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr}},
   880  }
   881  
        // yvex_vpbroadcast_sd: only the Yyr-destination line of yvex_vpbroadcast.
   882  var yvex_vpbroadcast_sd = []ytab{
   883  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr}},
   884  }
   885  
        // ymmxmm0f38: a Ymm, Ymr line that consumes three opcode bytes, followed by
        // a Yxm, Yxr line that consumes five; both use Zlitm_r. Each line is a
        // Ztype, a zoffset, and the operand classes it accepts.
   886  var ymmxmm0f38 = []ytab{
   887  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   888  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   889  }
   890  
        // yextractps: a constant that fits in uint2, then Yxr, then Yml.
   891  var yextractps = []ytab{
   892  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   893  }
   894  
   895  /*
   896   * You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   897   * ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   898   * to find the entry with the given p.As and then looks through the ytable for
   899   * that instruction (the second field in the optab struct) for a line whose
   900   * argList matches the Ytypes of the operands (p.From and p.To here).  The
   901   * function oclass computes the specific Ytype of an operand and then the set
   902   * of more general Ytypes that it satisfies is implied by the ycover table, set
   903   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   904   * from the more general 8-bit constants, but instinit says
   905   *
   906   *        ycover[Yi0*Ymax+Ys32] = 1
   907   *        ycover[Yi1*Ymax+Ys32] = 1
   908   *        ycover[Yi8*Ymax+Ys32] = 1
   909   *
   910   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   911   * if that's what an instruction can handle.
   912   *
   913   * In parallel with the scan through the ytable for the appropriate line, there
   914   * is a z pointer that starts out pointing at the strange magic byte list in
   915   * the Optab struct.  With each step past a non-matching ytable line, z
   916   * advances by the 2nd entry in the line (its zoffset).  When a matching line is
   917   * found, that z pointer has the extra data to use in laying down the instruction
   918   * bytes.  The actual bytes laid down are a function of the 1st entry in the line
   919   * (that is, the Ztype) and the z bytes.
   920   *
   921   * For example, let's look at AADDL.  The optab line says:
   922   *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   923   *
   924   * and yaddl says
   925   *        var yaddl = []ytab{
   926   *                {Zibo_m, 2, argList{Yi8, Yml}},
   927   *                {Zil_, 1, argList{Yi32, Yax}},
   928   *                {Zilo_m, 2, argList{Yi32, Yml}},
   929   *                {Zr_m, 1, argList{Yrl, Yml}},
   930   *                {Zm_r, 1, argList{Yml, Yrl}},
   931   *        }
   932   *
   933   * so there are 5 possible types of ADDL instruction that can be laid down, and
   934   * possible states used to lay them down (Ztype and z pointer, assuming z
   935   * points at [23]uint8{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   936   *
   937   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   938   *        Yi32, Yax -> Zil_, z+2 (0x05)
   939   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   940   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   941   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   942   *
   943   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   944   * relatively straightforward as this program goes.
   945   *
   946   * The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   947   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   948   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   949   * Zilo_m is the same but a long (32-bit) immediate.
   950   */
   951  var optab =
   952  /*	as, ytab, andproto, opcode */
   953  []Optab{
   954  	{obj.AXXX, nil, 0, [23]uint8{}},
   955  	{AAAA, ynone, P32, [23]uint8{0x37}},
   956  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   957  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   958  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   959  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x12}},
   960  	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   961  	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   962  	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   963  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   964  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   965  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   966  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   967  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   968  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   969  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   970  	{AADDSUBPD, yxm, Pq, [23]uint8{0xd0}},
   971  	{AADDSUBPS, yxm, Pf2, [23]uint8{0xd0}},
   972  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   973  	{AADJSP, nil, 0, [23]uint8{}},
   974  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   975  	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   976  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   977  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   978  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   979  	{AANDPS, yxm, Pm, [23]uint8{0x54}},
   980  	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   981  	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   982  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   983  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   984  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   985  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   986  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   987  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   988  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   989  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   990  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   991  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   992  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   993  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   994  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   995  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   996  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   997  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   998  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   999  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
  1000  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
  1001  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
  1002  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
  1003  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
  1004  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
  1005  	{ABYTE, ybyte, Px, [23]uint8{1}},
  1006  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
  1007  	{ACDQ, ynone, Px, [23]uint8{0x99}},
  1008  	{ACLC, ynone, Px, [23]uint8{0xf8}},
  1009  	{ACLD, ynone, Px, [23]uint8{0xfc}},
  1010  	{ACLFLUSH, yclflush, Pm, [23]uint8{0xae, 07}},
  1011  	{ACLI, ynone, Px, [23]uint8{0xfa}},
  1012  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
  1013  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1014  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1015  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1016  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1017  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1018  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1019  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1020  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1021  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1022  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1023  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1024  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1025  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1026  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1027  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1028  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1029  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1030  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1031  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1032  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1033  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1034  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1035  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1036  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1037  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1038  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1039  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1040  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1041  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1042  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1043  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1044  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1045  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1046  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1047  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1048  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1049  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1050  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1051  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1052  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1053  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1054  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1055  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1056  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1057  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1058  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1059  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1060  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1061  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1062  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1063  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1064  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1065  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1066  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1067  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1068  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1069  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1070  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1071  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1072  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1073  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1074  	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
  1075  	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
  1076  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1077  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1078  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1079  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1080  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1081  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1082  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1083  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1084  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1085  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1086  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1087  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1088  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1089  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1090  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1091  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1092  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1093  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1094  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1095  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1096  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1097  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1098  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1099  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1100  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1101  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1102  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1103  	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
  1104  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1105  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1106  	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
  1107  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1108  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1109  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1110  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1111  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1112  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1113  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1114  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1115  	{ADPPD, yxshuf, Pq, [23]uint8{0x3a, 0x41, 0}},
  1116  	{ADPPS, yxshuf, Pq, [23]uint8{0x3a, 0x40, 0}},
  1117  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1118  	{AEXTRACTPS, yextractps, Pq, [23]uint8{0x3a, 0x17, 0}},
  1119  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1120  	{AFXRSTOR, ysvrs_mo, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1121  	{AFXSAVE, ysvrs_om, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1122  	{AFXRSTOR64, ysvrs_mo, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1123  	{AFXSAVE64, ysvrs_om, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1124  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1125  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1126  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1127  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1128  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1129  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1130  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1131  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1132  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1133  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1134  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1135  	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
  1136  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1137  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1138  	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
  1139  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1140  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1141  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1142  	{AINSERTPS, yxshuf, Pq, [23]uint8{0x3a, 0x21, 0}},
  1143  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1144  	{AINT, yint, Px, [23]uint8{0xcd}},
  1145  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1146  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1147  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1148  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1149  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1150  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1151  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1152  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1153  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1154  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1155  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1156  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1157  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1158  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1159  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1160  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1161  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1162  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1163  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1164  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1165  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1166  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1167  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1168  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1169  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1170  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1171  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1172  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1173  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1174  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1175  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1176  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1177  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1178  	{ALDMXCSR, ysvrs_mo, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1179  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1180  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1181  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1182  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1183  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1184  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1185  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1186  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1187  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1188  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1189  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1190  	{ALONG, ybyte, Px, [23]uint8{4}},
  1191  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1192  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1193  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1194  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1195  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1196  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1197  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1198  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1199  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1200  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1201  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1202  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1203  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1204  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1205  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1206  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1207  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1208  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1209  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1210  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1211  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1212  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1213  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1214  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1215  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1216  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1217  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1218  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1219  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1220  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1221  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1222  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1223  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1224  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1225  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1226  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1227  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1228  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1229  	{AMOVNTDQA, ylddqu, Pq4, [23]uint8{0x2a}},
  1230  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1231  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1232  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1233  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1234  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1235  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1236  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1237  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1238  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1239  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1240  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1241  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1242  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1243  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1244  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1245  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1246  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1247  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1248  	{AMPSADBW, yxshuf, Pq, [23]uint8{0x3a, 0x42, 0}},
  1249  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1250  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1251  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1252  	{AMULPS, yxm, Pm, [23]uint8{0x59}}, // prefix column is Pm, matching the other packed-single yxm rows (ADDPS, DIVPS, SUBPS, ...)
  1253  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1254  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1255  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1256  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1257  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1258  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1259  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1260  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1261  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1262  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1263  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1264  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1265  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1266  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1267  	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1268  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1269  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1270  	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1271  	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1272  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1273  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1274  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1275  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1276  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1277  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1278  	{APABSB, yxm_q4, Pq4, [23]uint8{0x1c}},
  1279  	{APABSD, yxm_q4, Pq4, [23]uint8{0x1e}},
  1280  	{APABSW, yxm_q4, Pq4, [23]uint8{0x1d}},
  1281  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1282  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1283  	{APACKUSDW, yxm_q4, Pq4, [23]uint8{0x2b}},
  1284  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1285  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1286  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1287  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1288  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1289  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1290  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1291  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1292  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1293  	{APALIGNR, ypalignr, Pq, [23]uint8{0x3a, 0x0f}},
  1294  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1295  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1296  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1297  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1298  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1299  	{APBLENDW, yxshuf, Pq, [23]uint8{0x3a, 0x0e, 0}},
  1300  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1301  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1302  	{APCMPEQQ, yxm_q4, Pq4, [23]uint8{0x29}},
  1303  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1304  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1305  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1306  	{APCMPGTQ, yxm_q4, Pq4, [23]uint8{0x37}},
  1307  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1308  	{APCMPISTRI, yxshuf, Pq, [23]uint8{0x3a, 0x63, 0}},
  1309  	{APCMPISTRM, yxshuf, Pq, [23]uint8{0x3a, 0x62, 0}},
  1310  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1311  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1312  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1313  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1314  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1315  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1316  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1317  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1318  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1319  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1320  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1321  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1322  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1323  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1324  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1325  	{APMADDUBSW, yxm_q4, Pq4, [23]uint8{0x04}},
  1326  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1327  	{APMAXSB, yxm_q4, Pq4, [23]uint8{0x3c}},
  1328  	{APMAXSD, yxm_q4, Pq4, [23]uint8{0x3d}},
  1329  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1330  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1331  	{APMAXUD, yxm_q4, Pq4, [23]uint8{0x3f}},
  1332  	{APMAXUW, yxm_q4, Pq4, [23]uint8{0x3e}},
  1333  	{APMINSB, yxm_q4, Pq4, [23]uint8{0x38}},
  1334  	{APMINSD, yxm_q4, Pq4, [23]uint8{0x39}},
  1335  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1336  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1337  	{APMINUD, yxm_q4, Pq4, [23]uint8{0x3b}},
  1338  	{APMINUW, yxm_q4, Pq4, [23]uint8{0x3a}},
  1339  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1340  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1341  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1342  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1343  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1344  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1345  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1346  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1347  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1348  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1349  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1350  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1351  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1352  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1353  	{APMULHRSW, yxm_q4, Pq4, [23]uint8{0x0b}},
  1354  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1355  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1356  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1357  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1358  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1359  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1360  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1361  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1362  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1363  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1364  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1365  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1366  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1367  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1368  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1369  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1370  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1371  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1372  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1373  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1374  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1375  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1376  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1377  	{APSIGNB, yxm_q4, Pq4, [23]uint8{0x08}},
  1378  	{APSIGND, yxm_q4, Pq4, [23]uint8{0x0a}},
  1379  	{APSIGNW, yxm_q4, Pq4, [23]uint8{0x09}},
  1380  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1381  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1382  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1383  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1384  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1385  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1386  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1387  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1388  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1389  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1390  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1391  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1392  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1393  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1394  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1395  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1396  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1397  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1398  	{APTEST, yxm_q4, Pq4, [23]uint8{0x17}},
  1399  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1400  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1401  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1402  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1403  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1404  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1405  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1406  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1407  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1408  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1409  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1410  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1411  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1412  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1413  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1414  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1415  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1416  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1417  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1418  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1419  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1420  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1421  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1422  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1423  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1424  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1425  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1426  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1427  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1428  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1429  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1430  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1431  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1432  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1433  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1434  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1435  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1436  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1437  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1438  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1439  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1440  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1441  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1442  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1443  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1444  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1445  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1446  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1447  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1448  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1449  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1450  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1451  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1452  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1453  	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1454  	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1455  	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1456  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1457  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1458  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1459  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1460  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1461  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1462  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1463  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1464  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1465  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1466  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1467  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1468  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1469  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1470  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1471  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1472  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1473  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1474  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1475  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1476  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1477  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1478  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1479  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1480  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1481  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1482  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1483  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1484  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1485  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1486  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1487  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1488  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1489  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1490  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1491  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1492  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1493  	{ASTMXCSR, ysvrs_om, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1494  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1495  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1496  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1497  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1498  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1499  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1500  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1501  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1502  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1503  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1504  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1505  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1506  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1507  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1508  	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1509  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1510  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1511  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1512  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1513  	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
  1514  	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
  1515  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1516  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1517  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1518  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1519  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1520  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1521  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1522  	{AWORD, ybyte, Px, [23]uint8{2}},
  1523  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1524  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1525  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1526  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1527  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1528  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1529  	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1530  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1531  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1532  	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1533  	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1534  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1535  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1536  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1537  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1538  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1539  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1540  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1541  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1542  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1543  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1544  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1545  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1546  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1547  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1548  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1549  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1550  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1551  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1552  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1553  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1554  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1555  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1556  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1557  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1558  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1559  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1560  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1561  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1562  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1563  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1564  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1565  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1566  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1567  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1568  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1569  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1570  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1571  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1572  	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
  1573  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1574  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1575  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1576  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1577  	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
  1578  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1579  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1580  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1581  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1582  	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
  1583  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1584  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1585  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1586  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1587  	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
  1588  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1589  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1590  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1591  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1592  	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
  1593  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1594  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1595  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1596  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1597  	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
  1598  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1599  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1600  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1601  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1602  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1603  	{AFFREE, nil, 0, [23]uint8{}},
  1604  	{AFLDCW, ysvrs_mo, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1605  	{AFLDENV, ysvrs_mo, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1606  	{AFRSTOR, ysvrs_mo, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1607  	{AFSAVE, ysvrs_om, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1608  	{AFSTCW, ysvrs_om, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1609  	{AFSTENV, ysvrs_om, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1610  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1611  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1612  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1613  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1614  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1615  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1616  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1617  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1618  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1619  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1620  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1621  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1622  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1623  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1624  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1625  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1626  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1627  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1628  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1629  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1630  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1631  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1632  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1633  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1634  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1635  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1636  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1637  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1638  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1639  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1640  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1641  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1642  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1643  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1644  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1645  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1646  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1647  	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
  1648  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1649  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1650  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1651  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1652  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1653  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1654  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1655  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1656  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1657  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1658  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1659  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1660  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1661  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1662  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1663  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1664  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1665  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1666  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1667  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1668  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1669  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1670  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1671  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1672  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1673  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1674  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1675  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1676  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1677  	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1678  	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
  1679  	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
  1680  	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1681  	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1682  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1683  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1684  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1685  	{APCMPESTRM, yxshuf, Pq, [23]uint8{0x3a, 0x60, 0}},
  1686  	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
  1687  	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
  1688  	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
  1689  
  1690  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_NDS_LZ_0F38_W0, 0xF2}},
  1691  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_NDS_LZ_0F38_W1, 0xF2}},
  1692  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_NDS_LZ_0F38_W0, 0xF7}},
  1693  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_NDS_LZ_0F38_W1, 0xF7}},
  1694  	{ABLENDPD, yxshuf, Pq, [23]uint8{0x3a, 0x0d, 0}},
  1695  	{ABLENDPS, yxshuf, Pq, [23]uint8{0x3a, 0x0c, 0}},
  1696  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_NDS_LZ_0F38_W0, 0xF5}},
  1697  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_NDS_LZ_0F38_W1, 0xF5}},
  1698  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_NDD_LZ_F2_0F38_W0, 0xF6}},
  1699  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_NDD_LZ_F2_0F38_W1, 0xF6}},
  1700  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_NDS_LZ_F2_0F38_W0, 0xF5}},
  1701  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_NDS_LZ_F2_0F38_W1, 0xF5}},
  1702  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_NDS_LZ_F3_0F38_W0, 0xF5}},
  1703  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_NDS_LZ_F3_0F38_W1, 0xF5}},
  1704  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_NDS_LZ_F3_0F38_W0, 0xF7}},
  1705  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_NDS_LZ_F3_0F38_W1, 0xF7}},
  1706  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_NDS_LZ_66_0F38_W0, 0xF7}},
  1707  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_NDS_LZ_66_0F38_W1, 0xF7}},
  1708  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_NDS_LZ_F2_0F38_W0, 0xF7}},
  1709  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_NDS_LZ_F2_0F38_W1, 0xF7}},
  1710  
  1711  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1712  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_NOVSR_128_F3_0F_WIG, 0x6F, VEX_NOVSR_128_F3_0F_WIG, 0x7F, VEX_NOVSR_256_F3_0F_WIG, 0x6F, VEX_NOVSR_256_F3_0F_WIG, 0x7F}},
  1713  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_NOVSR_128_66_0F_WIG, 0x6F, VEX_NOVSR_128_66_0F_WIG, 0x7F, VEX_NOVSR_256_66_0F_WIG, 0x6F, VEX_NOVSR_256_66_0F_WIG, 0x7F}},
  1714  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_NOVSR_128_66_0F_WIG, 0xE7, VEX_NOVSR_256_66_0F_WIG, 0xE7}},
  1715  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0x74, VEX_NDS_256_66_0F_WIG, 0x74}},
  1716  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0xEF, VEX_NDS_256_66_0F_WIG, 0xEF}},
  1717  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_NOVSR_128_66_0F_WIG, 0xD7, VEX_NOVSR_256_66_0F_WIG, 0xD7}},
  1718  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0xDB, VEX_NDS_256_66_0F_WIG, 0xDB}},
  1719  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_NOVSR_128_66_0F38_W0, 0x78, VEX_NOVSR_256_66_0F38_W0, 0x78}},
  1720  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_NOVSR_128_66_0F38_WIG, 0x17, VEX_NOVSR_256_66_0F38_WIG, 0x17}},
  1721  	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F38_WIG, 0x00, VEX_NDS_256_66_0F38_WIG, 0x00}},
  1722  	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_NOVSR_128_66_0F_WIG, 0x70, VEX_NOVSR_256_66_0F_WIG, 0x70, VEX_NOVSR_128_66_0F_WIG, 0x70, VEX_NOVSR_256_66_0F_WIG, 0x70}},
  1723  	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0xeb, VEX_NDS_256_66_0F_WIG, 0xeb}},
  1724  	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0xd4, VEX_NDS_256_66_0F_WIG, 0xd4}},
  1725  	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0xfe, VEX_NDS_256_66_0F_WIG, 0xfe}},
  1726  	{AVADDSD, yvex_x3, Pvex, [23]uint8{VEX_NDS_128_F2_0F_WIG, 0x58}},
  1727  	{AVSUBSD, yvex_x3, Pvex, [23]uint8{VEX_NDS_128_F2_0F_WIG, 0x5c}},
  1728  	{AVFMADD213SD, yvex_x3, Pvex, [23]uint8{VEX_DDS_LIG_66_0F38_W1, 0xa9}},
  1729  	{AVFMADD231SD, yvex_x3, Pvex, [23]uint8{VEX_DDS_LIG_66_0F38_W1, 0xb9}},
  1730  	{AVFNMADD213SD, yvex_x3, Pvex, [23]uint8{VEX_DDS_LIG_66_0F38_W1, 0xad}},
  1731  	{AVFNMADD231SD, yvex_x3, Pvex, [23]uint8{VEX_DDS_LIG_66_0F38_W1, 0xbd}},
  1732  	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0x72, 0xf0, VEX_NDS_256_66_0F_WIG, 0x72, 0xf0, VEX_NDD_128_66_0F_WIG, 0xf2, VEX_NDD_256_66_0F_WIG, 0xf2}},
  1733  	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_NDD_128_66_0F_WIG, 0x73, 0xf0, VEX_NDD_256_66_0F_WIG, 0x73, 0xf0, VEX_NDS_128_66_0F_WIG, 0xf3, VEX_NDS_256_66_0F_WIG, 0xf3}},
  1734  	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_NDD_128_66_0F_WIG, 0x72, 0xd0, VEX_NDD_256_66_0F_WIG, 0x72, 0xd0, VEX_NDD_128_66_0F_WIG, 0xd2, VEX_NDD_256_66_0F_WIG, 0xd2}},
  1735  	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_NDD_128_66_0F_WIG, 0x73, 0xd0, VEX_NDD_256_66_0F_WIG, 0x73, 0xd0, VEX_NDS_128_66_0F_WIG, 0xd3, VEX_NDS_256_66_0F_WIG, 0xd3}},
  1736  	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_NDD_128_66_0F_WIG, 0x73, 0xd8, VEX_NDD_256_66_0F_WIG, 0x73, 0xd8}},
  1737  	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_NDD_128_66_0F_WIG, 0x73, 0xf8, VEX_NDD_256_66_0F_WIG, 0x73, 0xf8}},
  1738  	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_NDS_256_66_0F3A_W0, 0x06}},
  1739  	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_NDS_256_66_0F3A_WIG, 0x0f}},
  1740  	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_NDS_256_66_0F3A_WIG, 0x02}},
  1741  	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_NDS_256_66_0F3A_WIG, 0x38}},
  1742  	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_NDS_256_66_0F3A_WIG, 0x46}},
  1743  	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_NOVSR_LZ_F2_0F3A_W0, 0xf0}},
  1744  	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_NOVSR_LZ_F2_0F3A_W1, 0xf0}},
  1745  	{AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_NOVSR_256_66_0F38_W0, 0x19}},
  1746  	{AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_NOVSR_128_66_0F38_W0, 0x18, VEX_NOVSR_256_66_0F38_W0, 0x18}},
  1747  	{AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_NOVSR_128_F2_0F_WIG, 0x12, VEX_NOVSR_256_F2_0F_WIG, 0x12}},
  1748  	{AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_NOVSR_128_F3_0F_WIG, 0x16, VEX_NOVSR_256_F3_0F_WIG, 0x16}},
  1749  	{AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_NOVSR_128_F3_0F_WIG, 0x12, VEX_NOVSR_256_F3_0F_WIG, 0x12}},
  1750  
  1751  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1752  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1753  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1754  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1755  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1756  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1757  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1758  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1759  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1760  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1761  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1762  	{obj.AEND, nil, 0, [23]uint8{}},
  1763  	{0, nil, 0, [23]uint8{}},
  1764  }
  1765  
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// It is filled in by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1767  
  1768  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1769  // This happens on systems like Solaris that call .so functions instead of system calls.
  1770  // It does not seem to be necessary for any other systems. This is probably working
  1771  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1772  // what that bug is. And this does fix it.
  1773  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1774  	if ctxt.Headtype == objabi.Hsolaris {
  1775  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1776  		return strings.HasPrefix(s.Name, "libc_")
  1777  	}
  1778  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1779  }
  1780  
  1781  // single-instruction no-ops of various lengths.
  1782  // constructed by hand and disassembled with gdb to verify.
  1783  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1784  var nop = [][16]uint8{
  1785  	{0x90},
  1786  	{0x66, 0x90},
  1787  	{0x0F, 0x1F, 0x00},
  1788  	{0x0F, 0x1F, 0x40, 0x00},
  1789  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1790  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1791  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1792  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1793  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1794  }
  1795  
  1796  // Native Client rejects the repeated 0x66 prefix.
  1797  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1798  func fillnop(p []byte, n int) {
  1799  	var m int
  1800  
  1801  	for n > 0 {
  1802  		m = n
  1803  		if m > len(nop) {
  1804  			m = len(nop)
  1805  		}
  1806  		copy(p[:m], nop[m-1][:m])
  1807  		p = p[m:]
  1808  		n -= m
  1809  	}
  1810  }
  1811  
  1812  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1813  	s.Grow(int64(c) + int64(pad))
  1814  	fillnop(s.P[c:], int(pad))
  1815  	return c + pad
  1816  }
  1817  
  1818  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1819  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1820  		return l
  1821  	}
  1822  	return q
  1823  }
  1824  
  1825  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  1826  	if s.P != nil {
  1827  		return
  1828  	}
  1829  
  1830  	if ycover[0] == 0 {
  1831  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  1832  	}
  1833  
  1834  	var asmbuf AsmBuf
  1835  
  1836  	for p := s.Func.Text; p != nil; p = p.Link {
  1837  		if p.To.Type == obj.TYPE_BRANCH {
  1838  			if p.Pcond == nil {
  1839  				p.Pcond = p
  1840  			}
  1841  		}
  1842  		if p.As == AADJSP {
  1843  			p.To.Type = obj.TYPE_REG
  1844  			p.To.Reg = REG_SP
  1845  			v := int32(-p.From.Offset)
  1846  			p.From.Offset = int64(v)
  1847  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1848  			if v < 0 {
  1849  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1850  				v = -v
  1851  				p.From.Offset = int64(v)
  1852  			}
  1853  
  1854  			if v == 0 {
  1855  				p.As = obj.ANOP
  1856  			}
  1857  		}
  1858  	}
  1859  
  1860  	var q *obj.Prog
  1861  	var count int64 // rough count of number of instructions
  1862  	for p := s.Func.Text; p != nil; p = p.Link {
  1863  		count++
  1864  		p.Back = 2 // use short branches first time through
  1865  		q = p.Pcond
  1866  		if q != nil && (q.Back&2 != 0) {
  1867  			p.Back |= 1 // backward jump
  1868  			q.Back |= 4 // loop head
  1869  		}
  1870  
  1871  		if p.As == AADJSP {
  1872  			p.To.Type = obj.TYPE_REG
  1873  			p.To.Reg = REG_SP
  1874  			v := int32(-p.From.Offset)
  1875  			p.From.Offset = int64(v)
  1876  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1877  			if v < 0 {
  1878  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1879  				v = -v
  1880  				p.From.Offset = int64(v)
  1881  			}
  1882  
  1883  			if v == 0 {
  1884  				p.As = obj.ANOP
  1885  			}
  1886  		}
  1887  	}
  1888  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1889  
  1890  	n := 0
  1891  	var c int32
  1892  	errors := ctxt.Errors
  1893  	for {
  1894  		loop := int32(0)
  1895  		for i := range s.R {
  1896  			s.R[i] = obj.Reloc{}
  1897  		}
  1898  		s.R = s.R[:0]
  1899  		s.P = s.P[:0]
  1900  		c = 0
  1901  		for p := s.Func.Text; p != nil; p = p.Link {
  1902  			if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 {
  1903  
  1904  				// pad everything to avoid crossing 32-byte boundary
  1905  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1906  					c = naclpad(ctxt, s, c, -c&31)
  1907  				}
  1908  
  1909  				// pad call deferreturn to start at 32-byte boundary
  1910  				// so that subtracting 5 in jmpdefer will jump back
  1911  				// to that boundary and rerun the call.
  1912  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1913  					c = naclpad(ctxt, s, c, -c&31)
  1914  				}
  1915  
  1916  				// pad call to end at 32-byte boundary
  1917  				if p.As == obj.ACALL {
  1918  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1919  				}
  1920  
  1921  				// the linker treats REP and STOSQ as different instructions
  1922  				// but in fact the REP is a prefix on the STOSQ.
  1923  				// make sure REP has room for 2 more bytes, so that
  1924  				// padding will not be inserted before the next instruction.
  1925  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1926  					c = naclpad(ctxt, s, c, -c&31)
  1927  				}
  1928  
  1929  				// same for LOCK.
  1930  				// various instructions follow; the longest is 4 bytes.
  1931  				// give ourselves 8 bytes so as to avoid surprises.
  1932  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1933  					c = naclpad(ctxt, s, c, -c&31)
  1934  				}
  1935  			}
  1936  
  1937  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1938  				// pad with NOPs
  1939  				v := -c & (LoopAlign - 1)
  1940  
  1941  				if v <= MaxLoopPad {
  1942  					s.Grow(int64(c) + int64(v))
  1943  					fillnop(s.P[c:], int(v))
  1944  					c += v
  1945  				}
  1946  			}
  1947  
  1948  			p.Pc = int64(c)
  1949  
  1950  			// process forward jumps to p
  1951  			for q = p.Rel; q != nil; q = q.Forwd {
  1952  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1953  				if q.Back&2 != 0 { // short
  1954  					if v > 127 {
  1955  						loop++
  1956  						q.Back ^= 2
  1957  					}
  1958  
  1959  					if q.As == AJCXZL || q.As == AXBEGIN {
  1960  						s.P[q.Pc+2] = byte(v)
  1961  					} else {
  1962  						s.P[q.Pc+1] = byte(v)
  1963  					}
  1964  				} else {
  1965  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1966  				}
  1967  			}
  1968  
  1969  			p.Rel = nil
  1970  
  1971  			p.Pc = int64(c)
  1972  			asmbuf.asmins(ctxt, s, p)
  1973  			m := asmbuf.Len()
  1974  			if int(p.Isize) != m {
  1975  				p.Isize = uint8(m)
  1976  				loop++
  1977  			}
  1978  
  1979  			s.Grow(p.Pc + int64(m))
  1980  			copy(s.P[p.Pc:], asmbuf.Bytes())
  1981  			c += int32(m)
  1982  		}
  1983  
  1984  		n++
  1985  		if n > 20 {
  1986  			ctxt.Diag("span must be looping")
  1987  			log.Fatalf("loop")
  1988  		}
  1989  		if loop == 0 {
  1990  			break
  1991  		}
  1992  		if ctxt.Errors > errors {
  1993  			return
  1994  		}
  1995  	}
  1996  
  1997  	if ctxt.Headtype == objabi.Hnacl {
  1998  		c = naclpad(ctxt, s, c, -c&31)
  1999  	}
  2000  
  2001  	s.Size = int64(c)
  2002  
  2003  	if false { /* debug['a'] > 1 */
  2004  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  2005  		var i int
  2006  		for i = 0; i < len(s.P); i++ {
  2007  			fmt.Printf(" %.2x", s.P[i])
  2008  			if i%16 == 15 {
  2009  				fmt.Printf("\n  %.6x", uint(i+1))
  2010  			}
  2011  		}
  2012  
  2013  		if i%16 != 0 {
  2014  			fmt.Printf("\n")
  2015  		}
  2016  
  2017  		for i := 0; i < len(s.R); i++ {
  2018  			r := &s.R[i]
  2019  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  2020  		}
  2021  	}
  2022  }
  2023  
  2024  func instinit(ctxt *obj.Link) {
  2025  	if ycover[0] != 0 {
  2026  		// Already initialized; stop now.
  2027  		// This happens in the cmd/asm tests,
  2028  		// each of which re-initializes the arch.
  2029  		return
  2030  	}
  2031  
  2032  	switch ctxt.Headtype {
  2033  	case objabi.Hplan9:
  2034  		plan9privates = ctxt.Lookup("_privates")
  2035  	case objabi.Hnacl:
  2036  		deferreturn = ctxt.Lookup("runtime.deferreturn")
  2037  	}
  2038  
  2039  	for i := 1; optab[i].as != 0; i++ {
  2040  		c := optab[i].as
  2041  		if opindex[c&obj.AMask] != nil {
  2042  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2043  		}
  2044  		opindex[c&obj.AMask] = &optab[i]
  2045  	}
  2046  
  2047  	for i := 0; i < Ymax; i++ {
  2048  		ycover[i*Ymax+i] = 1
  2049  	}
  2050  
  2051  	ycover[Yi0*Ymax+Yu2] = 1
  2052  	ycover[Yi1*Ymax+Yu2] = 1
  2053  
  2054  	ycover[Yi0*Ymax+Yi8] = 1
  2055  	ycover[Yi1*Ymax+Yi8] = 1
  2056  	ycover[Yu2*Ymax+Yi8] = 1
  2057  	ycover[Yu7*Ymax+Yi8] = 1
  2058  
  2059  	ycover[Yi0*Ymax+Yu7] = 1
  2060  	ycover[Yi1*Ymax+Yu7] = 1
  2061  	ycover[Yu2*Ymax+Yu7] = 1
  2062  
  2063  	ycover[Yi0*Ymax+Yu8] = 1
  2064  	ycover[Yi1*Ymax+Yu8] = 1
  2065  	ycover[Yu2*Ymax+Yu8] = 1
  2066  	ycover[Yu7*Ymax+Yu8] = 1
  2067  
  2068  	ycover[Yi0*Ymax+Ys32] = 1
  2069  	ycover[Yi1*Ymax+Ys32] = 1
  2070  	ycover[Yu2*Ymax+Ys32] = 1
  2071  	ycover[Yu7*Ymax+Ys32] = 1
  2072  	ycover[Yu8*Ymax+Ys32] = 1
  2073  	ycover[Yi8*Ymax+Ys32] = 1
  2074  
  2075  	ycover[Yi0*Ymax+Yi32] = 1
  2076  	ycover[Yi1*Ymax+Yi32] = 1
  2077  	ycover[Yu2*Ymax+Yi32] = 1
  2078  	ycover[Yu7*Ymax+Yi32] = 1
  2079  	ycover[Yu8*Ymax+Yi32] = 1
  2080  	ycover[Yi8*Ymax+Yi32] = 1
  2081  	ycover[Ys32*Ymax+Yi32] = 1
  2082  
  2083  	ycover[Yi0*Ymax+Yi64] = 1
  2084  	ycover[Yi1*Ymax+Yi64] = 1
  2085  	ycover[Yu7*Ymax+Yi64] = 1
  2086  	ycover[Yu2*Ymax+Yi64] = 1
  2087  	ycover[Yu8*Ymax+Yi64] = 1
  2088  	ycover[Yi8*Ymax+Yi64] = 1
  2089  	ycover[Ys32*Ymax+Yi64] = 1
  2090  	ycover[Yi32*Ymax+Yi64] = 1
  2091  
  2092  	ycover[Yal*Ymax+Yrb] = 1
  2093  	ycover[Ycl*Ymax+Yrb] = 1
  2094  	ycover[Yax*Ymax+Yrb] = 1
  2095  	ycover[Ycx*Ymax+Yrb] = 1
  2096  	ycover[Yrx*Ymax+Yrb] = 1
  2097  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2098  
  2099  	ycover[Ycl*Ymax+Ycx] = 1
  2100  
  2101  	ycover[Yax*Ymax+Yrx] = 1
  2102  	ycover[Ycx*Ymax+Yrx] = 1
  2103  
  2104  	ycover[Yax*Ymax+Yrl] = 1
  2105  	ycover[Ycx*Ymax+Yrl] = 1
  2106  	ycover[Yrx*Ymax+Yrl] = 1
  2107  	ycover[Yrl32*Ymax+Yrl] = 1
  2108  
  2109  	ycover[Yf0*Ymax+Yrf] = 1
  2110  
  2111  	ycover[Yal*Ymax+Ymb] = 1
  2112  	ycover[Ycl*Ymax+Ymb] = 1
  2113  	ycover[Yax*Ymax+Ymb] = 1
  2114  	ycover[Ycx*Ymax+Ymb] = 1
  2115  	ycover[Yrx*Ymax+Ymb] = 1
  2116  	ycover[Yrb*Ymax+Ymb] = 1
  2117  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2118  	ycover[Ym*Ymax+Ymb] = 1
  2119  
  2120  	ycover[Yax*Ymax+Yml] = 1
  2121  	ycover[Ycx*Ymax+Yml] = 1
  2122  	ycover[Yrx*Ymax+Yml] = 1
  2123  	ycover[Yrl*Ymax+Yml] = 1
  2124  	ycover[Yrl32*Ymax+Yml] = 1
  2125  	ycover[Ym*Ymax+Yml] = 1
  2126  
  2127  	ycover[Yax*Ymax+Ymm] = 1
  2128  	ycover[Ycx*Ymax+Ymm] = 1
  2129  	ycover[Yrx*Ymax+Ymm] = 1
  2130  	ycover[Yrl*Ymax+Ymm] = 1
  2131  	ycover[Yrl32*Ymax+Ymm] = 1
  2132  	ycover[Ym*Ymax+Ymm] = 1
  2133  	ycover[Ymr*Ymax+Ymm] = 1
  2134  
  2135  	ycover[Ym*Ymax+Yxm] = 1
  2136  	ycover[Yxr*Ymax+Yxm] = 1
  2137  
  2138  	ycover[Ym*Ymax+Yym] = 1
  2139  	ycover[Yyr*Ymax+Yym] = 1
  2140  
  2141  	for i := 0; i < MAXREG; i++ {
  2142  		reg[i] = -1
  2143  		if i >= REG_AL && i <= REG_R15B {
  2144  			reg[i] = (i - REG_AL) & 7
  2145  			if i >= REG_SPB && i <= REG_DIB {
  2146  				regrex[i] = 0x40
  2147  			}
  2148  			if i >= REG_R8B && i <= REG_R15B {
  2149  				regrex[i] = Rxr | Rxx | Rxb
  2150  			}
  2151  		}
  2152  
  2153  		if i >= REG_AH && i <= REG_BH {
  2154  			reg[i] = 4 + ((i - REG_AH) & 7)
  2155  		}
  2156  		if i >= REG_AX && i <= REG_R15 {
  2157  			reg[i] = (i - REG_AX) & 7
  2158  			if i >= REG_R8 {
  2159  				regrex[i] = Rxr | Rxx | Rxb
  2160  			}
  2161  		}
  2162  
  2163  		if i >= REG_F0 && i <= REG_F0+7 {
  2164  			reg[i] = (i - REG_F0) & 7
  2165  		}
  2166  		if i >= REG_M0 && i <= REG_M0+7 {
  2167  			reg[i] = (i - REG_M0) & 7
  2168  		}
  2169  		if i >= REG_X0 && i <= REG_X0+15 {
  2170  			reg[i] = (i - REG_X0) & 7
  2171  			if i >= REG_X0+8 {
  2172  				regrex[i] = Rxr | Rxx | Rxb
  2173  			}
  2174  		}
  2175  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2176  			reg[i] = (i - REG_Y0) & 7
  2177  			if i >= REG_Y0+8 {
  2178  				regrex[i] = Rxr | Rxx | Rxb
  2179  			}
  2180  		}
  2181  
  2182  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2183  			regrex[i] = Rxr
  2184  		}
  2185  	}
  2186  }
  2187  
  2188  var isAndroid = (objabi.GOOS == "android")
  2189  
  2190  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2191  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2192  		return 0
  2193  	}
  2194  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2195  		switch a.Reg {
  2196  		case REG_CS:
  2197  			return 0x2e
  2198  
  2199  		case REG_DS:
  2200  			return 0x3e
  2201  
  2202  		case REG_ES:
  2203  			return 0x26
  2204  
  2205  		case REG_FS:
  2206  			return 0x64
  2207  
  2208  		case REG_GS:
  2209  			return 0x65
  2210  
  2211  		case REG_TLS:
  2212  			// NOTE: Systems listed here should be only systems that
  2213  			// support direct TLS references like 8(TLS) implemented as
  2214  			// direct references from FS or GS. Systems that require
  2215  			// the initial-exec model, where you load the TLS base into
  2216  			// a register and then index from that register, do not reach
  2217  			// this code and should not be listed.
  2218  			if ctxt.Arch.Family == sys.I386 {
  2219  				switch ctxt.Headtype {
  2220  				default:
  2221  					if isAndroid {
  2222  						return 0x65 // GS
  2223  					}
  2224  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2225  
  2226  				case objabi.Hdarwin,
  2227  					objabi.Hdragonfly,
  2228  					objabi.Hfreebsd,
  2229  					objabi.Hnetbsd,
  2230  					objabi.Hopenbsd:
  2231  					return 0x65 // GS
  2232  				}
  2233  			}
  2234  
  2235  			switch ctxt.Headtype {
  2236  			default:
  2237  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2238  
  2239  			case objabi.Hlinux:
  2240  				if isAndroid {
  2241  					return 0x64 // FS
  2242  				}
  2243  
  2244  				if ctxt.Flag_shared {
  2245  					log.Fatalf("unknown TLS base register for linux with -shared")
  2246  				} else {
  2247  					return 0x64 // FS
  2248  				}
  2249  
  2250  			case objabi.Hdragonfly,
  2251  				objabi.Hfreebsd,
  2252  				objabi.Hnetbsd,
  2253  				objabi.Hopenbsd,
  2254  				objabi.Hsolaris:
  2255  				return 0x64 // FS
  2256  
  2257  			case objabi.Hdarwin:
  2258  				return 0x65 // GS
  2259  			}
  2260  		}
  2261  	}
  2262  
  2263  	if ctxt.Arch.Family == sys.I386 {
  2264  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2265  			// When building for inclusion into a shared library, an instruction of the form
  2266  			//     MOVL 0(CX)(TLS*1), AX
  2267  			// becomes
  2268  			//     mov %gs:(%ecx), %eax
  2269  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2270  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2271  			// a shared library the instruction it becomes
  2272  			//     mov 0x0(%ecx), $eax
  2273  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2274  			if a.Offset != 0 {
  2275  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2276  			}
  2277  			return 0x65 // GS
  2278  		}
  2279  		return 0
  2280  	}
  2281  
  2282  	switch a.Index {
  2283  	case REG_CS:
  2284  		return 0x2e
  2285  
  2286  	case REG_DS:
  2287  		return 0x3e
  2288  
  2289  	case REG_ES:
  2290  		return 0x26
  2291  
  2292  	case REG_TLS:
  2293  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2294  			// When building for inclusion into a shared library, an instruction of the form
  2295  			//     MOV 0(CX)(TLS*1), AX
  2296  			// becomes
  2297  			//     mov %fs:(%rcx), %rax
  2298  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2299  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2300  			// a shared library the instruction does not require a prefix.
  2301  			if a.Offset != 0 {
  2302  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2303  			}
  2304  			return 0x64
  2305  		}
  2306  
  2307  	case REG_FS:
  2308  		return 0x64
  2309  
  2310  	case REG_GS:
  2311  		return 0x65
  2312  	}
  2313  
  2314  	return 0
  2315  }
  2316  
// oclass classifies operand a of instruction p and returns the matching
// operand class, one of the Y* constants. It returns Yxxx when the operand
// is rejected (an unsupported combination of fields or an unknown register).
// Some rejected forms are additionally reported through ctxt.Diag.
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		// Only a named operand without base, index or scale is accepted.
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		if a.Index == REG_SP {
			// Can't use SP as the index register
			return Yxxx
		}
		if ctxt.Arch.Family == sys.AMD64 {
			// Offset must fit in a 32-bit signed field (or fit in a 32-bit unsigned field
			// where the sign extension doesn't matter).
			// Note: The latter happens only in assembly, for example crypto/sha1/sha1block_amd64.s.
			if !(a.Offset == int64(int32(a.Offset)) ||
				a.Offset == int64(uint32(a.Offset)) && p.As == ALEAL) {
				return Yxxx
			}
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP.  The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			if a.Sym != nil && useAbs(ctxt, a.Sym) {
				return Yi32
			}
			return Yiauto // use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		// Any remaining TYPE_ADDR operand is classified by its offset,
		// exactly like a constant.
		fallthrough

		// fall through

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		// On 386 only the low 32 bits of the constant, sign-extended, are classified.
		v := a.Offset
		if ctxt.Arch.Family == sys.I386 {
			v = int64(int32(v))
		}
		// The checks below run from the most specific class to the widest;
		// the first one that holds v wins.
		if v == 0 {
			if p.Mark&PRESERVEFLAGS != 0 {
				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
				return Yu7
			}
			return Yi0
		}
		if v == 1 {
			return Yi1
		}
		if v >= 0 && v <= 3 {
			return Yu2
		}
		if v >= 0 && v <= 127 {
			return Yu7
		}
		if v >= 0 && v <= 255 {
			return Yu8
		}
		if v >= -128 && v <= 127 {
			return Yi8
		}
		if ctxt.Arch.Family == sys.I386 {
			return Yi32
		}
		l := int32(v)
		if int64(l) == v {
			return Ys32 /* can sign extend */
		}
		if v>>32 == 0 {
			return Yi32 /* unsigned */
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	// Every non-register operand type was handled by the switch above.
	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	// These byte registers are rejected on 386; elsewhere they are
	// classified like the other byte registers below.
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	// R8-R15 are rejected on 386; elsewhere they share the class of
	// SP, BP, SI and DI.
	case REG_R8, /* not really Yrl */
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Arch.Family == sys.I386 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		if ctxt.Arch.Family == sys.I386 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	case REG_X0 + 0,
		REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	// Register not listed above.
	return Yxxx
}
  2652  
  2653  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  2654  // and hold assembly state.
  2655  type AsmBuf struct {
  2656  	buf     [100]byte
  2657  	off     int
  2658  	rexflag int
  2659  	vexflag int
  2660  	rep     int
  2661  	repn    int
  2662  	lock    bool
  2663  }
  2664  
  2665  // Put1 appends one byte to the end of the buffer.
  2666  func (a *AsmBuf) Put1(x byte) {
  2667  	a.buf[a.off] = x
  2668  	a.off++
  2669  }
  2670  
  2671  // Put2 appends two bytes to the end of the buffer.
  2672  func (a *AsmBuf) Put2(x, y byte) {
  2673  	a.buf[a.off+0] = x
  2674  	a.buf[a.off+1] = y
  2675  	a.off += 2
  2676  }
  2677  
  2678  // Put3 appends three bytes to the end of the buffer.
  2679  func (a *AsmBuf) Put3(x, y, z byte) {
  2680  	a.buf[a.off+0] = x
  2681  	a.buf[a.off+1] = y
  2682  	a.buf[a.off+2] = z
  2683  	a.off += 3
  2684  }
  2685  
  2686  // Put4 appends four bytes to the end of the buffer.
  2687  func (a *AsmBuf) Put4(x, y, z, w byte) {
  2688  	a.buf[a.off+0] = x
  2689  	a.buf[a.off+1] = y
  2690  	a.buf[a.off+2] = z
  2691  	a.buf[a.off+3] = w
  2692  	a.off += 4
  2693  }
  2694  
  2695  // PutInt16 writes v into the buffer using little-endian encoding.
  2696  func (a *AsmBuf) PutInt16(v int16) {
  2697  	a.buf[a.off+0] = byte(v)
  2698  	a.buf[a.off+1] = byte(v >> 8)
  2699  	a.off += 2
  2700  }
  2701  
  2702  // PutInt32 writes v into the buffer using little-endian encoding.
  2703  func (a *AsmBuf) PutInt32(v int32) {
  2704  	a.buf[a.off+0] = byte(v)
  2705  	a.buf[a.off+1] = byte(v >> 8)
  2706  	a.buf[a.off+2] = byte(v >> 16)
  2707  	a.buf[a.off+3] = byte(v >> 24)
  2708  	a.off += 4
  2709  }
  2710  
  2711  // PutInt64 writes v into the buffer using little-endian encoding.
  2712  func (a *AsmBuf) PutInt64(v int64) {
  2713  	a.buf[a.off+0] = byte(v)
  2714  	a.buf[a.off+1] = byte(v >> 8)
  2715  	a.buf[a.off+2] = byte(v >> 16)
  2716  	a.buf[a.off+3] = byte(v >> 24)
  2717  	a.buf[a.off+4] = byte(v >> 32)
  2718  	a.buf[a.off+5] = byte(v >> 40)
  2719  	a.buf[a.off+6] = byte(v >> 48)
  2720  	a.buf[a.off+7] = byte(v >> 56)
  2721  	a.off += 8
  2722  }
  2723  
  2724  // Put copies b into the buffer.
  2725  func (a *AsmBuf) Put(b []byte) {
  2726  	copy(a.buf[a.off:], b)
  2727  	a.off += len(b)
  2728  }
  2729  
  2730  // Insert inserts b at offset i.
  2731  func (a *AsmBuf) Insert(i int, b byte) {
  2732  	a.off++
  2733  	copy(a.buf[i+1:a.off], a.buf[i:a.off-1])
  2734  	a.buf[i] = b
  2735  }
  2736  
  2737  // Last returns the byte at the end of the buffer.
  2738  func (a *AsmBuf) Last() byte { return a.buf[a.off-1] }
  2739  
  2740  // Len returns the length of the buffer.
  2741  func (a *AsmBuf) Len() int { return a.off }
  2742  
  2743  // Bytes returns the contents of the buffer.
  2744  func (a *AsmBuf) Bytes() []byte { return a.buf[:a.off] }
  2745  
  2746  // Reset empties the buffer.
  2747  func (a *AsmBuf) Reset() { a.off = 0 }
  2748  
  2749  // At returns the byte at offset i.
  2750  func (a *AsmBuf) At(i int) byte { return a.buf[i] }
  2751  
  2752  func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2753  	var i int
  2754  
  2755  	switch index {
  2756  	default:
  2757  		goto bad
  2758  
  2759  	case REG_NONE:
  2760  		i = 4 << 3
  2761  		goto bas
  2762  
  2763  	case REG_R8,
  2764  		REG_R9,
  2765  		REG_R10,
  2766  		REG_R11,
  2767  		REG_R12,
  2768  		REG_R13,
  2769  		REG_R14,
  2770  		REG_R15:
  2771  		if ctxt.Arch.Family == sys.I386 {
  2772  			goto bad
  2773  		}
  2774  		fallthrough
  2775  
  2776  	case REG_AX,
  2777  		REG_CX,
  2778  		REG_DX,
  2779  		REG_BX,
  2780  		REG_BP,
  2781  		REG_SI,
  2782  		REG_DI:
  2783  		i = reg[index] << 3
  2784  	}
  2785  
  2786  	switch scale {
  2787  	default:
  2788  		goto bad
  2789  
  2790  	case 1:
  2791  		break
  2792  
  2793  	case 2:
  2794  		i |= 1 << 6
  2795  
  2796  	case 4:
  2797  		i |= 2 << 6
  2798  
  2799  	case 8:
  2800  		i |= 3 << 6
  2801  	}
  2802  
  2803  bas:
  2804  	switch base {
  2805  	default:
  2806  		goto bad
  2807  
  2808  	case REG_NONE: /* must be mod=00 */
  2809  		i |= 5
  2810  
  2811  	case REG_R8,
  2812  		REG_R9,
  2813  		REG_R10,
  2814  		REG_R11,
  2815  		REG_R12,
  2816  		REG_R13,
  2817  		REG_R14,
  2818  		REG_R15:
  2819  		if ctxt.Arch.Family == sys.I386 {
  2820  			goto bad
  2821  		}
  2822  		fallthrough
  2823  
  2824  	case REG_AX,
  2825  		REG_CX,
  2826  		REG_DX,
  2827  		REG_BX,
  2828  		REG_SP,
  2829  		REG_BP,
  2830  		REG_SI,
  2831  		REG_DI:
  2832  		i |= reg[base]
  2833  	}
  2834  
  2835  	asmbuf.Put1(byte(i))
  2836  	return
  2837  
  2838  bad:
  2839  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2840  	asmbuf.Put1(0)
  2841  	return
  2842  }
  2843  
  2844  func (asmbuf *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  2845  	var rel obj.Reloc
  2846  
  2847  	v := vaddr(ctxt, p, a, &rel)
  2848  	if rel.Siz != 0 {
  2849  		if rel.Siz != 4 {
  2850  			ctxt.Diag("bad reloc")
  2851  		}
  2852  		r := obj.Addrel(cursym)
  2853  		*r = rel
  2854  		r.Off = int32(p.Pc + int64(asmbuf.Len()))
  2855  	}
  2856  
  2857  	asmbuf.PutInt32(int32(v))
  2858  }
  2859  
  2860  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2861  	if r != nil {
  2862  		*r = obj.Reloc{}
  2863  	}
  2864  
  2865  	switch a.Name {
  2866  	case obj.NAME_STATIC,
  2867  		obj.NAME_GOTREF,
  2868  		obj.NAME_EXTERN:
  2869  		s := a.Sym
  2870  		if r == nil {
  2871  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2872  			log.Fatalf("reloc")
  2873  		}
  2874  
  2875  		if a.Name == obj.NAME_GOTREF {
  2876  			r.Siz = 4
  2877  			r.Type = objabi.R_GOTPCREL
  2878  		} else if useAbs(ctxt, s) {
  2879  			r.Siz = 4
  2880  			r.Type = objabi.R_ADDR
  2881  		} else {
  2882  			r.Siz = 4
  2883  			r.Type = objabi.R_PCREL
  2884  		}
  2885  
  2886  		r.Off = -1 // caller must fill in
  2887  		r.Sym = s
  2888  		r.Add = a.Offset
  2889  
  2890  		return 0
  2891  	}
  2892  
  2893  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2894  		if r == nil {
  2895  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2896  			log.Fatalf("reloc")
  2897  		}
  2898  
  2899  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  2900  			r.Type = objabi.R_TLS_LE
  2901  			r.Siz = 4
  2902  			r.Off = -1 // caller must fill in
  2903  			r.Add = a.Offset
  2904  		}
  2905  		return 0
  2906  	}
  2907  
  2908  	return a.Offset
  2909  }
  2910  
// asmandsz appends the ModR/M byte for operand a, followed by a SIB byte
// and an 8-bit or 32-bit displacement when the addressing form needs them.
// r is placed in bits 3-5 of the ModR/M byte. The bits of rex selected by
// 0x40|Rxr, together with the extension bits of the registers used by a,
// are ORed into asmbuf.rexflag. When the displacement needs a relocation,
// it is added to cursym at the offset of the displacement bytes.
// Operands that cannot be encoded are reported through ctxt.Diag.
// m64 is not referenced by this function.
func (asmbuf *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Keep only the 0x40 and Rxr bits of the caller-supplied rex.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && asmbuf.rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR always ends at bad; the checks only add a more
		// specific diagnostic first.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		// Register operand: mod=3 with the register number in the low bits.
		asmbuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		asmbuf.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: the low bits of the
	// ModR/M byte are 4 and asmidx writes the SIB byte that follows.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		asmbuf.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 followed by a 32-bit displacement.
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// mod=0: no displacement bytes. BP and R13 are excluded as base;
		// NOTE(review): presumably because their base encoding with mod=0
		// is the one asmidx uses for "no base" - confirm against the reg table.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// mod=1: one displacement byte.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			asmbuf.Put1(byte(v))
			return
		}

		// mod=2: 32-bit displacement.
		asmbuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with REG_TLS as index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	asmbuf.rexflag |= regrex[base]&Rxb | rex
	// No usable base register (none, a segment register, or TLS):
	// the operand is a bare 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			asmbuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		asmbuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always get a SIB byte (written by asmidx with no index).
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			asmbuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			asmbuf.Put1(byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Remaining general registers as base: ModR/M alone, with the shortest
	// displacement form that fits.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// The displacement becomes the addend of an R_TLS_LE relocation.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

	// putrelv writes the 32-bit displacement v, first adding rel to cursym
	// (positioned at the displacement) when a relocation was requested.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(asmbuf.Len()))
	}

	asmbuf.PutInt32(v)
	return

	// bad reports an operand this function cannot encode; nothing further is emitted.
bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  3111  
  3112  func (asmbuf *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3113  	asmbuf.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3114  }
  3115  
  3116  func (asmbuf *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3117  	asmbuf.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3118  }
  3119  
  3120  func bytereg(a *obj.Addr, t *uint8) {
  3121  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3122  		a.Reg += REG_AL - REG_AX
  3123  		*t = 0
  3124  	}
  3125  }
  3126  
  3127  func unbytereg(a *obj.Addr, t *uint8) {
  3128  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3129  		a.Reg += REG_AX - REG_AL
  3130  		*t = 0
  3131  	}
  3132  }
  3133  
  3134  const (
  3135  	E = 0xff
  3136  )
  3137  
// ymovtab lists special-case instruction encodings. Each entry holds, in
// order: the instruction, three operand classes (Ynone where no operand is
// expected), a small integer code, and up to four bytes of encoding data.
// The table ends with an all-zero entry.
// NOTE(review): the three classes presumably correspond to the From, From3
// and To operands, and the code presumably selects how the bytes are
// interpreted - confirm against the Movtab declaration and its consumer.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3277  
  3278  func isax(a *obj.Addr) bool {
  3279  	switch a.Reg {
  3280  	case REG_AX, REG_AL, REG_AH:
  3281  		return true
  3282  	}
  3283  
  3284  	if a.Index == REG_AX {
  3285  		return true
  3286  	}
  3287  	return false
  3288  }
  3289  
  3290  func subreg(p *obj.Prog, from int, to int) {
  3291  	if false { /* debug['Q'] */
  3292  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3293  	}
  3294  
  3295  	if int(p.From.Reg) == from {
  3296  		p.From.Reg = int16(to)
  3297  		p.Ft = 0
  3298  	}
  3299  
  3300  	if int(p.To.Reg) == from {
  3301  		p.To.Reg = int16(to)
  3302  		p.Tt = 0
  3303  	}
  3304  
  3305  	if int(p.From.Index) == from {
  3306  		p.From.Index = int16(to)
  3307  		p.Ft = 0
  3308  	}
  3309  
  3310  	if int(p.To.Index) == from {
  3311  		p.To.Index = int16(to)
  3312  		p.Tt = 0
  3313  	}
  3314  
  3315  	if false { /* debug['Q'] */
  3316  		fmt.Printf("%v\n", p)
  3317  	}
  3318  }
  3319  
  3320  func (asmbuf *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3321  	switch op {
  3322  	case Pm, Pe, Pf2, Pf3:
  3323  		if osize != 1 {
  3324  			if op != Pm {
  3325  				asmbuf.Put1(byte(op))
  3326  			}
  3327  			asmbuf.Put1(Pm)
  3328  			z++
  3329  			op = int(o.op[z])
  3330  			break
  3331  		}
  3332  		fallthrough
  3333  
  3334  	default:
  3335  		if asmbuf.Len() == 0 || asmbuf.Last() != Pm {
  3336  			asmbuf.Put1(Pm)
  3337  		}
  3338  	}
  3339  
  3340  	asmbuf.Put1(byte(op))
  3341  	return z
  3342  }
  3343  
  3344  var bpduff1 = []byte{
  3345  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3346  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3347  }
  3348  
  3349  var bpduff2 = []byte{
  3350  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3351  }
  3352  
  3353  // Emit VEX prefix and opcode byte.
  3354  // The three addresses are the r/m, vvvv, and reg fields.
  3355  // The reg and rm arguments appear in the same order as the
  3356  // arguments to asmand, which typically follows the call to asmvex.
  3357  // The final two arguments are the VEX prefix (see encoding above)
  3358  // and the opcode byte.
  3359  // For details about vex prefix see:
  3360  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3361  func (asmbuf *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3362  	asmbuf.vexflag = 1
  3363  	rexR := 0
  3364  	if r != nil {
  3365  		rexR = regrex[r.Reg] & Rxr
  3366  	}
  3367  	rexB := 0
  3368  	rexX := 0
  3369  	if rm != nil {
  3370  		rexB = regrex[rm.Reg] & Rxb
  3371  		rexX = regrex[rm.Index] & Rxx
  3372  	}
  3373  	vexM := (vex >> 3) & 0xF
  3374  	vexWLP := vex & 0x87
  3375  	vexV := byte(0)
  3376  	if v != nil {
  3377  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3378  	}
  3379  	vexV ^= 0xF
  3380  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3381  		// Can use 2-byte encoding.
  3382  		asmbuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3383  	} else {
  3384  		// Must use 3-byte encoding.
  3385  		asmbuf.Put3(0xc4,
  3386  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3387  			vexV<<3|vexWLP,
  3388  		)
  3389  	}
  3390  	asmbuf.Put1(opcode)
  3391  }
  3392  
  3393  func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  3394  	o := opindex[p.As&obj.AMask]
  3395  
  3396  	if o == nil {
  3397  		ctxt.Diag("asmins: missing op %v", p)
  3398  		return
  3399  	}
  3400  
  3401  	pre := prefixof(ctxt, p, &p.From)
  3402  	if pre != 0 {
  3403  		asmbuf.Put1(byte(pre))
  3404  	}
  3405  	pre = prefixof(ctxt, p, &p.To)
  3406  	if pre != 0 {
  3407  		asmbuf.Put1(byte(pre))
  3408  	}
  3409  
  3410  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3411  	// which encodes as SHRQ $32(DX*0), AX.
  3412  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3413  	// Change encoding generated by assemblers and compilers and remove.
  3414  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3415  		p.SetFrom3(obj.Addr{
  3416  			Type: obj.TYPE_REG,
  3417  			Reg:  p.From.Index,
  3418  		})
  3419  		p.From.Index = 0
  3420  	}
  3421  
  3422  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3423  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3424  	switch p.As {
  3425  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3426  		if p.From3Type() == obj.TYPE_NONE {
  3427  			p.SetFrom3(p.From)
  3428  			p.From = obj.Addr{}
  3429  			p.From.Type = obj.TYPE_CONST
  3430  			p.From.Offset = p.To.Offset
  3431  			p.To.Offset = 0
  3432  		}
  3433  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3434  		if p.From3Type() == obj.TYPE_NONE {
  3435  			p.SetFrom3(p.To)
  3436  			p.To = obj.Addr{}
  3437  			p.To.Type = obj.TYPE_CONST
  3438  			p.To.Offset = p.GetFrom3().Offset
  3439  			p.GetFrom3().Offset = 0
  3440  		}
  3441  	}
  3442  
  3443  	if p.Ft == 0 {
  3444  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3445  	}
  3446  	if p.Tt == 0 {
  3447  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3448  	}
  3449  
  3450  	ft := int(p.Ft) * Ymax
  3451  	var f3t int
  3452  	tt := int(p.Tt) * Ymax
  3453  
  3454  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3455  	z := 0
  3456  	var a *obj.Addr
  3457  	var l int
  3458  	var op int
  3459  	var q *obj.Prog
  3460  	var r *obj.Reloc
  3461  	var rel obj.Reloc
  3462  	var v int64
  3463  
  3464  	args := make([]int, 0, 6)
  3465  	if ft != Ynone*Ymax {
  3466  		args = append(args, ft)
  3467  	}
  3468  	for i := range p.RestArgs {
  3469  		args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
  3470  	}
  3471  	if tt != Ynone*Ymax {
  3472  		args = append(args, tt)
  3473  	}
  3474  
  3475  	for _, yt := range o.ytab {
  3476  		if !yt.match(args) {
  3477  			z += int(yt.zoffset) + xo
  3478  		} else {
  3479  			switch o.prefix {
  3480  			case Px1: /* first option valid only in 32-bit mode */
  3481  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  3482  					z += int(yt.zoffset) + xo
  3483  					continue
  3484  				}
  3485  			case Pq: /* 16 bit escape and opcode escape */
  3486  				asmbuf.Put2(Pe, Pm)
  3487  
  3488  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3489  				asmbuf.rexflag |= Pw
  3490  				asmbuf.Put2(Pe, Pm)
  3491  
  3492  			case Pq4: /*  66 0F 38 */
  3493  				asmbuf.Put3(0x66, 0x0F, 0x38)
  3494  
  3495  			case Pf2, /* xmm opcode escape */
  3496  				Pf3:
  3497  				asmbuf.Put2(o.prefix, Pm)
  3498  
  3499  			case Pef3:
  3500  				asmbuf.Put3(Pe, Pf3, Pm)
  3501  
  3502  			case Pfw: /* xmm opcode escape + REX.W */
  3503  				asmbuf.rexflag |= Pw
  3504  				asmbuf.Put2(Pf3, Pm)
  3505  
  3506  			case Pm: /* opcode escape */
  3507  				asmbuf.Put1(Pm)
  3508  
  3509  			case Pe: /* 16 bit escape */
  3510  				asmbuf.Put1(Pe)
  3511  
  3512  			case Pw: /* 64-bit escape */
  3513  				if ctxt.Arch.Family != sys.AMD64 {
  3514  					ctxt.Diag("asmins: illegal 64: %v", p)
  3515  				}
  3516  				asmbuf.rexflag |= Pw
  3517  
  3518  			case Pw8: /* 64-bit escape if z >= 8 */
  3519  				if z >= 8 {
  3520  					if ctxt.Arch.Family != sys.AMD64 {
  3521  						ctxt.Diag("asmins: illegal 64: %v", p)
  3522  					}
  3523  					asmbuf.rexflag |= Pw
  3524  				}
  3525  
  3526  			case Pb: /* botch */
  3527  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3528  					goto bad
  3529  				}
  3530  				// NOTE(rsc): This is probably safe to do always,
  3531  				// but when enabled it chooses different encodings
  3532  				// than the old cmd/internal/obj/i386 code did,
  3533  				// which breaks our "same bits out" checks.
  3534  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3535  				// in the original obj/i386, and it would encode
  3536  				// (using a valid, shorter form) as 3c 00 if we enabled
  3537  				// the call to bytereg here.
  3538  				if ctxt.Arch.Family == sys.AMD64 {
  3539  					bytereg(&p.From, &p.Ft)
  3540  					bytereg(&p.To, &p.Tt)
  3541  				}
  3542  
  3543  			case P32: /* 32 bit but illegal if 64-bit mode */
  3544  				if ctxt.Arch.Family == sys.AMD64 {
  3545  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3546  				}
  3547  
  3548  			case Py: /* 64-bit only, no prefix */
  3549  				if ctxt.Arch.Family != sys.AMD64 {
  3550  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3551  				}
  3552  
  3553  			case Py1: /* 64-bit only if z < 1, no prefix */
  3554  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  3555  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3556  				}
  3557  
  3558  			case Py3: /* 64-bit only if z < 3, no prefix */
  3559  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  3560  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3561  				}
  3562  			}
  3563  
  3564  			if z >= len(o.op) {
  3565  				log.Fatalf("asmins bad table %v", p)
  3566  			}
  3567  			op = int(o.op[z])
  3568  			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
  3569  			if op == 0x0f && o.prefix != Pvex {
  3570  				asmbuf.Put1(byte(op))
  3571  				z++
  3572  				op = int(o.op[z])
  3573  			}
  3574  
  3575  			switch yt.zcase {
  3576  			default:
  3577  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3578  				return
  3579  
  3580  			case Zpseudo:
  3581  				break
  3582  
  3583  			case Zlit:
  3584  				for ; ; z++ {
  3585  					op = int(o.op[z])
  3586  					if op == 0 {
  3587  						break
  3588  					}
  3589  					asmbuf.Put1(byte(op))
  3590  				}
  3591  
  3592  			case Zlitm_r:
  3593  				for ; ; z++ {
  3594  					op = int(o.op[z])
  3595  					if op == 0 {
  3596  						break
  3597  					}
  3598  					asmbuf.Put1(byte(op))
  3599  				}
  3600  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3601  
  3602  			case Zmb_r:
  3603  				bytereg(&p.From, &p.Ft)
  3604  				fallthrough
  3605  
  3606  			case Zm_r:
  3607  				asmbuf.Put1(byte(op))
  3608  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3609  
  3610  			case Zm2_r:
  3611  				asmbuf.Put2(byte(op), o.op[z+1])
  3612  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3613  
  3614  			case Zm_r_xm:
  3615  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3616  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3617  
  3618  			case Zm_r_xm_nr:
  3619  				asmbuf.rexflag = 0
  3620  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3621  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3622  
  3623  			case Zm_r_i_xm:
  3624  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3625  				asmbuf.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  3626  				asmbuf.Put1(byte(p.To.Offset))
  3627  
  3628  			case Zibm_r, Zibr_m:
  3629  				for {
  3630  					tmp1 := z
  3631  					z++
  3632  					op = int(o.op[tmp1])
  3633  					if op == 0 {
  3634  						break
  3635  					}
  3636  					asmbuf.Put1(byte(op))
  3637  				}
  3638  				if yt.zcase == Zibr_m {
  3639  					asmbuf.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  3640  				} else {
  3641  					asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  3642  				}
  3643  				asmbuf.Put1(byte(p.From.Offset))
  3644  
  3645  			case Zaut_r:
  3646  				asmbuf.Put1(0x8d) // leal
  3647  				if p.From.Type != obj.TYPE_ADDR {
  3648  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3649  				}
  3650  				p.From.Type = obj.TYPE_MEM
  3651  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3652  				p.From.Type = obj.TYPE_ADDR
  3653  
  3654  			case Zm_o:
  3655  				asmbuf.Put1(byte(op))
  3656  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3657  
  3658  			case Zr_m:
  3659  				asmbuf.Put1(byte(op))
  3660  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3661  
  3662  			case Zvex_rm_v_r:
  3663  				asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  3664  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3665  
  3666  			case Zvex_i_r_v:
  3667  				asmbuf.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  3668  				regnum := byte(0x7)
  3669  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  3670  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  3671  				} else {
  3672  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  3673  				}
  3674  				asmbuf.Put1(byte(o.op[z+2]) | regnum)
  3675  				asmbuf.Put1(byte(p.From.Offset))
  3676  
  3677  			case Zvex_i_rm_v_r:
  3678  				imm, from, from3, to := unpackOps4(p)
  3679  				asmbuf.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  3680  				asmbuf.asmand(ctxt, cursym, p, from, to)
  3681  				asmbuf.Put1(byte(imm.Offset))
  3682  
  3683  			case Zvex_i_rm_r:
  3684  				asmbuf.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  3685  				asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  3686  				asmbuf.Put1(byte(p.From.Offset))
  3687  
  3688  			case Zvex_v_rm_r:
  3689  				asmbuf.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  3690  				asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  3691  
  3692  			case Zvex_r_v_rm:
  3693  				asmbuf.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  3694  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3695  
  3696  			case Zr_m_xm:
  3697  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3698  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3699  
  3700  			case Zr_m_xm_nr:
  3701  				asmbuf.rexflag = 0
  3702  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3703  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3704  
  3705  			case Zo_m:
  3706  				asmbuf.Put1(byte(op))
  3707  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3708  
  3709  			case Zcallindreg:
  3710  				r = obj.Addrel(cursym)
  3711  				r.Off = int32(p.Pc)
  3712  				r.Type = objabi.R_CALLIND
  3713  				r.Siz = 0
  3714  				fallthrough
  3715  
  3716  			case Zo_m64:
  3717  				asmbuf.Put1(byte(op))
  3718  				asmbuf.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  3719  
  3720  			case Zm_ibo:
  3721  				asmbuf.Put1(byte(op))
  3722  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3723  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3724  
  3725  			case Zibo_m:
  3726  				asmbuf.Put1(byte(op))
  3727  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3728  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3729  
  3730  			case Zibo_m_xm:
  3731  				z = asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3732  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3733  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3734  
  3735  			case Z_ib, Zib_:
  3736  				if yt.zcase == Zib_ {
  3737  					a = &p.From
  3738  				} else {
  3739  					a = &p.To
  3740  				}
  3741  				asmbuf.Put1(byte(op))
  3742  				if p.As == AXABORT {
  3743  					asmbuf.Put1(o.op[z+1])
  3744  				}
  3745  				asmbuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3746  
  3747  			case Zib_rp:
  3748  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3749  				asmbuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3750  
  3751  			case Zil_rp:
  3752  				asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3753  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3754  				if o.prefix == Pe {
  3755  					v = vaddr(ctxt, p, &p.From, nil)
  3756  					asmbuf.PutInt16(int16(v))
  3757  				} else {
  3758  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3759  				}
  3760  
  3761  			case Zo_iw:
  3762  				asmbuf.Put1(byte(op))
  3763  				if p.From.Type != obj.TYPE_NONE {
  3764  					v = vaddr(ctxt, p, &p.From, nil)
  3765  					asmbuf.PutInt16(int16(v))
  3766  				}
  3767  
  3768  			case Ziq_rp:
  3769  				v = vaddr(ctxt, p, &p.From, &rel)
  3770  				l = int(v >> 32)
  3771  				if l == 0 && rel.Siz != 8 {
  3772  					//p->mark |= 0100;
  3773  					//print("zero: %llux %v\n", v, p);
  3774  					asmbuf.rexflag &^= (0x40 | Rxw)
  3775  
  3776  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3777  					asmbuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3778  					if rel.Type != 0 {
  3779  						r = obj.Addrel(cursym)
  3780  						*r = rel
  3781  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3782  					}
  3783  
  3784  					asmbuf.PutInt32(int32(v))
  3785  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3786  
  3787  					//p->mark |= 0100;
  3788  					//print("sign: %llux %v\n", v, p);
  3789  					asmbuf.Put1(0xc7)
  3790  					asmbuf.asmando(ctxt, cursym, p, &p.To, 0)
  3791  
  3792  					asmbuf.PutInt32(int32(v)) // need all 8
  3793  				} else {
  3794  					//print("all: %llux %v\n", v, p);
  3795  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3796  					asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3797  					if rel.Type != 0 {
  3798  						r = obj.Addrel(cursym)
  3799  						*r = rel
  3800  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3801  					}
  3802  
  3803  					asmbuf.PutInt64(v)
  3804  				}
  3805  
  3806  			case Zib_rr:
  3807  				asmbuf.Put1(byte(op))
  3808  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3809  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3810  
  3811  			case Z_il, Zil_:
  3812  				if yt.zcase == Zil_ {
  3813  					a = &p.From
  3814  				} else {
  3815  					a = &p.To
  3816  				}
  3817  				asmbuf.Put1(byte(op))
  3818  				if o.prefix == Pe {
  3819  					v = vaddr(ctxt, p, a, nil)
  3820  					asmbuf.PutInt16(int16(v))
  3821  				} else {
  3822  					asmbuf.relput4(ctxt, cursym, p, a)
  3823  				}
  3824  
  3825  			case Zm_ilo, Zilo_m:
  3826  				asmbuf.Put1(byte(op))
  3827  				if yt.zcase == Zilo_m {
  3828  					a = &p.From
  3829  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3830  				} else {
  3831  					a = &p.To
  3832  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3833  				}
  3834  
  3835  				if o.prefix == Pe {
  3836  					v = vaddr(ctxt, p, a, nil)
  3837  					asmbuf.PutInt16(int16(v))
  3838  				} else {
  3839  					asmbuf.relput4(ctxt, cursym, p, a)
  3840  				}
  3841  
  3842  			case Zil_rr:
  3843  				asmbuf.Put1(byte(op))
  3844  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3845  				if o.prefix == Pe {
  3846  					v = vaddr(ctxt, p, &p.From, nil)
  3847  					asmbuf.PutInt16(int16(v))
  3848  				} else {
  3849  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3850  				}
  3851  
  3852  			case Z_rp:
  3853  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3854  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3855  
  3856  			case Zrp_:
  3857  				asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3858  				asmbuf.Put1(byte(op + reg[p.From.Reg]))
  3859  
  3860  			case Zclr:
  3861  				asmbuf.rexflag &^= Pw
  3862  				asmbuf.Put1(byte(op))
  3863  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3864  
  3865  			case Zcallcon, Zjmpcon:
  3866  				if yt.zcase == Zcallcon {
  3867  					asmbuf.Put1(byte(op))
  3868  				} else {
  3869  					asmbuf.Put1(o.op[z+1])
  3870  				}
  3871  				r = obj.Addrel(cursym)
  3872  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3873  				r.Type = objabi.R_PCREL
  3874  				r.Siz = 4
  3875  				r.Add = p.To.Offset
  3876  				asmbuf.PutInt32(0)
  3877  
  3878  			case Zcallind:
  3879  				asmbuf.Put2(byte(op), o.op[z+1])
  3880  				r = obj.Addrel(cursym)
  3881  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3882  				if ctxt.Arch.Family == sys.AMD64 {
  3883  					r.Type = objabi.R_PCREL
  3884  				} else {
  3885  					r.Type = objabi.R_ADDR
  3886  				}
  3887  				r.Siz = 4
  3888  				r.Add = p.To.Offset
  3889  				r.Sym = p.To.Sym
  3890  				asmbuf.PutInt32(0)
  3891  
  3892  			case Zcall, Zcallduff:
  3893  				if p.To.Sym == nil {
  3894  					ctxt.Diag("call without target")
  3895  					log.Fatalf("bad code")
  3896  				}
  3897  
  3898  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3899  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3900  				}
  3901  
  3902  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3903  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3904  					// (the call jumps into the middle of the function).
  3905  					// This makes it possible to see call sites for duffcopy/duffzero in
  3906  					// BP-based profiling tools like Linux perf (which is the
  3907  					// whole point of obj.Framepointer_enabled).
  3908  					// MOVQ BP, -16(SP)
  3909  					// LEAQ -16(SP), BP
  3910  					asmbuf.Put(bpduff1)
  3911  				}
  3912  				asmbuf.Put1(byte(op))
  3913  				r = obj.Addrel(cursym)
  3914  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3915  				r.Sym = p.To.Sym
  3916  				r.Add = p.To.Offset
  3917  				r.Type = objabi.R_CALL
  3918  				r.Siz = 4
  3919  				asmbuf.PutInt32(0)
  3920  
  3921  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  3922  					// Pop BP pushed above.
  3923  					// MOVQ 0(BP), BP
  3924  					asmbuf.Put(bpduff2)
  3925  				}
  3926  
  3927  			// TODO: jump across functions needs reloc
  3928  			case Zbr, Zjmp, Zloop:
  3929  				if p.As == AXBEGIN {
  3930  					asmbuf.Put1(byte(op))
  3931  				}
  3932  				if p.To.Sym != nil {
  3933  					if yt.zcase != Zjmp {
  3934  						ctxt.Diag("branch to ATEXT")
  3935  						log.Fatalf("bad code")
  3936  					}
  3937  
  3938  					asmbuf.Put1(o.op[z+1])
  3939  					r = obj.Addrel(cursym)
  3940  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3941  					r.Sym = p.To.Sym
  3942  					r.Type = objabi.R_PCREL
  3943  					r.Siz = 4
  3944  					asmbuf.PutInt32(0)
  3945  					break
  3946  				}
  3947  
  3948  				// Assumes q is in this function.
  3949  				// TODO: Check in input, preserve in brchain.
  3950  
  3951  				// Fill in backward jump now.
  3952  				q = p.Pcond
  3953  
  3954  				if q == nil {
  3955  					ctxt.Diag("jmp/branch/loop without target")
  3956  					log.Fatalf("bad code")
  3957  				}
  3958  
  3959  				if p.Back&1 != 0 {
  3960  					v = q.Pc - (p.Pc + 2)
  3961  					if v >= -128 && p.As != AXBEGIN {
  3962  						if p.As == AJCXZL {
  3963  							asmbuf.Put1(0x67)
  3964  						}
  3965  						asmbuf.Put2(byte(op), byte(v))
  3966  					} else if yt.zcase == Zloop {
  3967  						ctxt.Diag("loop too far: %v", p)
  3968  					} else {
  3969  						v -= 5 - 2
  3970  						if p.As == AXBEGIN {
  3971  							v--
  3972  						}
  3973  						if yt.zcase == Zbr {
  3974  							asmbuf.Put1(0x0f)
  3975  							v--
  3976  						}
  3977  
  3978  						asmbuf.Put1(o.op[z+1])
  3979  						asmbuf.PutInt32(int32(v))
  3980  					}
  3981  
  3982  					break
  3983  				}
  3984  
  3985  				// Annotate target; will fill in later.
  3986  				p.Forwd = q.Rel
  3987  
  3988  				q.Rel = p
  3989  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3990  					if p.As == AJCXZL {
  3991  						asmbuf.Put1(0x67)
  3992  					}
  3993  					asmbuf.Put2(byte(op), 0)
  3994  				} else if yt.zcase == Zloop {
  3995  					ctxt.Diag("loop too far: %v", p)
  3996  				} else {
  3997  					if yt.zcase == Zbr {
  3998  						asmbuf.Put1(0x0f)
  3999  					}
  4000  					asmbuf.Put1(o.op[z+1])
  4001  					asmbuf.PutInt32(0)
  4002  				}
  4003  
  4004  				break
  4005  
  4006  			/*
  4007  				v = q->pc - p->pc - 2;
  4008  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  4009  					*ctxt->andptr++ = op;
  4010  					*ctxt->andptr++ = v;
  4011  				} else {
  4012  					v -= 5-2;
  4013  					if(yt.zcase == Zbr) {
  4014  						*ctxt->andptr++ = 0x0f;
  4015  						v--;
  4016  					}
  4017  					*ctxt->andptr++ = o->op[z+1];
  4018  					*ctxt->andptr++ = v;
  4019  					*ctxt->andptr++ = v>>8;
  4020  					*ctxt->andptr++ = v>>16;
  4021  					*ctxt->andptr++ = v>>24;
  4022  				}
  4023  			*/
  4024  
  4025  			case Zbyte:
  4026  				v = vaddr(ctxt, p, &p.From, &rel)
  4027  				if rel.Siz != 0 {
  4028  					rel.Siz = uint8(op)
  4029  					r = obj.Addrel(cursym)
  4030  					*r = rel
  4031  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4032  				}
  4033  
  4034  				asmbuf.Put1(byte(v))
  4035  				if op > 1 {
  4036  					asmbuf.Put1(byte(v >> 8))
  4037  					if op > 2 {
  4038  						asmbuf.PutInt16(int16(v >> 16))
  4039  						if op > 4 {
  4040  							asmbuf.PutInt32(int32(v >> 32))
  4041  						}
  4042  					}
  4043  				}
  4044  			}
  4045  
  4046  			return
  4047  		}
  4048  	}
  4049  	f3t = Ynone * Ymax
  4050  	if p.GetFrom3() != nil {
  4051  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4052  	}
  4053  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4054  		var pp obj.Prog
  4055  		var t []byte
  4056  		if p.As == mo[0].as {
  4057  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4058  				t = mo[0].op[:]
  4059  				switch mo[0].code {
  4060  				default:
  4061  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4062  
  4063  				case 0: /* lit */
  4064  					for z = 0; t[z] != E; z++ {
  4065  						asmbuf.Put1(t[z])
  4066  					}
  4067  
  4068  				case 1: /* r,m */
  4069  					asmbuf.Put1(t[0])
  4070  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4071  
  4072  				case 2: /* m,r */
  4073  					asmbuf.Put1(t[0])
  4074  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4075  
  4076  				case 3: /* r,m - 2op */
  4077  					asmbuf.Put2(t[0], t[1])
  4078  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4079  					asmbuf.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4080  
  4081  				case 4: /* m,r - 2op */
  4082  					asmbuf.Put2(t[0], t[1])
  4083  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  4084  					asmbuf.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4085  
  4086  				case 5: /* load full pointer, trash heap */
  4087  					if t[0] != 0 {
  4088  						asmbuf.Put1(t[0])
  4089  					}
  4090  					switch p.To.Index {
  4091  					default:
  4092  						goto bad
  4093  
  4094  					case REG_DS:
  4095  						asmbuf.Put1(0xc5)
  4096  
  4097  					case REG_SS:
  4098  						asmbuf.Put2(0x0f, 0xb2)
  4099  
  4100  					case REG_ES:
  4101  						asmbuf.Put1(0xc4)
  4102  
  4103  					case REG_FS:
  4104  						asmbuf.Put2(0x0f, 0xb4)
  4105  
  4106  					case REG_GS:
  4107  						asmbuf.Put2(0x0f, 0xb5)
  4108  					}
  4109  
  4110  					asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  4111  
  4112  				case 6: /* double shift */
  4113  					if t[0] == Pw {
  4114  						if ctxt.Arch.Family != sys.AMD64 {
  4115  							ctxt.Diag("asmins: illegal 64: %v", p)
  4116  						}
  4117  						asmbuf.rexflag |= Pw
  4118  						t = t[1:]
  4119  					} else if t[0] == Pe {
  4120  						asmbuf.Put1(Pe)
  4121  						t = t[1:]
  4122  					}
  4123  
  4124  					switch p.From.Type {
  4125  					default:
  4126  						goto bad
  4127  
  4128  					case obj.TYPE_CONST:
  4129  						asmbuf.Put2(0x0f, t[0])
  4130  						asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4131  						asmbuf.Put1(byte(p.From.Offset))
  4132  
  4133  					case obj.TYPE_REG:
  4134  						switch p.From.Reg {
  4135  						default:
  4136  							goto bad
  4137  
  4138  						case REG_CL, REG_CX:
  4139  							asmbuf.Put2(0x0f, t[1])
  4140  							asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4141  						}
  4142  					}
  4143  
  4144  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4145  				// where you load the TLS base register into a register and then index off that
  4146  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4147  				// are handled in prefixof above and should not be listed here.
  4148  				case 7: /* mov tls, r */
  4149  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4150  						ctxt.Diag("invalid load of TLS: %v", p)
  4151  					}
  4152  
  4153  					if ctxt.Arch.Family == sys.I386 {
  4154  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4155  						// where you load the TLS base register into a register and then index off that
  4156  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4157  						// are handled in prefixof above and should not be listed here.
  4158  						switch ctxt.Headtype {
  4159  						default:
  4160  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4161  
  4162  						case objabi.Hlinux,
  4163  							objabi.Hnacl:
  4164  							if ctxt.Flag_shared {
  4165  								// Note that this is not generating the same insns as the other cases.
  4166  								//     MOV TLS, dst
  4167  								// becomes
  4168  								//     call __x86.get_pc_thunk.dst
  4169  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4170  								// which is encoded as
  4171  								//     call __x86.get_pc_thunk.dst
  4172  								//     movq 0(dst), dst
  4173  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4174  								// is g, which we can't check here, but will when we assemble the second
  4175  								// instruction.
  4176  								dst := p.To.Reg
  4177  								asmbuf.Put1(0xe8)
  4178  								r = obj.Addrel(cursym)
  4179  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4180  								r.Type = objabi.R_CALL
  4181  								r.Siz = 4
  4182  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  4183  								asmbuf.PutInt32(0)
  4184  
  4185  								asmbuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4186  								r = obj.Addrel(cursym)
  4187  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4188  								r.Type = objabi.R_TLS_IE
  4189  								r.Siz = 4
  4190  								r.Add = 2
  4191  								asmbuf.PutInt32(0)
  4192  							} else {
  4193  								// ELF TLS base is 0(GS).
  4194  								pp.From = p.From
  4195  
  4196  								pp.From.Type = obj.TYPE_MEM
  4197  								pp.From.Reg = REG_GS
  4198  								pp.From.Offset = 0
  4199  								pp.From.Index = REG_NONE
  4200  								pp.From.Scale = 0
  4201  								asmbuf.Put2(0x65, // GS
  4202  									0x8B)
  4203  								asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4204  							}
  4205  						case objabi.Hplan9:
  4206  							pp.From = obj.Addr{}
  4207  							pp.From.Type = obj.TYPE_MEM
  4208  							pp.From.Name = obj.NAME_EXTERN
  4209  							pp.From.Sym = plan9privates
  4210  							pp.From.Offset = 0
  4211  							pp.From.Index = REG_NONE
  4212  							asmbuf.Put1(0x8B)
  4213  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4214  
  4215  						case objabi.Hwindows:
  4216  							// Windows TLS base is always 0x14(FS).
  4217  							pp.From = p.From
  4218  
  4219  							pp.From.Type = obj.TYPE_MEM
  4220  							pp.From.Reg = REG_FS
  4221  							pp.From.Offset = 0x14
  4222  							pp.From.Index = REG_NONE
  4223  							pp.From.Scale = 0
  4224  							asmbuf.Put2(0x64, // FS
  4225  								0x8B)
  4226  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4227  						}
  4228  						break
  4229  					}
  4230  
  4231  					switch ctxt.Headtype {
  4232  					default:
  4233  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4234  
  4235  					case objabi.Hlinux:
  4236  						if !ctxt.Flag_shared {
  4237  							log.Fatalf("unknown TLS base location for linux without -shared")
  4238  						}
  4239  						// Note that this is not generating the same insn as the other cases.
  4240  						//     MOV TLS, R_to
  4241  						// becomes
  4242  						//     movq g@gottpoff(%rip), R_to
  4243  						// which is encoded as
  4244  						//     movq 0(%rip), R_to
  4245  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4246  						// is g, which we can't check here, but will when we assemble the second
  4247  						// instruction.
  4248  						asmbuf.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4249  
  4250  						asmbuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4251  						r = obj.Addrel(cursym)
  4252  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4253  						r.Type = objabi.R_TLS_IE
  4254  						r.Siz = 4
  4255  						r.Add = -4
  4256  						asmbuf.PutInt32(0)
  4257  
  4258  					case objabi.Hplan9:
  4259  						pp.From = obj.Addr{}
  4260  						pp.From.Type = obj.TYPE_MEM
  4261  						pp.From.Name = obj.NAME_EXTERN
  4262  						pp.From.Sym = plan9privates
  4263  						pp.From.Offset = 0
  4264  						pp.From.Index = REG_NONE
  4265  						asmbuf.rexflag |= Pw
  4266  						asmbuf.Put1(0x8B)
  4267  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4268  
  4269  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4270  						// TLS base is 0(FS).
  4271  						pp.From = p.From
  4272  
  4273  						pp.From.Type = obj.TYPE_MEM
  4274  						pp.From.Name = obj.NAME_NONE
  4275  						pp.From.Reg = REG_NONE
  4276  						pp.From.Offset = 0
  4277  						pp.From.Index = REG_NONE
  4278  						pp.From.Scale = 0
  4279  						asmbuf.rexflag |= Pw
  4280  						asmbuf.Put2(0x64, // FS
  4281  							0x8B)
  4282  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4283  
  4284  					case objabi.Hwindows:
  4285  						// Windows TLS base is always 0x28(GS).
  4286  						pp.From = p.From
  4287  
  4288  						pp.From.Type = obj.TYPE_MEM
  4289  						pp.From.Name = obj.NAME_NONE
  4290  						pp.From.Reg = REG_GS
  4291  						pp.From.Offset = 0x28
  4292  						pp.From.Index = REG_NONE
  4293  						pp.From.Scale = 0
  4294  						asmbuf.rexflag |= Pw
  4295  						asmbuf.Put2(0x65, // GS
  4296  							0x8B)
  4297  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4298  					}
  4299  				}
  4300  				return
  4301  			}
  4302  		}
  4303  	}
  4304  	goto bad
  4305  
  4306  bad:
  4307  	if ctxt.Arch.Family != sys.AMD64 {
  4308  		/*
  4309  		 * here, the assembly has failed.
  4310  		 * if its a byte instruction that has
  4311  		 * unaddressable registers, try to
  4312  		 * exchange registers and reissue the
  4313  		 * instruction with the operands renamed.
  4314  		 */
  4315  		pp := *p
  4316  
  4317  		unbytereg(&pp.From, &pp.Ft)
  4318  		unbytereg(&pp.To, &pp.Tt)
  4319  
  4320  		z := int(p.From.Reg)
  4321  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4322  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4323  			// For now, different to keep bit-for-bit compatibility.
  4324  			if ctxt.Arch.Family == sys.I386 {
  4325  				breg := byteswapreg(ctxt, &p.To)
  4326  				if breg != REG_AX {
  4327  					asmbuf.Put1(0x87) // xchg lhs,bx
  4328  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4329  					subreg(&pp, z, breg)
  4330  					asmbuf.doasm(ctxt, cursym, &pp)
  4331  					asmbuf.Put1(0x87) // xchg lhs,bx
  4332  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4333  				} else {
  4334  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4335  					subreg(&pp, z, REG_AX)
  4336  					asmbuf.doasm(ctxt, cursym, &pp)
  4337  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4338  				}
  4339  				return
  4340  			}
  4341  
  4342  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4343  				// We certainly don't want to exchange
  4344  				// with AX if the op is MUL or DIV.
  4345  				asmbuf.Put1(0x87) // xchg lhs,bx
  4346  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4347  				subreg(&pp, z, REG_BX)
  4348  				asmbuf.doasm(ctxt, cursym, &pp)
  4349  				asmbuf.Put1(0x87) // xchg lhs,bx
  4350  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4351  			} else {
  4352  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4353  				subreg(&pp, z, REG_AX)
  4354  				asmbuf.doasm(ctxt, cursym, &pp)
  4355  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4356  			}
  4357  			return
  4358  		}
  4359  
  4360  		z = int(p.To.Reg)
  4361  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4362  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4363  			// For now, different to keep bit-for-bit compatibility.
  4364  			if ctxt.Arch.Family == sys.I386 {
  4365  				breg := byteswapreg(ctxt, &p.From)
  4366  				if breg != REG_AX {
  4367  					asmbuf.Put1(0x87) //xchg rhs,bx
  4368  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4369  					subreg(&pp, z, breg)
  4370  					asmbuf.doasm(ctxt, cursym, &pp)
  4371  					asmbuf.Put1(0x87) // xchg rhs,bx
  4372  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4373  				} else {
  4374  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4375  					subreg(&pp, z, REG_AX)
  4376  					asmbuf.doasm(ctxt, cursym, &pp)
  4377  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4378  				}
  4379  				return
  4380  			}
  4381  
  4382  			if isax(&p.From) {
  4383  				asmbuf.Put1(0x87) // xchg rhs,bx
  4384  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4385  				subreg(&pp, z, REG_BX)
  4386  				asmbuf.doasm(ctxt, cursym, &pp)
  4387  				asmbuf.Put1(0x87) // xchg rhs,bx
  4388  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4389  			} else {
  4390  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4391  				subreg(&pp, z, REG_AX)
  4392  				asmbuf.doasm(ctxt, cursym, &pp)
  4393  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4394  			}
  4395  			return
  4396  		}
  4397  	}
  4398  
  4399  	ctxt.Diag("invalid instruction: %v", p)
  4400  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4401  	return
  4402  }
  4403  
  4404  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4405  // which is not referenced in a.
  4406  // If a is empty, it returns BX to account for MULB-like instructions
  4407  // that might use DX and AX.
  4408  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4409  	cana, canb, canc, cand := true, true, true, true
  4410  	if a.Type == obj.TYPE_NONE {
  4411  		cana, cand = false, false
  4412  	}
  4413  
  4414  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4415  		switch a.Reg {
  4416  		case REG_NONE:
  4417  			cana, cand = false, false
  4418  		case REG_AX, REG_AL, REG_AH:
  4419  			cana = false
  4420  		case REG_BX, REG_BL, REG_BH:
  4421  			canb = false
  4422  		case REG_CX, REG_CL, REG_CH:
  4423  			canc = false
  4424  		case REG_DX, REG_DL, REG_DH:
  4425  			cand = false
  4426  		}
  4427  	}
  4428  
  4429  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4430  		switch a.Index {
  4431  		case REG_AX:
  4432  			cana = false
  4433  		case REG_BX:
  4434  			canb = false
  4435  		case REG_CX:
  4436  			canc = false
  4437  		case REG_DX:
  4438  			cand = false
  4439  		}
  4440  	}
  4441  
  4442  	switch {
  4443  	case cana:
  4444  		return REG_AX
  4445  	case canb:
  4446  		return REG_BX
  4447  	case canc:
  4448  		return REG_CX
  4449  	case cand:
  4450  		return REG_DX
  4451  	default:
  4452  		ctxt.Diag("impossible byte register")
  4453  		log.Fatalf("bad code")
  4454  		return 0
  4455  	}
  4456  }
  4457  
  4458  func isbadbyte(a *obj.Addr) bool {
  4459  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4460  }
  4461  
  4462  var naclret = []uint8{
  4463  	0x5e, // POPL SI
  4464  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4465  	0x83,
  4466  	0xe6,
  4467  	0xe0, // ANDL $~31, SI
  4468  	0x4c,
  4469  	0x01,
  4470  	0xfe, // ADDQ R15, SI
  4471  	0xff,
  4472  	0xe6, // JMP SI
  4473  }
  4474  
  4475  var naclret8 = []uint8{
  4476  	0x5d, // POPL BP
  4477  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4478  	0x83,
  4479  	0xe5,
  4480  	0xe0, // ANDL $~31, BP
  4481  	0xff,
  4482  	0xe5, // JMP BP
  4483  }
  4484  
  4485  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4486  
  4487  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4488  
  4489  var naclmovs = []uint8{
  4490  	0x89,
  4491  	0xf6, // MOVL SI, SI
  4492  	0x49,
  4493  	0x8d,
  4494  	0x34,
  4495  	0x37, // LEAQ (R15)(SI*1), SI
  4496  	0x89,
  4497  	0xff, // MOVL DI, DI
  4498  	0x49,
  4499  	0x8d,
  4500  	0x3c,
  4501  	0x3f, // LEAQ (R15)(DI*1), DI
  4502  }
  4503  
  4504  var naclstos = []uint8{
  4505  	0x89,
  4506  	0xff, // MOVL DI, DI
  4507  	0x49,
  4508  	0x8d,
  4509  	0x3c,
  4510  	0x3f, // LEAQ (R15)(DI*1), DI
  4511  }
  4512  
  4513  func (asmbuf *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
  4514  	if reg >= REG_R8 {
  4515  		asmbuf.Put1(0x45)
  4516  	}
  4517  	reg = (reg - REG_AX) & 7
  4518  	asmbuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4519  }
  4520  
  4521  func (asmbuf *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4522  	asmbuf.Reset()
  4523  
  4524  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 {
  4525  		switch p.As {
  4526  		case obj.ARET:
  4527  			asmbuf.Put(naclret8)
  4528  			return
  4529  
  4530  		case obj.ACALL,
  4531  			obj.AJMP:
  4532  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4533  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4534  			}
  4535  
  4536  		case AINT:
  4537  			asmbuf.Put1(0xf4)
  4538  			return
  4539  		}
  4540  	}
  4541  
  4542  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
  4543  		if p.As == AREP {
  4544  			asmbuf.rep++
  4545  			return
  4546  		}
  4547  
  4548  		if p.As == AREPN {
  4549  			asmbuf.repn++
  4550  			return
  4551  		}
  4552  
  4553  		if p.As == ALOCK {
  4554  			asmbuf.lock = true
  4555  			return
  4556  		}
  4557  
  4558  		if p.As != ALEAQ && p.As != ALEAL {
  4559  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4560  				asmbuf.nacltrunc(ctxt, int(p.From.Index))
  4561  			}
  4562  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4563  				asmbuf.nacltrunc(ctxt, int(p.To.Index))
  4564  			}
  4565  		}
  4566  
  4567  		switch p.As {
  4568  		case obj.ARET:
  4569  			asmbuf.Put(naclret)
  4570  			return
  4571  
  4572  		case obj.ACALL,
  4573  			obj.AJMP:
  4574  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4575  				// ANDL $~31, reg
  4576  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4577  				// ADDQ R15, reg
  4578  				asmbuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4579  			}
  4580  
  4581  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4582  				// ANDL $~31, reg
  4583  				asmbuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4584  				// ADDQ R15, reg
  4585  				asmbuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4586  			}
  4587  
  4588  		case AINT:
  4589  			asmbuf.Put1(0xf4)
  4590  			return
  4591  
  4592  		case ASCASB,
  4593  			ASCASW,
  4594  			ASCASL,
  4595  			ASCASQ,
  4596  			ASTOSB,
  4597  			ASTOSW,
  4598  			ASTOSL,
  4599  			ASTOSQ:
  4600  			asmbuf.Put(naclstos)
  4601  
  4602  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4603  			asmbuf.Put(naclmovs)
  4604  		}
  4605  
  4606  		if asmbuf.rep != 0 {
  4607  			asmbuf.Put1(0xf3)
  4608  			asmbuf.rep = 0
  4609  		}
  4610  
  4611  		if asmbuf.repn != 0 {
  4612  			asmbuf.Put1(0xf2)
  4613  			asmbuf.repn = 0
  4614  		}
  4615  
  4616  		if asmbuf.lock {
  4617  			asmbuf.Put1(0xf0)
  4618  			asmbuf.lock = false
  4619  		}
  4620  	}
  4621  
  4622  	asmbuf.rexflag = 0
  4623  	asmbuf.vexflag = 0
  4624  	mark := asmbuf.Len()
  4625  	asmbuf.doasm(ctxt, cursym, p)
  4626  	if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4627  		/*
  4628  		 * as befits the whole approach of the architecture,
  4629  		 * the rex prefix must appear before the first opcode byte
  4630  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4631  		 * before the 0f opcode escape!), or it might be ignored.
  4632  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4633  		 */
  4634  		if ctxt.Arch.Family != sys.AMD64 {
  4635  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  4636  		}
  4637  		n := asmbuf.Len()
  4638  		var np int
  4639  		for np = mark; np < n; np++ {
  4640  			c := asmbuf.At(np)
  4641  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4642  				break
  4643  			}
  4644  		}
  4645  		asmbuf.Insert(np, byte(0x40|asmbuf.rexflag))
  4646  	}
  4647  
  4648  	n := asmbuf.Len()
  4649  	for i := len(cursym.R) - 1; i >= 0; i-- {
  4650  		r := &cursym.R[i]
  4651  		if int64(r.Off) < p.Pc {
  4652  			break
  4653  		}
  4654  		if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4655  			r.Off++
  4656  		}
  4657  		if r.Type == objabi.R_PCREL {
  4658  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4659  				// PC-relative addressing is relative to the end of the instruction,
  4660  				// but the relocations applied by the linker are relative to the end
  4661  				// of the relocation. Because immediate instruction
  4662  				// arguments can follow the PC-relative memory reference in the
  4663  				// instruction encoding, the two may not coincide. In this case,
  4664  				// adjust addend so that linker can keep relocating relative to the
  4665  				// end of the relocation.
  4666  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4667  			} else if ctxt.Arch.Family == sys.I386 {
  4668  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4669  				// assumes that the previous instruction loaded the PC of the end
  4670  				// of that instruction into CX, so the adjustment is relative to
  4671  				// that.
  4672  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4673  			}
  4674  		}
  4675  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  4676  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4677  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4678  		}
  4679  
  4680  	}
  4681  
  4682  	if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4683  		switch p.To.Reg {
  4684  		case REG_SP:
  4685  			asmbuf.Put(naclspfix)
  4686  		case REG_BP:
  4687  			asmbuf.Put(naclbpfix)
  4688  		}
  4689  	}
  4690  }
  4691  
  4692  // Extract 4 operands from p.
  4693  func unpackOps4(p *obj.Prog) (*obj.Addr, *obj.Addr, *obj.Addr, *obj.Addr) {
  4694  	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
  4695  }