github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
    43  var ( // package-level symbol pointers; they are assigned and used outside this part of the file
    44  	plan9privates *obj.LSym // NOTE(review): presumably the Plan 9 "privates" symbol — confirm at its use site
    45  	deferreturn   *obj.LSym // NOTE(review): presumably the runtime deferreturn symbol — confirm at its use site
    46  )
    47  
    48  // Instruction layout.
    49  
    50  const (
    51  	// Loop alignment constants:
    52  	// want to align loop entry to LoopAlign-byte boundary,
    53  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    54  	// We define a loop entry as the target of a backward jump.
    55  	//
    56  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    57  	// and it aligns all jump targets, not just backward jump targets.
    58  	//
    59  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    60  	// is very slight but negative, so the alignment is disabled by
    61  	// setting MaxLoopPad = 0. The code is here for reference and
    62  	// for future experiments.
    63  	//
    64  	LoopAlign  = 16 // byte boundary that a loop entry would be aligned to
    65  	MaxLoopPad = 0 // maximum NOP bytes inserted per loop entry; 0 disables the alignment
    66  )
    67  
    68  type Optab struct { // one optab entry: how a single instruction is encoded (see the doasm comment before optab)
    69  	as     obj.As // instruction this entry describes; optab is searched for the entry matching p.As
    70  	ytab   []ytab // accepted operand forms, scanned in order until one matches the operands
    71  	prefix uint8 // one of the P* constants; controls the prefix bytes to emit
    72  	op     [23]uint8 // opcode ("z") bytes; each ytab row that does not match skips its share of them
    73  }
    74  
    75  type Movtab struct { // entry type for a table that is not visible in this part of the file
    76  	as   obj.As // instruction this entry applies to
    77  	ft   uint8 // NOTE(review): presumably the Y class required of the "from" operand — confirm against the table that uses Movtab
    78  	f3t  uint8 // NOTE(review): presumably the Y class of a third ("from3") operand — confirm
    79  	tt   uint8 // NOTE(review): presumably the Y class required of the "to" operand — confirm
    80  	code uint8 // NOTE(review): presumably selects the encoding routine to use — confirm
    81  	op   [4]uint8 // opcode bytes for this entry
    82  }
    83  
    84  const ( // operand classes (Ytypes): oclass computes one per operand, ycover widens it to every class it satisfies
    85  	Yxxx = iota
    86  	Ynone
    87  	Yi0 // $0
    88  	Yi1 // $1
    89  	Yu2 // $x, x fits in uint2
    90  	Yi8 // $x, x fits in int8
    91  	Yu8 // $x, x fits in uint8
    92  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
    93  	Ys32 // signed 32-bit constant; Yi0, Yi1 and Yi8 are also accepted as Ys32 via ycover
    94  	Yi32
    95  	Yi64
    96  	Yiauto
    97  	Yal
    98  	Ycl
    99  	Yax
   100  	Ycx
   101  	Yrb
   102  	Yrl
   103  	Yrl32 // Yrl on 32-bit system
   104  	Yrf
   105  	Yf0
   106  	Yrx
   107  	Ymb
   108  	Yml
   109  	Ym
   110  	Ybr
   111  	Ycs
   112  	Yss
   113  	Yds
   114  	Yes
   115  	Yfs
   116  	Ygs
   117  	Ygdtr
   118  	Yidtr
   119  	Yldtr
   120  	Ymsw
   121  	Ytask
   122  	Ycr0
   123  	Ycr1
   124  	Ycr2
   125  	Ycr3
   126  	Ycr4
   127  	Ycr5
   128  	Ycr6
   129  	Ycr7
   130  	Ycr8
   131  	Ydr0
   132  	Ydr1
   133  	Ydr2
   134  	Ydr3
   135  	Ydr4
   136  	Ydr5
   137  	Ydr6
   138  	Ydr7
   139  	Ytr0
   140  	Ytr1
   141  	Ytr2
   142  	Ytr3
   143  	Ytr4
   144  	Ytr5
   145  	Ytr6
   146  	Ytr7
   147  	Ymr
   148  	Ymm
   149  	Yxr // XMM register (the xmmV / xmm1 operands in the yvex_xy3 example)
   150  	Yxm // XMM register or memory (the xmm2/m128 operand in the yvex_xy3 example)
   151  	Yyr // YMM register (the ymmV / ymm1 operands in the yvex_xy3 example)
   152  	Yym // YMM register or memory (the ymm2/m256 operand in the yvex_xy3 example)
   153  	Ytls
   154  	Ytextsize
   155  	Yindir
   156  	Ymax // number of classes; ycover is a Ymax*Ymax table indexed as from*Ymax+to
   157  )
   158  
   159  const ( // encoding cases (Ztypes): the first field of every ytab row, dispatched on by the yt.zcase switch in doasm
   160  	Zxxx = iota
   161  	Zlit
   162  	Zlitm_r
   163  	Z_rp
   164  	Zbr
   165  	Zcall
   166  	Zcallcon
   167  	Zcallduff
   168  	Zcallind
   169  	Zcallindreg
   170  	Zib_
   171  	Zib_rp
   172  	Zibo_m
   173  	Zibo_m_xm
   174  	Zil_
   175  	Zil_rp
   176  	Ziq_rp
   177  	Zilo_m
   178  	Zjmp
   179  	Zjmpcon
   180  	Zloop
   181  	Zo_iw
   182  	Zm_o
   183  	Zm_r
   184  	Zm2_r
   185  	Zm_r_xm
   186  	Zm_r_i_xm
   187  	Zm_r_xm_nr
   188  	Zr_m_xm_nr
   189  	Zibm_r /* mmx1,mmx2/mem64,imm8 */
   190  	Zibr_m
   191  	Zmb_r
   192  	Zaut_r
   193  	Zo_m
   194  	Zo_m64
   195  	Zpseudo // used by the ytext, ynop, yfuncdata and ypcdata tables
   196  	Zr_m
   197  	Zr_m_xm
   198  	Zrp_
   199  	Z_ib
   200  	Z_il
   201  	Zm_ibo
   202  	Zm_ilo
   203  	Zib_rr
   204  	Zil_rr
   205  	Zbyte
   206  	Zvex_rm_v_r // the Zvex_* cases are used only by the yvex* tables
   207  	Zvex_rm_v_ro
   208  	Zvex_r_v_rm
   209  	Zvex_v_rm_r
   210  	Zvex_i_rm_r
   211  	Zvex_i_r_v
   212  	Zvex_i_rm_v_r
   213  	Zvex
   214  	Zvex_rm_r_vo
   215  	Zvex_i_r_rm
   216  	Zvex_hr_rm_v_r
   217  
   218  	Zmax // one past the last Ztype
   219  )
   220  
   221  const ( // prefix selectors (the third field of an optab entry, e.g. Px or Pvex), followed by the REX bit masks
   222  	Px   = 0
   223  	Px1  = 1    // symbolic; exact value doesn't matter
   224  	P32  = 0x32 /* 32-bit only */
   225  	Pe   = 0x66 /* operand escape */
   226  	Pm   = 0x0f /* 2byte opcode escape */
   227  	Pq   = 0xff /* both escapes: 66 0f */
   228  	Pb   = 0xfe /* byte operands */
   229  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   230  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   231  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   232  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   233  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   234  	Pq4w = 0x69 /* Pq4 with Rex.w 66 0F 38 */
   235  	Pq5  = 0x6a /* xmm escape 5: F3 0F 38 */
   236  	Pq5w = 0x6b /* Pq5 with Rex.w F3 0F 38 */
   237  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   238  	Pw   = 0x48 /* Rex.w */
   239  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   240  	Py   = 0x80 /* defaults to 64-bit mode */
   241  	Py1  = 0x81 // symbolic; exact value doesn't matter
   242  	Py3  = 0x83 // symbolic; exact value doesn't matter
   243  	Pvex = 0x84 // symbolic: exact value doesn't matter
   244  
   245  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   246  	Rxr = 1 << 2 /* extend modrm reg */
   247  	Rxx = 1 << 1 /* extend sib index */
   248  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   249  )
   250  
   251  const ( // each VEX_* value packs W (bit 7), M (bits 3-4), L (bit 2) and P (bits 0-1) into a single byte
   252  	// Encoding for VEX prefix in tables.
   253  	// The P, L, and W fields are chosen to match
   254  	// their eventual locations in the VEX prefix bytes.
   255  
   256  	// V field - 4 bits; ignored by encoder
   257  	vexNOVSR = 0 // No VEX-SPECIFIED-REGISTER
   258  	vexNDS   = 0 // all V-field names are 0: they only document the manual's notation
   259  	vexNDD   = 0
   260  	vexDDS   = 0
   261  	// P field - 2 bits
   262  	vex66 = 1 << 0
   263  	vexF3 = 2 << 0
   264  	vexF2 = 3 << 0
   265  	// L field - 1 bit
   266  	vexLZ  = 0 << 2
   267  	vexLIG = 0 << 2
   268  	vex128 = 0 << 2
   269  	vex256 = 1 << 2
   270  	// W field - 1 bit
   271  	vexWIG = 0 << 7
   272  	vexW0  = 0 << 7
   273  	vexW1  = 1 << 7
   274  	// M field - 5 bits, but mostly reserved; we can store up to 4
   275  	vex0F   = 1 << 3
   276  	vex0F38 = 2 << 3
   277  	vex0F3A = 3 << 3
   278  
   279  	// Combinations used in the manual.
   280  	VEX_DDS_LIG_66_0F38_W1    = vexDDS | vexLIG | vex66 | vex0F38 | vexW1
   281  	VEX_NDD_128_66_0F_WIG     = vexNDD | vex128 | vex66 | vex0F | vexWIG
   282  	VEX_NDD_256_66_0F_WIG     = vexNDD | vex256 | vex66 | vex0F | vexWIG
   283  	VEX_NDD_LZ_F2_0F38_W0     = vexNDD | vexLZ | vexF2 | vex0F38 | vexW0
   284  	VEX_NDD_LZ_F2_0F38_W1     = vexNDD | vexLZ | vexF2 | vex0F38 | vexW1
   285  	VEX_NDS_128_66_0F_WIG     = vexNDS | vex128 | vex66 | vex0F | vexWIG
   286  	VEX_NDS_128_66_0F38_WIG   = vexNDS | vex128 | vex66 | vex0F38 | vexWIG
   287  	VEX_NDS_128_F2_0F_WIG     = vexNDS | vex128 | vexF2 | vex0F | vexWIG
   288  	VEX_NDS_256_66_0F_WIG     = vexNDS | vex256 | vex66 | vex0F | vexWIG
   289  	VEX_NDS_256_66_0F38_WIG   = vexNDS | vex256 | vex66 | vex0F38 | vexWIG
   290  	VEX_NDS_256_66_0F3A_W0    = vexNDS | vex256 | vex66 | vex0F3A | vexW0
   291  	VEX_NDS_256_66_0F3A_WIG   = vexNDS | vex256 | vex66 | vex0F3A | vexWIG
   292  	VEX_NDS_LZ_0F38_W0        = vexNDS | vexLZ | vex0F38 | vexW0
   293  	VEX_NDS_LZ_0F38_W1        = vexNDS | vexLZ | vex0F38 | vexW1
   294  	VEX_NDS_LZ_66_0F38_W0     = vexNDS | vexLZ | vex66 | vex0F38 | vexW0
   295  	VEX_NDS_LZ_66_0F38_W1     = vexNDS | vexLZ | vex66 | vex0F38 | vexW1
   296  	VEX_NDS_LZ_F2_0F38_W0     = vexNDS | vexLZ | vexF2 | vex0F38 | vexW0
   297  	VEX_NDS_LZ_F2_0F38_W1     = vexNDS | vexLZ | vexF2 | vex0F38 | vexW1
   298  	VEX_NDS_LZ_F3_0F38_W0     = vexNDS | vexLZ | vexF3 | vex0F38 | vexW0
   299  	VEX_NDS_LZ_F3_0F38_W1     = vexNDS | vexLZ | vexF3 | vex0F38 | vexW1
   300  	VEX_NOVSR_128_66_0F_WIG   = vexNOVSR | vex128 | vex66 | vex0F | vexWIG
   301  	VEX_NOVSR_128_66_0F38_W0  = vexNOVSR | vex128 | vex66 | vex0F38 | vexW0
   302  	VEX_NOVSR_128_66_0F38_WIG = vexNOVSR | vex128 | vex66 | vex0F38 | vexWIG
   303  	VEX_NOVSR_128_F2_0F_WIG   = vexNOVSR | vex128 | vexF2 | vex0F | vexWIG
   304  	VEX_NOVSR_128_F3_0F_WIG   = vexNOVSR | vex128 | vexF3 | vex0F | vexWIG
   305  	VEX_NOVSR_256_66_0F_WIG   = vexNOVSR | vex256 | vex66 | vex0F | vexWIG
   306  	VEX_NOVSR_256_66_0F38_W0  = vexNOVSR | vex256 | vex66 | vex0F38 | vexW0
   307  	VEX_NOVSR_256_66_0F38_WIG = vexNOVSR | vex256 | vex66 | vex0F38 | vexWIG
   308  	VEX_NOVSR_256_F2_0F_WIG   = vexNOVSR | vex256 | vexF2 | vex0F | vexWIG
   309  	VEX_NOVSR_256_F3_0F_WIG   = vexNOVSR | vex256 | vexF3 | vex0F | vexWIG
   310  	VEX_NOVSR_LZ_F2_0F3A_W0   = vexNOVSR | vexLZ | vexF2 | vex0F3A | vexW0
   311  	VEX_NOVSR_LZ_F2_0F3A_W1   = vexNOVSR | vexLZ | vexF2 | vex0F3A | vexW1
   312  )
   313  
   314  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means class a is accepted where class b is required; set up in instinit
   315  
   316  var reg [MAXREG]int // NOTE(review): presumably a per-register encoding table filled in elsewhere — confirm where it is initialized
   317  
   318  var regrex [MAXREG + 1]int // NOTE(review): presumably per-register REX bits (one extra slot beyond MAXREG) — confirm where it is initialized
   319  
   320  var ynone = []ytab{ // a single form with no operands (Zlit)
   321  	{Zlit, 1, argList{}},
   322  }
   323  
   324  var ytext = []ytab{ // Zpseudo forms: (Ymb, Ytextsize), optionally with a Yi32 operand in between
   325  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   326  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   327  }
   328  
   329  var ynop = []ytab{ // Zpseudo forms taking no operand or exactly one operand
   330  	{Zpseudo, 0, argList{}},
   331  	{Zpseudo, 0, argList{Yiauto}},
   332  	{Zpseudo, 0, argList{Yml}},
   333  	{Zpseudo, 0, argList{Yrf}},
   334  	{Zpseudo, 0, argList{Yxr}},
   335  	{Zpseudo, 0, argList{Yiauto}}, // NOTE(review): this row and the three below repeat the operand lists of the four rows above (only the last zoffset differs); looks redundant — confirm before removing
   336  	{Zpseudo, 0, argList{Yml}},
   337  	{Zpseudo, 0, argList{Yrf}},
   338  	{Zpseudo, 1, argList{Yxr}},
   339  }
   340  
   341  var yfuncdata = []ytab{ // one Zpseudo form: (Yi32, Ym)
   342  	{Zpseudo, 0, argList{Yi32, Ym}},
   343  }
   344  
   345  var ypcdata = []ytab{ // one Zpseudo form: (Yi32, Yi32)
   346  	{Zpseudo, 0, argList{Yi32, Yi32}},
   347  }
   348  
   349  var yxorb = []ytab{ // byte-class forms; ycmpb holds the same rows with the two operands swapped
   350  	{Zib_, 1, argList{Yi32, Yal}},
   351  	{Zibo_m, 2, argList{Yi32, Ymb}},
   352  	{Zr_m, 1, argList{Yrb, Ymb}},
   353  	{Zm_r, 1, argList{Ymb, Yrb}},
   354  }
   355  
   356  var yaddl = []ytab{ // walked through for AADDL in the doasm comment before optab; the Yi8 row is tried before the Yi32 rows
   357  	{Zibo_m, 2, argList{Yi8, Yml}},
   358  	{Zil_, 1, argList{Yi32, Yax}},
   359  	{Zilo_m, 2, argList{Yi32, Yml}},
   360  	{Zr_m, 1, argList{Yrl, Yml}},
   361  	{Zm_r, 1, argList{Yml, Yrl}},
   362  }
   363  
   364  var yincl = []ytab{ // single operand: Z_rp for a Yrl operand, otherwise Zo_m for Yml
   365  	{Z_rp, 1, argList{Yrl}},
   366  	{Zo_m, 2, argList{Yml}},
   367  }
   368  
   369  var yincq = []ytab{ // only the Zo_m row of yincl (no Z_rp form)
   370  	{Zo_m, 2, argList{Yml}},
   371  }
   372  
   373  var ycmpb = []ytab{ // yxorb with the operand order reversed in every row
   374  	{Z_ib, 1, argList{Yal, Yi32}},
   375  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   376  	{Zm_r, 1, argList{Ymb, Yrb}},
   377  	{Zr_m, 1, argList{Yrb, Ymb}},
   378  }
   379  
   380  var ycmpl = []ytab{ // yaddl with the operand order reversed in every row
   381  	{Zm_ibo, 2, argList{Yml, Yi8}},
   382  	{Z_il, 1, argList{Yax, Yi32}},
   383  	{Zm_ilo, 2, argList{Yml, Yi32}},
   384  	{Zm_r, 1, argList{Yml, Yrl}},
   385  	{Zr_m, 1, argList{Yrl, Yml}},
   386  }
   387  
   388  var yshb = []ytab{ // first operand is $1 (Yi1), a uint8 immediate (Yu8) or Ycx; NOTE(review): unlike yshl there is no Ycl row — confirm this is intended
   389  	{Zo_m, 2, argList{Yi1, Ymb}},
   390  	{Zibo_m, 2, argList{Yu8, Ymb}},
   391  	{Zo_m, 2, argList{Ycx, Ymb}},
   392  }
   393  
   394  var yshl = []ytab{ // first operand is $1 (Yi1), a uint8 immediate (Yu8), Ycl or Ycx
   395  	{Zo_m, 2, argList{Yi1, Yml}},
   396  	{Zibo_m, 2, argList{Yu8, Yml}},
   397  	{Zo_m, 2, argList{Ycl, Yml}},
   398  	{Zo_m, 2, argList{Ycx, Yml}},
   399  }
   400  
   401  var ytestl = []ytab{ // the rows of yaddl minus its leading Yi8 row
   402  	{Zil_, 1, argList{Yi32, Yax}},
   403  	{Zilo_m, 2, argList{Yi32, Yml}},
   404  	{Zr_m, 1, argList{Yrl, Yml}},
   405  	{Zm_r, 1, argList{Yml, Yrl}},
   406  }
   407  
   408  var ymovb = []ytab{ // Yrb/Ymb in both directions, then a Yi32 source into Yrb (Zib_rp) or Ymb (Zibo_m)
   409  	{Zr_m, 1, argList{Yrb, Ymb}},
   410  	{Zm_r, 1, argList{Ymb, Yrb}},
   411  	{Zib_rp, 1, argList{Yi32, Yrb}},
   412  	{Zibo_m, 2, argList{Yi32, Ymb}},
   413  }
   414  
   415  var ybtl = []ytab{ // first operand Yi8 or Yrl, second operand Yml
   416  	{Zibo_m, 2, argList{Yi8, Yml}},
   417  	{Zr_m, 1, argList{Yrl, Yml}},
   418  }
   419  
   420  var ymovw = []ytab{ // Yrl/Yml in both directions, Yi32 sources, and a final Zaut_r row for a Yiauto source
   421  	{Zr_m, 1, argList{Yrl, Yml}},
   422  	{Zm_r, 1, argList{Yml, Yrl}},
   423  	{Zil_rp, 1, argList{Yi32, Yrl}},
   424  	{Zilo_m, 2, argList{Yi32, Yml}},
   425  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   426  }
   427  
   428  var ymovl = []ytab{ // the rows of ymovw with the MMX/XMM MOVD rows inserted before the final Zaut_r row
   429  	{Zr_m, 1, argList{Yrl, Yml}},
   430  	{Zm_r, 1, argList{Yml, Yrl}},
   431  	{Zil_rp, 1, argList{Yi32, Yrl}},
   432  	{Zilo_m, 2, argList{Yi32, Yml}},
   433  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   434  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   435  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   436  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   437  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   438  }
   439  
   440  var yret = []ytab{ // no operand, or one Yi32 operand; both rows use Zo_iw
   441  	{Zo_iw, 1, argList{}},
   442  	{Zo_iw, 1, argList{Yi32}},
   443  }
   444  
   445  var ymovq = []ytab{ // rows are grouped: forms valid in 32-bit mode first, then forms valid only in 64-bit mode
   446  	// valid in 32-bit mode
   447  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   448  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   449  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   450  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   451  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   452  
   453  	// valid only in 64-bit mode, usually with 64-bit prefix
   454  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   455  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   456  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   457  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   458  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   459  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   460  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   461  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   462  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   463  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   464  }
   465  
   466  var ym_rl = []ytab{ // Ym source, Yrl destination (Zm_r); yml_rl is the same with Yml as the source class
   467  	{Zm_r, 1, argList{Ym, Yrl}},
   468  }
   469  
   470  var yrl_m = []ytab{ // Yrl source, Ym destination (Zr_m)
   471  	{Zr_m, 1, argList{Yrl, Ym}},
   472  }
   473  
   474  var ymb_rl = []ytab{ // Ymb source, Yrl destination, using the dedicated Zmb_r case
   475  	{Zmb_r, 1, argList{Ymb, Yrl}},
   476  }
   477  
   478  var yml_rl = []ytab{ // Yml source, Yrl destination (Zm_r)
   479  	{Zm_r, 1, argList{Yml, Yrl}},
   480  }
   481  
   482  var yrl_ml = []ytab{ // Yrl source, Yml destination (Zr_m)
   483  	{Zr_m, 1, argList{Yrl, Yml}},
   484  }
   485  
   486  var yml_mb = []ytab{ // NOTE(review): despite the name, both rows pair Yrb with Ymb (no Yml operand appears)
   487  	{Zr_m, 1, argList{Yrb, Ymb}},
   488  	{Zm_r, 1, argList{Ymb, Yrb}},
   489  }
   490  
   491  var yrb_mb = []ytab{ // Yrb source, Ymb destination (Zr_m); the first row of yml_mb
   492  	{Zr_m, 1, argList{Yrb, Ymb}},
   493  }
   494  
   495  var yxchg = []ytab{ // the two rows involving Yax (Z_rp / Zrp_) are tried before the generic Yrl/Yml rows
   496  	{Z_rp, 1, argList{Yax, Yrl}},
   497  	{Zrp_, 1, argList{Yrl, Yax}},
   498  	{Zr_m, 1, argList{Yrl, Yml}},
   499  	{Zm_r, 1, argList{Yml, Yrl}},
   500  }
   501  
   502  var ydivl = []ytab{ // a single Yml operand (Zm_o)
   503  	{Zm_o, 2, argList{Yml}},
   504  }
   505  
   506  var ydivb = []ytab{ // a single Ymb operand (Zm_o)
   507  	{Zm_o, 2, argList{Ymb}},
   508  }
   509  
   510  var yimul = []ytab{ // one-operand Yml form, then immediate sources into Yrl (Yi8 before Yi32), then Yml into Yrl
   511  	{Zm_o, 2, argList{Yml}},
   512  	{Zib_rr, 1, argList{Yi8, Yrl}},
   513  	{Zil_rr, 1, argList{Yi32, Yrl}},
   514  	{Zm_r, 2, argList{Yml, Yrl}},
   515  }
   516  
   517  var yimul3 = []ytab{ // three operands: Yi8, Yml, Yrl (Zibm_r)
   518  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   519  }
   520  
   521  var ybyte = []ytab{ // a single Yi64 operand (Zbyte)
   522  	{Zbyte, 1, argList{Yi64}},
   523  }
   524  
   525  var yin = []ytab{ // a Yi32 operand (Zib_), or no operand at all (Zlit)
   526  	{Zib_, 1, argList{Yi32}},
   527  	{Zlit, 1, argList{}},
   528  }
   529  
   530  var yint = []ytab{ // a single Yi32 operand (Zib_); the first row of yin
   531  	{Zib_, 1, argList{Yi32}},
   532  }
   533  
   534  var ypushl = []ytab{ // one operand: Yrl, Ym, Yi8 or Yi32; the Yi8 row is tried before the Yi32 row
   535  	{Zrp_, 1, argList{Yrl}},
   536  	{Zm_o, 2, argList{Ym}},
   537  	{Zib_, 1, argList{Yi8}},
   538  	{Zil_, 1, argList{Yi32}},
   539  }
   540  
   541  var ypopl = []ytab{ // one operand: Yrl (Z_rp) or Ym (Zo_m)
   542  	{Z_rp, 1, argList{Yrl}},
   543  	{Zo_m, 2, argList{Ym}},
   544  }
   545  
   546  var yclflush = []ytab{ // a single Ym operand (Zo_m); identical to ysvrs_om
   547  	{Zo_m, 2, argList{Ym}},
   548  }
   549  
   550  var ybswap = []ytab{ // a single Yrl operand (Z_rp) that consumes two opcode bytes
   551  	{Z_rp, 2, argList{Yrl}},
   552  }
   553  
   554  var yscond = []ytab{ // a single Ymb operand (Zo_m)
   555  	{Zo_m, 2, argList{Ymb}},
   556  }
   557  
   558  var yjcond = []ytab{ // a Ybr operand, optionally preceded by $0 (Yi0) or $1 (Yi1); all rows use Zbr
   559  	{Zbr, 0, argList{Ybr}},
   560  	{Zbr, 0, argList{Yi0, Ybr}},
   561  	{Zbr, 1, argList{Yi1, Ybr}},
   562  }
   563  
   564  var yloop = []ytab{ // a single Ybr operand (Zloop)
   565  	{Zloop, 1, argList{Ybr}},
   566  }
   567  
   568  var ycall = []ytab{ // operand-dependent cases: Zcallindreg (Yml, or a Yrx pair), Zcallind (Yindir), Zcall (Ybr), Zcallcon (Yi32)
   569  	{Zcallindreg, 0, argList{Yml}},
   570  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   571  	{Zcallind, 2, argList{Yindir}},
   572  	{Zcall, 0, argList{Ybr}},
   573  	{Zcallcon, 1, argList{Yi32}},
   574  }
   575  
   576  var yduff = []ytab{ // a single Yi32 operand (Zcallduff)
   577  	{Zcallduff, 1, argList{Yi32}},
   578  }
   579  
   580  var yjmp = []ytab{ // a Yml operand (Zo_m64), a Ybr operand (Zjmp) or a Yi32 operand (Zjmpcon)
   581  	{Zo_m64, 2, argList{Yml}},
   582  	{Zjmp, 0, argList{Ybr}},
   583  	{Zjmpcon, 1, argList{Yi32}},
   584  }
   585  
   586  var yfmvd = []ytab{ // Ym or Yrf into Yf0 (Zm_o) and Yf0 into Ym or Yrf (Zo_m)
   587  	{Zm_o, 2, argList{Ym, Yf0}},
   588  	{Zo_m, 2, argList{Yf0, Ym}},
   589  	{Zm_o, 2, argList{Yrf, Yf0}},
   590  	{Zo_m, 2, argList{Yf0, Yrf}},
   591  }
   592  
   593  var yfmvdp = []ytab{ // only the two Yf0-source (Zo_m) rows of yfmvd
   594  	{Zo_m, 2, argList{Yf0, Ym}},
   595  	{Zo_m, 2, argList{Yf0, Yrf}},
   596  }
   597  
   598  var yfmvf = []ytab{ // only the two Ym rows of yfmvd (no Yrf forms)
   599  	{Zm_o, 2, argList{Ym, Yf0}},
   600  	{Zo_m, 2, argList{Yf0, Ym}},
   601  }
   602  
   603  var yfmvx = []ytab{ // only Ym into Yf0 (Zm_o)
   604  	{Zm_o, 2, argList{Ym, Yf0}},
   605  }
   606  
   607  var yfmvp = []ytab{ // only Yf0 into Ym (Zo_m)
   608  	{Zo_m, 2, argList{Yf0, Ym}},
   609  }
   610  
   611  var yfcmv = []ytab{ // only Yrf into Yf0 (Zm_o)
   612  	{Zm_o, 2, argList{Yrf, Yf0}},
   613  }
   614  
   615  var yfadd = []ytab{ // Ym or Yrf into Yf0 (Zm_o), or Yf0 into Yrf (Zo_m)
   616  	{Zm_o, 2, argList{Ym, Yf0}},
   617  	{Zm_o, 2, argList{Yrf, Yf0}},
   618  	{Zo_m, 2, argList{Yf0, Yrf}},
   619  }
   620  
   621  var yfxch = []ytab{ // Yf0 and Yrf in either order
   622  	{Zo_m, 2, argList{Yf0, Yrf}},
   623  	{Zm_o, 2, argList{Yrf, Yf0}},
   624  }
   625  
   626  var ycompp = []ytab{ // one (Yf0, Yrf) row; see the caveat on the row itself
   627  	{Zo_m, 2, argList{Yf0, Yrf}}, /* botch is really f0,f1 */
   628  }
   629  
   630  var ystsw = []ytab{ // a Ym operand (Zo_m), or a Yax operand encoded as a literal (Zlit)
   631  	{Zo_m, 2, argList{Ym}},
   632  	{Zlit, 1, argList{Yax}},
   633  }
   634  
   635  var ysvrs_mo = []ytab{ // a single Ym operand using Zm_o; ysvrs_om is the Zo_m counterpart
   636  	{Zm_o, 2, argList{Ym}},
   637  }
   638  
   639  // unaryDst version of "ysvrs_mo".
   640  var ysvrs_om = []ytab{
   641  	{Zo_m, 2, argList{Ym}}, // same Ym operand as ysvrs_mo; only the Z case differs (Zo_m instead of Zm_o)
   642  }
   643  
   644  var ymm = []ytab{ // an MMX-class row (Ymm, Ymr) followed by an XMM-class row (Yxm, Yxr)
   645  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   646  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   647  }
   648  
   649  var yxm = []ytab{ // Yxm source, Yxr destination (Zm_r_xm)
   650  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   651  }
   652  
   653  var yxm_q4 = []ytab{ // same operands as yxm but encoded with Zm_r instead of Zm_r_xm
   654  	{Zm_r, 1, argList{Yxm, Yxr}},
   655  }
   656  
   657  var yxcvm1 = []ytab{ // Yxm source; destination Yxr or Ymr
   658  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   659  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   660  }
   661  
   662  var yxcvm2 = []ytab{ // Yxr destination; source Yxm or Ymm
   663  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   664  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   665  }
   666  
   667  var yxr = []ytab{ // both operands must be Yxr
   668  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   669  }
   670  
   671  var yxr_ml = []ytab{ // Yxr source, Yml destination (Zr_m_xm)
   672  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   673  }
   674  
   675  var ymr = []ytab{ // both operands must be Ymr (Zm_r)
   676  	{Zm_r, 1, argList{Ymr, Ymr}},
   677  }
   678  
   679  var ymr_ml = []ytab{ // Ymr source, Yml destination (Zr_m_xm)
   680  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   681  }
   682  
   683  var yxcmpi = []ytab{ // three operands: Yxm, Yxr, then a Yi8 immediate (Zm_r_i_xm)
   684  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   685  }
   686  
   687  var yxmov = []ytab{ // Yxm into Yxr (Zm_r_xm) and Yxr into Yxm (Zr_m_xm)
   688  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   689  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   690  }
   691  
   692  var yxcvfl = []ytab{ // Yxm source, Yrl destination; one opcode byte
   693  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   694  }
   695  
   696  var yxcvlf = []ytab{ // Yml source, Yxr destination; one opcode byte
   697  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   698  }
   699  
   700  var yxcvfq = []ytab{ // same operands as yxcvfl, with zoffset 2 instead of 1
   701  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   702  }
   703  
   704  var yxcvqf = []ytab{ // same operands as yxcvlf, with zoffset 2 instead of 1
   705  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   706  }
   707  
   708  var yps = []ytab{ // for each of the MMX and XMM classes: a register/memory source row and a Yi8 immediate row
   709  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   710  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   711  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   712  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   713  }
   714  
   715  var yxrrl = []ytab{ // Yxr source, Yrl destination (Zm_r)
   716  	{Zm_r, 1, argList{Yxr, Yrl}},
   717  }
   718  
   719  var ymrxr = []ytab{ // Ymr into Yxr (Zm_r), or Yxm into Yxr (Zm_r_xm)
   720  	{Zm_r, 1, argList{Ymr, Yxr}},
   721  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   722  }
   723  
   724  var ymshuf = []ytab{ // three operands: Yi8 immediate, Ymm, Ymr (Zibm_r)
   725  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   726  }
   727  
   728  var ymshufb = []ytab{ // Yxm source, Yxr destination, using the Zm2_r case
   729  	{Zm2_r, 2, argList{Yxm, Yxr}},
   730  }
   731  
   732  // yxshuf should never have more than 1 entry,
   733  // because some optab entries have opcode sequences that
   734  // are longer than 2 bytes (zoffset=2 here),
   735  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   736  // to name a few.
   737  var yxshuf = []ytab{
   738  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   739  }
   740  
   741  var yextrw = []ytab{ // Yu8 immediate, Yxr source, Yrl destination (Zibm_r)
   742  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   743  }
   744  
   745  var yextr = []ytab{ // Yu8 immediate, Yxr source, Ymm destination (Zibr_m); consumes three opcode bytes
   746  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   747  }
   748  
   749  var yinsrw = []ytab{ // Yu8 immediate, Yml source, Yxr destination (Zibm_r)
   750  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   751  }
   752  
   753  var yinsr = []ytab{ // Yu8 immediate, Ymm source, Yxr destination (Zibm_r); consumes three opcode bytes
   754  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   755  }
   756  
   757  var ypsdq = []ytab{ // Yi8 immediate applied to a Yxr operand (Zibo_m)
   758  	{Zibo_m, 2, argList{Yi8, Yxr}},
   759  }
   760  
   761  var ymskb = []ytab{ // Yxr or Ymr source, Yrl destination; the XMM row is listed first
   762  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   763  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   764  }
   765  
   766  var ycrc32l = []ytab{ // Yml source, Yrl destination (Zlitm_r) with zoffset 0
   767  	{Zlitm_r, 0, argList{Yml, Yrl}},
   768  }
   769  
   770  var yprefetch = []ytab{ // a single Ym operand (Zm_o); identical to ysvrs_mo
   771  	{Zm_o, 2, argList{Ym}},
   772  }
   773  
   774  var yaes = []ytab{ // Yxm source, Yxr destination (Zlitm_r)
   775  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   776  }
   777  
   778  var yxbegin = []ytab{ // a single Ybr operand, encoded with the Zjmp case
   779  	{Zjmp, 1, argList{Ybr}},
   780  }
   781  
   782  var yxabort = []ytab{ // a single uint8 immediate (Yu8) operand (Zib_)
   783  	{Zib_, 1, argList{Yu8}},
   784  }
   785  
   786  var ylddqu = []ytab{ // Ym source, Yxr destination (Zm_r)
   787  	{Zm_r, 1, argList{Ym, Yxr}},
   788  }
   789  
   790  var ypalignr = []ytab{ // same single row as yxshuf
   791  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   792  }
   793  
   794  // VEX instructions that come in two forms:
   795  //	VTHING xmm2/m128, xmmV, xmm1
   796  //	VTHING ymm2/m256, ymmV, ymm1
   797  // The opcode array in the corresponding Optab entry
   798  // should contain the (VEX prefixes, opcode byte) pair
   799  // for each of the two forms.
   800  // For example, the entries for VPXOR are:
   801  //
   802  //	VPXOR xmm2/m128, xmmV, xmm1
   803  //	VEX.NDS.128.66.0F.WIG EF /r
   804  //
   805  //	VPXOR ymm2/m256, ymmV, ymm1
   806  //	VEX.NDS.256.66.0F.WIG EF /r
   807  //
   808  // Produce this Optab entry:
   809  //
   810  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0xEF, VEX_NDS_256_66_0F_WIG, 0xEF}}
   811  //
   812  var yvex_xy3 = []ytab{ // row 1 is the xmm2/m128, xmmV, xmm1 form; row 2 is the ymm2/m256, ymmV, ymm1 form
   813  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   814  	{Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}},
   815  }
   816  
   817  var yvex_x3 = []ytab{ // only the XMM row of yvex_xy3
   818  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   819  }
   820  
   821  var yvex_ri3 = []ytab{ // Yi8 immediate, Ymb, Yrl (Zvex_i_rm_r)
   822  	{Zvex_i_rm_r, 2, argList{Yi8, Ymb, Yrl}},
   823  }
   824  
   825  var yvex_xyi3 = []ytab{ // XMM and YMM rows with a Yu8 immediate, then the same two rows again with Yi8
   826  	{Zvex_i_rm_r, 2, argList{Yu8, Yxm, Yxr}},
   827  	{Zvex_i_rm_r, 2, argList{Yu8, Yym, Yyr}},
   828  	{Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}},
   829  	{Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}},
   830  }
   831  
   832  var yvex_yyi4 = []ytab{ // four operands, YMM only; the second row of yvex_vpalignr
   833  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yym, Yyr, Yyr}},
   834  }
   835  
   836  var yvex_xyi4 = []ytab{ // like yvex_yyi4, but the register/memory operand is the XMM class (Yxm)
   837  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yyr, Yyr}},
   838  }
   839  
   840  var yvex_shift = []ytab{ // Yi8-immediate rows (Zvex_i_r_v) first, then rows whose first operand is Yxm; yvex_vps has the same rows in a different order
   841  	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
   842  	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
   843  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   844  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr, Yyr}},
   845  }
   846  
   847  var yvex_shift_dq = []ytab{ // only the two Yi8-immediate rows of yvex_shift
   848  	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
   849  	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
   850  }
   851  
   852  var yvex_r3 = []ytab{ // operands Yml, Yrl, Yrl (Zvex_rm_v_r)
   853  	{Zvex_rm_v_r, 2, argList{Yml, Yrl, Yrl}},
   854  }
   855  
   856  var yvex_vmr3 = []ytab{ // same classes as yvex_r3 with the first two operands swapped (Zvex_v_rm_r)
   857  	{Zvex_v_rm_r, 2, argList{Yrl, Yml, Yrl}},
   858  }
   859  
   860  var yvex_xy2 = []ytab{ // two-operand XMM row, then the two-operand YMM row
   861  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   862  	{Zvex_rm_v_r, 2, argList{Yym, Yyr}},
   863  }
   864  
   865  var yvex_xyr2 = []ytab{ // Yxr or Yyr source, Yrl destination
   866  	{Zvex_rm_v_r, 2, argList{Yxr, Yrl}},
   867  	{Zvex_rm_v_r, 2, argList{Yyr, Yrl}},
   868  }
   869  
   870  var yvex_vmovdqa = []ytab{ // for XMM then YMM: register/memory into register (Zvex_rm_v_r), then register into register/memory (Zvex_r_v_rm)
   871  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   872  	{Zvex_r_v_rm, 2, argList{Yxr, Yxm}},
   873  	{Zvex_rm_v_r, 2, argList{Yym, Yyr}},
   874  	{Zvex_r_v_rm, 2, argList{Yyr, Yym}},
   875  }
   876  
   877  var yvex_vmovntdq = []ytab{ // Yxr or Yyr source, Ym destination (Zvex_r_v_rm)
   878  	{Zvex_r_v_rm, 2, argList{Yxr, Ym}},
   879  	{Zvex_r_v_rm, 2, argList{Yyr, Ym}},
   880  }
   881  
   882  var yvex_vpbroadcast = []ytab{ // Yxm source; destination Yxr or Yyr
   883  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   884  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr}},
   885  }
   886  
   887  var yvex_vpbroadcast_sd = []ytab{ // only the Yyr-destination row of yvex_vpbroadcast
   888  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr}},
   889  }
   890  
   891  var yvex_vpextrw = []ytab{ // Yi8 immediate and Yxr source; destination Yrl (Zvex_i_rm_r) or Yml (Zvex_i_r_rm)
   892  	{Zvex_i_rm_r, 2, argList{Yi8, Yxr, Yrl}},
   893  	{Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yml}},
   894  }
   895  
   896  var yvex_m = []ytab{ // a single Ym operand (Zvex_rm_v_ro); consumes three opcode bytes
   897  	{Zvex_rm_v_ro, 3, argList{Ym}},
   898  }
   899  
   900  var yvex_xx3 = []ytab{ // three Yxr operands; no memory form
   901  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}},
   902  }
   903  
   904  var yvex_yi3 = []ytab{ // Yi8 immediate, Yyr source, Yxm destination; the first row of yvex_vcvtps2ph
   905  	{Zvex_i_r_rm, 2, argList{Yi8, Yyr, Yxm}},
   906  }
   907  
   908  var yvex_mxy = []ytab{ // Ym source; destination Yxr or Yyr
   909  	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
   910  	{Zvex_rm_v_r, 2, argList{Ym, Yyr}},
   911  }
   912  
   913  var yvex_yy3 = []ytab{ // only the YMM row of yvex_xy3
   914  	{Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}},
   915  }
   916  
   917  var yvex_xi3 = []ytab{ // Yi8 immediate, Yxm source, Yxr destination
   918  	{Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}},
   919  }
   920  
   921  var yvex_vpermpd = []ytab{ // the YMM counterpart of yvex_xi3: Yi8 immediate, Yym source, Yyr destination
   922  	{Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}},
   923  }
   924  
   925  var yvex_vpermilp = []ytab{ // for XMM then YMM: a Yi8-immediate row followed by a three-register/memory row
   926  	{Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}},
   927  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   928  	{Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}},
   929  	{Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}},
   930  }
   931  
   932  var yvex_vcvtps2ph = []ytab{ // Yi8 immediate, Yyr or Yxr source (YMM row first), Yxm destination
   933  	{Zvex_i_r_rm, 2, argList{Yi8, Yyr, Yxm}},
   934  	{Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yxm}},
   935  }
   936  
   937  var yvex_vbroadcastf = []ytab{ // Ym source, Yyr destination; the second row of yvex_mxy
   938  	{Zvex_rm_v_r, 2, argList{Ym, Yyr}},
   939  }
   940  
   941  var yvex_vmovd = []ytab{ // Yxr into Yml (Zvex_r_v_rm), or Yml into Yxr (Zvex_rm_v_r)
   942  	{Zvex_r_v_rm, 2, argList{Yxr, Yml}},
   943  	{Zvex_rm_v_r, 2, argList{Yml, Yxr}},
   944  }
   945  
   946  var yvex_x2 = []ytab{ // only the XMM row of yvex_xy2
   947  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   948  }
   949  
   950  var yvex_y2 = []ytab{ // Yym source with a Yxr (XMM) destination
   951  	{Zvex_rm_v_r, 2, argList{Yym, Yxr}},
   952  }
   953  
   954  var yvex = []ytab{ // no operands (Zvex)
   955  	{Zvex, 2, argList{}},
   956  }
   957  
   958  var yvex_xx2 = []ytab{ // two Yxr operands; no memory form
   959  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr}},
   960  }
   961  
   962  var yvex_vpalignr = []ytab{ // four operands with a Yu8 immediate: the XMM row, then the YMM row
   963  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yxr, Yxr}},
   964  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yym, Yyr, Yyr}},
   965  }
   966  
   967  var yvex_rxi4 = []ytab{ // Yu8 immediate, Yml source, then two Yxr operands
   968  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yml, Yxr, Yxr}},
   969  }
   970  
   971  var yvex_xxi4 = []ytab{ // only the XMM row of yvex_vpalignr
   972  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yxr, Yxr}},
   973  }
   974  
   975  var yvex_xy4 = []ytab{ // four register-class operands led by a register (Zvex_hr_rm_v_r): the XMM row, then the YMM row
   976  	{Zvex_hr_rm_v_r, 2, argList{Yxr, Yxm, Yxr, Yxr}},
   977  	{Zvex_hr_rm_v_r, 2, argList{Yyr, Yym, Yyr, Yyr}},
   978  }
   979  
   980  var yvex_vpbroadcast_ss = []ytab{ // Ym or Yxr source; Yxr destination rows first, then Yyr destination rows
   981  	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
   982  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr}},
   983  	{Zvex_rm_v_r, 2, argList{Ym, Yyr}},
   984  	{Zvex_rm_v_r, 2, argList{Yxr, Yyr}},
   985  }
   986  
   987  var yvex_vblendvpd = []ytab{ // two Zvex_r_v_rm rows ending in Yml, then two Zvex_rm_v_r rows starting with Ym; XMM before YMM in each pair
   988  	{Zvex_r_v_rm, 2, argList{Yxr, Yxr, Yml}},
   989  	{Zvex_r_v_rm, 2, argList{Yyr, Yyr, Yml}},
   990  	{Zvex_rm_v_r, 2, argList{Ym, Yxr, Yxr}},
   991  	{Zvex_rm_v_r, 2, argList{Ym, Yyr, Yyr}},
   992  }
   993  
   994  var yvex_vmov = []ytab{ // Yxr into Ym, Ym into Yxr, and a three-Yxr form
   995  	{Zvex_r_v_rm, 2, argList{Yxr, Ym}},
   996  	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
   997  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}},
   998  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}}, // NOTE(review): identical to the row above, so it can never be the first match — confirm whether one of the two was meant to differ
   999  }
  1000  
  1001  var yvex_vps = []ytab{ // the same four rows as yvex_shift, ordered XMM pair first, then YMM pair
  1002  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
  1003  	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
  1004  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr, Yyr}},
  1005  	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
  1006  }
  1007  
  1008  var yvex_r2 = []ytab{ // Yml source, Yrl destination (Zvex_rm_r_vo); consumes three opcode bytes
  1009  	{Zvex_rm_r_vo, 3, argList{Yml, Yrl}},
  1010  }
  1011  
  1012  var yvex_vpextr = []ytab{ // Yi8 immediate, Yxr source, Yml destination; the second row of yvex_vpextrw
  1013  	{Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yml}},
  1014  }
  1015  
  1016  var yvex_rx3 = []ytab{ // Yml source followed by two Yxr operands
  1017  	{Zvex_rm_v_r, 2, argList{Yml, Yxr, Yxr}},
  1018  }
  1019  
  1020  var yvex_vcvtsd2si = []ytab{ // Yxm source, Yrl destination
  1021  	{Zvex_rm_v_r, 2, argList{Yxm, Yrl}},
  1022  }
  1023  
  1024  var yvex_vmovhpd = []ytab{ // Yxr into Ym (two operands), or Ym plus two Yxr operands
  1025  	{Zvex_r_v_rm, 2, argList{Yxr, Ym}},
  1026  	{Zvex_rm_v_r, 2, argList{Ym, Yxr, Yxr}},
  1027  }
  1028  
  1029  var yvex_vmovq = []ytab{ // two-operand forms pairing Yxr with Yml, Ym, Yxr or Yxm; earlier rows win when several match
  1030  	{Zvex_r_v_rm, 2, argList{Yxr, Yml}},
  1031  	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
  1032  	{Zvex_rm_v_r, 2, argList{Yml, Yxr}},
  1033  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr}},
  1034  	{Zvex_r_v_rm, 2, argList{Yxr, Yxm}},
  1035  }
  1036  
  1037  var ymmxmm0f38 = []ytab{ // Zlitm_r rows for the MMX classes (3 opcode bytes) and the XMM classes (5 opcode bytes)
  1038  	{Zlitm_r, 3, argList{Ymm, Ymr}},
  1039  	{Zlitm_r, 5, argList{Yxm, Yxr}},
  1040  }
  1041  
  1042  var yextractps = []ytab{ // Yu2 immediate ($x fitting in uint2), Yxr source, Yml destination (Zibr_m)
  1043  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
  1044  }
  1045  
  1046  /*
  1047   * You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
  1048   * ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
  1049   * to find the entry with the given p.As and then looks through the ytable for
  1050   * that instruction (the second field in the optab struct) for a line whose
  1051   * argList matches the Ytypes of the instruction's operands.  The
  1052   * function oclass computes the specific Ytype of an operand and then the set
  1053   * of more general Ytypes that it satisfies is implied by the ycover table, set
  1054   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
  1055   * from the more general 8-bit constants, but instinit says
  1056   *
  1057   *        ycover[Yi0*Ymax+Ys32] = 1
  1058   *        ycover[Yi1*Ymax+Ys32] = 1
  1059   *        ycover[Yi8*Ymax+Ys32] = 1
  1060   *
  1061   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
  1062   * if that's what an instruction can handle.
  1063   *
  1064   * In parallel with the scan through the ytable for the appropriate line, there
  1065   * is a z pointer that starts out pointing at the strange magic byte list in
  1066   * the Optab struct.  With each step past a non-matching ytable line, z
 * advances by the 2nd entry in the line.  When a matching line is found, that
 * z pointer has the extra data to use in laying down the instruction bytes.
 * The actual bytes laid down are a function of the 1st entry in the line (that
 * is, the Ztype) and the z bytes.
  1071   *
  1072   * For example, let's look at AADDL.  The optab line says:
  1073   *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1074   *
  1075   * and yaddl says
 *        var yaddl = []ytab{
 *                {Zibo_m, 2, argList{Yi8, Yml}},
 *                {Zil_, 1, argList{Yi32, Yax}},
 *                {Zilo_m, 2, argList{Yi32, Yml}},
 *                {Zr_m, 1, argList{Yrl, Yml}},
 *                {Zm_r, 1, argList{Yml, Yrl}},
 *        }
  1083   *
  1084   * so there are 5 possible types of ADDL instruction that can be laid down, and
  1085   * possible states used to lay them down (Ztype and z pointer, assuming z
 * points at [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}) are:
  1087   *
  1088   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
  1089   *        Yi32, Yax -> Zil_, z+2 (0x05)
  1090   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
  1091   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
  1092   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
  1093   *
 * The P constant in the optab line (Px for AADDL) controls the prefix bytes to
 * emit.  That's relatively straightforward as this program goes.
  1096   *
  1097   * The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
  1098   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
  1099   * encoded addressing mode for the Yml arg), and then a single immediate byte.
  1100   * Zilo_m is the same but a long (32-bit) immediate.
  1101   */
  1102  var optab =
/*	as, ytab, prefix, opcode */
  1104  []Optab{
  1105  	{obj.AXXX, nil, 0, [23]uint8{}},
  1106  	{AAAA, ynone, P32, [23]uint8{0x37}},
  1107  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
  1108  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
  1109  	{AAAS, ynone, P32, [23]uint8{0x3f}},
  1110  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x12}},
  1111  	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
  1112  	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
  1113  	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
  1114  	{AADCXL, yml_rl, Pq4, [23]uint8{0xf6}},
  1115  	{AADCXQ, yml_rl, Pq4w, [23]uint8{0xf6}},
  1116  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
  1117  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1118  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
  1119  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
  1120  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1121  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
  1122  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
  1123  	{AADDSUBPD, yxm, Pq, [23]uint8{0xd0}},
  1124  	{AADDSUBPS, yxm, Pf2, [23]uint8{0xd0}},
  1125  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1126  	{AADOXL, yml_rl, Pq5, [23]uint8{0xf6}},
  1127  	{AADOXQ, yml_rl, Pq5w, [23]uint8{0xf6}},
  1128  	{AADJSP, nil, 0, [23]uint8{}},
  1129  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
  1130  	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1131  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
  1132  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
  1133  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
  1134  	{AANDPS, yxm, Pm, [23]uint8{0x54}},
  1135  	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1136  	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1137  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
  1138  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
  1139  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
  1140  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
  1141  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
  1142  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
  1143  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
  1144  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
  1145  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
  1146  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
  1147  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
  1148  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
  1149  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
  1150  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
  1151  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
  1152  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
  1153  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
  1154  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
  1155  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
  1156  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
  1157  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
  1158  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
  1159  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
  1160  	{ABYTE, ybyte, Px, [23]uint8{1}},
  1161  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
  1162  	{ACDQ, ynone, Px, [23]uint8{0x99}},
  1163  	{ACLC, ynone, Px, [23]uint8{0xf8}},
  1164  	{ACLD, ynone, Px, [23]uint8{0xfc}},
  1165  	{ACLFLUSH, yclflush, Pm, [23]uint8{0xae, 07}},
  1166  	{ACLI, ynone, Px, [23]uint8{0xfa}},
  1167  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
  1168  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1169  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1170  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1171  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1172  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1173  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1174  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1175  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1176  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1177  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1178  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1179  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1180  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1181  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1182  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1183  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1184  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1185  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1186  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1187  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1188  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1189  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1190  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1191  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1192  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1193  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1194  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1195  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1196  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1197  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1198  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1199  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1200  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1201  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1202  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1203  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1204  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1205  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1206  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1207  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1208  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1209  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1210  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1211  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1212  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1213  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1214  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1215  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1216  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1217  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1218  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1219  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1220  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1221  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1222  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1223  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1224  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1225  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1226  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1227  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1228  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1229  	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
  1230  	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
  1231  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1232  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1233  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1234  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1235  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1236  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1237  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1238  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1239  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1240  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1241  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1242  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1243  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1244  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1245  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1246  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1247  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1248  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1249  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1250  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1251  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1252  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1253  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1254  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1255  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1256  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1257  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1258  	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
  1259  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1260  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1261  	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
  1262  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1263  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1264  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1265  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1266  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1267  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1268  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1269  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1270  	{ADPPD, yxshuf, Pq, [23]uint8{0x3a, 0x41, 0}},
  1271  	{ADPPS, yxshuf, Pq, [23]uint8{0x3a, 0x40, 0}},
  1272  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1273  	{AEXTRACTPS, yextractps, Pq, [23]uint8{0x3a, 0x17, 0}},
  1274  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1275  	{AFXRSTOR, ysvrs_mo, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1276  	{AFXSAVE, ysvrs_om, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1277  	{AFXRSTOR64, ysvrs_mo, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1278  	{AFXSAVE64, ysvrs_om, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1279  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1280  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1281  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1282  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1283  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1284  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1285  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1286  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1287  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1288  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1289  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1290  	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
  1291  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1292  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1293  	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
  1294  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1295  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1296  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1297  	{AINSERTPS, yxshuf, Pq, [23]uint8{0x3a, 0x21, 0}},
  1298  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1299  	{AINT, yint, Px, [23]uint8{0xcd}},
  1300  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1301  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1302  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1303  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1304  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1305  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1306  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1307  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1308  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1309  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1310  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1311  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1312  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1313  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1314  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1315  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1316  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1317  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1318  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1319  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1320  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1321  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1322  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1323  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1324  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1325  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1326  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1327  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1328  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1329  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1330  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1331  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1332  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1333  	{ALDMXCSR, ysvrs_mo, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1334  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1335  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1336  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1337  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1338  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1339  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1340  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1341  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1342  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1343  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1344  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1345  	{ALONG, ybyte, Px, [23]uint8{4}},
  1346  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1347  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1348  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1349  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1350  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1351  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1352  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1353  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1354  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1355  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1356  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1357  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1358  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1359  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1360  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1361  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1362  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1363  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1364  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1365  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1366  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1367  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1368  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1369  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1370  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1371  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1372  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1373  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1374  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1375  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1376  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1377  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1378  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1379  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1380  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1381  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1382  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1383  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1384  	{AMOVNTDQA, ylddqu, Pq4, [23]uint8{0x2a}},
  1385  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1386  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1387  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1388  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1389  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1390  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1391  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1392  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1393  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1394  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1395  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1396  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1397  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1398  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1399  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1400  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1401  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1402  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1403  	{AMPSADBW, yxshuf, Pq, [23]uint8{0x3a, 0x42, 0}},
  1404  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1405  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1406  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1407  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1408  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1409  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1410  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1411  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1412  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1413  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1414  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1415  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1416  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1417  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1418  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1419  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1420  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1421  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1422  	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1423  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1424  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1425  	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1426  	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1427  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1428  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1429  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1430  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1431  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1432  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1433  	{APABSB, yxm_q4, Pq4, [23]uint8{0x1c}},
  1434  	{APABSD, yxm_q4, Pq4, [23]uint8{0x1e}},
  1435  	{APABSW, yxm_q4, Pq4, [23]uint8{0x1d}},
  1436  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1437  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1438  	{APACKUSDW, yxm_q4, Pq4, [23]uint8{0x2b}},
  1439  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1440  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1441  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1442  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1443  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1444  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1445  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1446  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1447  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1448  	{APALIGNR, ypalignr, Pq, [23]uint8{0x3a, 0x0f}},
  1449  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1450  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1451  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1452  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1453  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1454  	{APBLENDW, yxshuf, Pq, [23]uint8{0x3a, 0x0e, 0}},
  1455  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1456  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1457  	{APCMPEQQ, yxm_q4, Pq4, [23]uint8{0x29}},
  1458  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1459  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1460  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1461  	{APCMPGTQ, yxm_q4, Pq4, [23]uint8{0x37}},
  1462  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1463  	{APCMPISTRI, yxshuf, Pq, [23]uint8{0x3a, 0x63, 0}},
  1464  	{APCMPISTRM, yxshuf, Pq, [23]uint8{0x3a, 0x62, 0}},
  1465  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1466  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1467  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1468  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1469  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1470  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1471  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1472  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1473  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1474  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1475  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1476  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1477  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1478  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1479  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1480  	{APMADDUBSW, yxm_q4, Pq4, [23]uint8{0x04}},
  1481  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1482  	{APMAXSB, yxm_q4, Pq4, [23]uint8{0x3c}},
  1483  	{APMAXSD, yxm_q4, Pq4, [23]uint8{0x3d}},
  1484  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1485  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1486  	{APMAXUD, yxm_q4, Pq4, [23]uint8{0x3f}},
  1487  	{APMAXUW, yxm_q4, Pq4, [23]uint8{0x3e}},
  1488  	{APMINSB, yxm_q4, Pq4, [23]uint8{0x38}},
  1489  	{APMINSD, yxm_q4, Pq4, [23]uint8{0x39}},
  1490  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1491  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1492  	{APMINUD, yxm_q4, Pq4, [23]uint8{0x3b}},
  1493  	{APMINUW, yxm_q4, Pq4, [23]uint8{0x3a}},
  1494  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1495  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1496  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1497  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1498  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1499  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1500  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1501  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1502  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1503  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1504  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1505  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1506  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1507  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1508  	{APMULHRSW, yxm_q4, Pq4, [23]uint8{0x0b}},
  1509  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1510  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1511  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1512  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1513  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1514  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1515  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1516  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1517  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1518  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1519  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1520  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1521  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1522  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1523  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1524  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1525  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1526  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1527  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1528  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1529  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1530  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1531  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1532  	{APSIGNB, yxm_q4, Pq4, [23]uint8{0x08}},
  1533  	{APSIGND, yxm_q4, Pq4, [23]uint8{0x0a}},
  1534  	{APSIGNW, yxm_q4, Pq4, [23]uint8{0x09}},
  1535  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1536  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1537  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1538  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1539  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1540  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1541  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1542  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1543  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1544  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1545  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1546  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1547  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1548  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1549  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1550  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1551  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1552  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1553  	{APTEST, yxm_q4, Pq4, [23]uint8{0x17}},
  1554  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1555  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1556  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1557  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1558  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1559  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1560  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1561  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1562  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1563  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1564  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1565  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1566  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1567  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1568  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1569  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1570  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1571  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1572  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1573  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1574  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1575  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1576  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1577  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1578  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1579  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1580  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1581  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1582  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1583  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1584  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1585  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1586  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1587  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1588  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1589  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1590  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1591  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1592  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1593  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1594  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1595  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1596  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1597  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1598  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1599  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1600  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1601  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1602  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1603  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1604  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1605  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1606  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1607  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1608  	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1609  	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1610  	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1611  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1612  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1613  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1614  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1615  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1616  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1617  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1618  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1619  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1620  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1621  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1622  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1623  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1624  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1625  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1626  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1627  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1628  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1629  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1630  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1631  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1632  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1633  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1634  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1635  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1636  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1637  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1638  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1639  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1640  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1641  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1642  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1643  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1644  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1645  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1646  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1647  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1648  	{ASTMXCSR, ysvrs_om, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1649  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1650  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1651  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1652  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1653  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1654  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1655  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1656  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1657  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1658  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1659  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1660  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1661  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1662  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1663  	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1664  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1665  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1666  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1667  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1668  	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
  1669  	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
  1670  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1671  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1672  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1673  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1674  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1675  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1676  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1677  	{AWORD, ybyte, Px, [23]uint8{2}},
  1678  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1679  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1680  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1681  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1682  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1683  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1684  	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1685  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1686  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1687  	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1688  	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1689  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1690  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1691  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1692  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1693  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1694  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1695  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1696  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1697  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1698  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1699  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1700  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1701  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1702  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1703  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1704  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1705  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1706  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1707  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1708  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1709  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1710  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1711  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1712  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1713  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1714  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1715  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1716  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1717  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1718  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1719  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1720  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1721  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1722  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1723  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1724  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1725  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1726  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1727  	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
  1728  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1729  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1730  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1731  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1732  	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
  1733  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1734  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1735  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1736  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1737  	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
  1738  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1739  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1740  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1741  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1742  	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
  1743  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1744  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1745  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1746  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1747  	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
  1748  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1749  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1750  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1751  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1752  	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
  1753  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1754  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1755  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1756  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1757  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1758  	{AFFREE, nil, 0, [23]uint8{}},
  1759  	{AFLDCW, ysvrs_mo, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1760  	{AFLDENV, ysvrs_mo, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1761  	{AFRSTOR, ysvrs_mo, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1762  	{AFSAVE, ysvrs_om, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1763  	{AFSTCW, ysvrs_om, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1764  	{AFSTENV, ysvrs_om, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1765  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1766  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1767  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1768  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1769  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1770  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1771  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1772  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1773  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1774  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1775  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1776  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1777  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1778  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1779  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1780  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1781  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1782  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1783  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1784  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1785  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1786  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1787  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1788  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1789  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1790  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1791  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1792  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1793  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1794  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1795  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1796  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1797  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1798  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1799  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1800  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1801  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1802  	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
  1803  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1804  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1805  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1806  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1807  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1808  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1809  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1810  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1811  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1812  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1813  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1814  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1815  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1816  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1817  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1818  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1819  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1820  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1821  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1822  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1823  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1824  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1825  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1826  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1827  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1828  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1829  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1830  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1831  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1832  	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1833  	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
  1834  	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
  1835  	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1836  	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1837  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1838  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1839  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1840  	{APCMPESTRM, yxshuf, Pq, [23]uint8{0x3a, 0x60, 0}},
  1841  	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
  1842  	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
  1843  	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
  1844  
  1845  	{ABLENDPD, yxshuf, Pq, [23]uint8{0x3a, 0x0d, 0}},
  1846  	{ABLENDPS, yxshuf, Pq, [23]uint8{0x3a, 0x0c, 0}},
  1847  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1848  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1849  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1850  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1851  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1852  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1853  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1854  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1855  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1856  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1857  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1858  	{obj.AEND, nil, 0, [23]uint8{}},
  1859  	{0, nil, 0, [23]uint8{}},
  1860  }
  1861  
// opindex maps an opcode (masked with obj.AMask) to its encoding table entry.
// It is filled in by instinit from vexOptab and optab.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1863  
  1864  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1865  // This happens on systems like Solaris that call .so functions instead of system calls.
  1866  // It does not seem to be necessary for any other systems. This is probably working
  1867  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1868  // what that bug is. And this does fix it.
  1869  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1870  	if ctxt.Headtype == objabi.Hsolaris {
  1871  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1872  		return strings.HasPrefix(s.Name, "libc_")
  1873  	}
  1874  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1875  }
  1876  
  1877  // single-instruction no-ops of various lengths.
  1878  // constructed by hand and disassembled with gdb to verify.
  1879  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1880  var nop = [][16]uint8{
  1881  	{0x90},
  1882  	{0x66, 0x90},
  1883  	{0x0F, 0x1F, 0x00},
  1884  	{0x0F, 0x1F, 0x40, 0x00},
  1885  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1886  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1887  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1888  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1889  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1890  }
  1891  
  1892  // Native Client rejects the repeated 0x66 prefix.
  1893  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1894  func fillnop(p []byte, n int) {
  1895  	var m int
  1896  
  1897  	for n > 0 {
  1898  		m = n
  1899  		if m > len(nop) {
  1900  			m = len(nop)
  1901  		}
  1902  		copy(p[:m], nop[m-1][:m])
  1903  		p = p[m:]
  1904  		n -= m
  1905  	}
  1906  }
  1907  
  1908  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1909  	s.Grow(int64(c) + int64(pad))
  1910  	fillnop(s.P[c:], int(pad))
  1911  	return c + pad
  1912  }
  1913  
  1914  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1915  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1916  		return l
  1917  	}
  1918  	return q
  1919  }
  1920  
  1921  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  1922  	if s.P != nil {
  1923  		return
  1924  	}
  1925  
  1926  	if ycover[0] == 0 {
  1927  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  1928  	}
  1929  
  1930  	var asmbuf AsmBuf
  1931  
  1932  	for p := s.Func.Text; p != nil; p = p.Link {
  1933  		if p.To.Type == obj.TYPE_BRANCH {
  1934  			if p.Pcond == nil {
  1935  				p.Pcond = p
  1936  			}
  1937  		}
  1938  		if p.As == AADJSP {
  1939  			p.To.Type = obj.TYPE_REG
  1940  			p.To.Reg = REG_SP
  1941  			v := int32(-p.From.Offset)
  1942  			p.From.Offset = int64(v)
  1943  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1944  			if v < 0 {
  1945  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1946  				v = -v
  1947  				p.From.Offset = int64(v)
  1948  			}
  1949  
  1950  			if v == 0 {
  1951  				p.As = obj.ANOP
  1952  			}
  1953  		}
  1954  	}
  1955  
  1956  	var q *obj.Prog
  1957  	var count int64 // rough count of number of instructions
  1958  	for p := s.Func.Text; p != nil; p = p.Link {
  1959  		count++
  1960  		p.Back = 2 // use short branches first time through
  1961  		q = p.Pcond
  1962  		if q != nil && (q.Back&2 != 0) {
  1963  			p.Back |= 1 // backward jump
  1964  			q.Back |= 4 // loop head
  1965  		}
  1966  
  1967  		if p.As == AADJSP {
  1968  			p.To.Type = obj.TYPE_REG
  1969  			p.To.Reg = REG_SP
  1970  			v := int32(-p.From.Offset)
  1971  			p.From.Offset = int64(v)
  1972  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1973  			if v < 0 {
  1974  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1975  				v = -v
  1976  				p.From.Offset = int64(v)
  1977  			}
  1978  
  1979  			if v == 0 {
  1980  				p.As = obj.ANOP
  1981  			}
  1982  		}
  1983  	}
  1984  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1985  
  1986  	n := 0
  1987  	var c int32
  1988  	errors := ctxt.Errors
  1989  	for {
  1990  		loop := int32(0)
  1991  		for i := range s.R {
  1992  			s.R[i] = obj.Reloc{}
  1993  		}
  1994  		s.R = s.R[:0]
  1995  		s.P = s.P[:0]
  1996  		c = 0
  1997  		for p := s.Func.Text; p != nil; p = p.Link {
  1998  			if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 {
  1999  				// pad everything to avoid crossing 32-byte boundary
  2000  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  2001  					c = naclpad(ctxt, s, c, -c&31)
  2002  				}
  2003  
  2004  				// pad call deferreturn to start at 32-byte boundary
  2005  				// so that subtracting 5 in jmpdefer will jump back
  2006  				// to that boundary and rerun the call.
  2007  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  2008  					c = naclpad(ctxt, s, c, -c&31)
  2009  				}
  2010  
  2011  				// pad call to end at 32-byte boundary
  2012  				if p.As == obj.ACALL {
  2013  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  2014  				}
  2015  
  2016  				// the linker treats REP and STOSQ as different instructions
  2017  				// but in fact the REP is a prefix on the STOSQ.
  2018  				// make sure REP has room for 2 more bytes, so that
  2019  				// padding will not be inserted before the next instruction.
  2020  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  2021  					c = naclpad(ctxt, s, c, -c&31)
  2022  				}
  2023  
  2024  				// same for LOCK.
  2025  				// various instructions follow; the longest is 4 bytes.
  2026  				// give ourselves 8 bytes so as to avoid surprises.
  2027  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  2028  					c = naclpad(ctxt, s, c, -c&31)
  2029  				}
  2030  			}
  2031  
  2032  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  2033  				// pad with NOPs
  2034  				v := -c & (LoopAlign - 1)
  2035  
  2036  				if v <= MaxLoopPad {
  2037  					s.Grow(int64(c) + int64(v))
  2038  					fillnop(s.P[c:], int(v))
  2039  					c += v
  2040  				}
  2041  			}
  2042  
  2043  			p.Pc = int64(c)
  2044  
  2045  			// process forward jumps to p
  2046  			for q = p.Rel; q != nil; q = q.Forwd {
  2047  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  2048  				if q.Back&2 != 0 { // short
  2049  					if v > 127 {
  2050  						loop++
  2051  						q.Back ^= 2
  2052  					}
  2053  
  2054  					if q.As == AJCXZL || q.As == AXBEGIN {
  2055  						s.P[q.Pc+2] = byte(v)
  2056  					} else {
  2057  						s.P[q.Pc+1] = byte(v)
  2058  					}
  2059  				} else {
  2060  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  2061  				}
  2062  			}
  2063  
  2064  			p.Rel = nil
  2065  
  2066  			p.Pc = int64(c)
  2067  			asmbuf.asmins(ctxt, s, p)
  2068  			m := asmbuf.Len()
  2069  			if int(p.Isize) != m {
  2070  				p.Isize = uint8(m)
  2071  				loop++
  2072  			}
  2073  
  2074  			s.Grow(p.Pc + int64(m))
  2075  			copy(s.P[p.Pc:], asmbuf.Bytes())
  2076  			c += int32(m)
  2077  		}
  2078  
  2079  		n++
  2080  		if n > 20 {
  2081  			ctxt.Diag("span must be looping")
  2082  			log.Fatalf("loop")
  2083  		}
  2084  		if loop == 0 {
  2085  			break
  2086  		}
  2087  		if ctxt.Errors > errors {
  2088  			return
  2089  		}
  2090  	}
  2091  
  2092  	if ctxt.Headtype == objabi.Hnacl {
  2093  		c = naclpad(ctxt, s, c, -c&31)
  2094  	}
  2095  
  2096  	s.Size = int64(c)
  2097  
  2098  	if false { /* debug['a'] > 1 */
  2099  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  2100  		var i int
  2101  		for i = 0; i < len(s.P); i++ {
  2102  			fmt.Printf(" %.2x", s.P[i])
  2103  			if i%16 == 15 {
  2104  				fmt.Printf("\n  %.6x", uint(i+1))
  2105  			}
  2106  		}
  2107  
  2108  		if i%16 != 0 {
  2109  			fmt.Printf("\n")
  2110  		}
  2111  
  2112  		for i := 0; i < len(s.R); i++ {
  2113  			r := &s.R[i]
  2114  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  2115  		}
  2116  	}
  2117  }
  2118  
  2119  func instinit(ctxt *obj.Link) {
  2120  	if ycover[0] != 0 {
  2121  		// Already initialized; stop now.
  2122  		// This happens in the cmd/asm tests,
  2123  		// each of which re-initializes the arch.
  2124  		return
  2125  	}
  2126  
  2127  	switch ctxt.Headtype {
  2128  	case objabi.Hplan9:
  2129  		plan9privates = ctxt.Lookup("_privates")
  2130  	case objabi.Hnacl:
  2131  		deferreturn = ctxt.Lookup("runtime.deferreturn")
  2132  	}
  2133  
  2134  	for i := range vexOptab {
  2135  		c := vexOptab[i].as
  2136  		if opindex[c&obj.AMask] != nil {
  2137  			ctxt.Diag("phase error in vexOptab: %d (%v)", i, c)
  2138  		}
  2139  		opindex[c&obj.AMask] = &vexOptab[i]
  2140  	}
  2141  	for i := 1; optab[i].as != 0; i++ {
  2142  		c := optab[i].as
  2143  		if opindex[c&obj.AMask] != nil {
  2144  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2145  		}
  2146  		opindex[c&obj.AMask] = &optab[i]
  2147  	}
  2148  
  2149  	for i := 0; i < Ymax; i++ {
  2150  		ycover[i*Ymax+i] = 1
  2151  	}
  2152  
  2153  	ycover[Yi0*Ymax+Yu2] = 1
  2154  	ycover[Yi1*Ymax+Yu2] = 1
  2155  
  2156  	ycover[Yi0*Ymax+Yi8] = 1
  2157  	ycover[Yi1*Ymax+Yi8] = 1
  2158  	ycover[Yu2*Ymax+Yi8] = 1
  2159  	ycover[Yu7*Ymax+Yi8] = 1
  2160  
  2161  	ycover[Yi0*Ymax+Yu7] = 1
  2162  	ycover[Yi1*Ymax+Yu7] = 1
  2163  	ycover[Yu2*Ymax+Yu7] = 1
  2164  
  2165  	ycover[Yi0*Ymax+Yu8] = 1
  2166  	ycover[Yi1*Ymax+Yu8] = 1
  2167  	ycover[Yu2*Ymax+Yu8] = 1
  2168  	ycover[Yu7*Ymax+Yu8] = 1
  2169  
  2170  	ycover[Yi0*Ymax+Ys32] = 1
  2171  	ycover[Yi1*Ymax+Ys32] = 1
  2172  	ycover[Yu2*Ymax+Ys32] = 1
  2173  	ycover[Yu7*Ymax+Ys32] = 1
  2174  	ycover[Yu8*Ymax+Ys32] = 1
  2175  	ycover[Yi8*Ymax+Ys32] = 1
  2176  
  2177  	ycover[Yi0*Ymax+Yi32] = 1
  2178  	ycover[Yi1*Ymax+Yi32] = 1
  2179  	ycover[Yu2*Ymax+Yi32] = 1
  2180  	ycover[Yu7*Ymax+Yi32] = 1
  2181  	ycover[Yu8*Ymax+Yi32] = 1
  2182  	ycover[Yi8*Ymax+Yi32] = 1
  2183  	ycover[Ys32*Ymax+Yi32] = 1
  2184  
  2185  	ycover[Yi0*Ymax+Yi64] = 1
  2186  	ycover[Yi1*Ymax+Yi64] = 1
  2187  	ycover[Yu7*Ymax+Yi64] = 1
  2188  	ycover[Yu2*Ymax+Yi64] = 1
  2189  	ycover[Yu8*Ymax+Yi64] = 1
  2190  	ycover[Yi8*Ymax+Yi64] = 1
  2191  	ycover[Ys32*Ymax+Yi64] = 1
  2192  	ycover[Yi32*Ymax+Yi64] = 1
  2193  
  2194  	ycover[Yal*Ymax+Yrb] = 1
  2195  	ycover[Ycl*Ymax+Yrb] = 1
  2196  	ycover[Yax*Ymax+Yrb] = 1
  2197  	ycover[Ycx*Ymax+Yrb] = 1
  2198  	ycover[Yrx*Ymax+Yrb] = 1
  2199  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2200  
  2201  	ycover[Ycl*Ymax+Ycx] = 1
  2202  
  2203  	ycover[Yax*Ymax+Yrx] = 1
  2204  	ycover[Ycx*Ymax+Yrx] = 1
  2205  
  2206  	ycover[Yax*Ymax+Yrl] = 1
  2207  	ycover[Ycx*Ymax+Yrl] = 1
  2208  	ycover[Yrx*Ymax+Yrl] = 1
  2209  	ycover[Yrl32*Ymax+Yrl] = 1
  2210  
  2211  	ycover[Yf0*Ymax+Yrf] = 1
  2212  
  2213  	ycover[Yal*Ymax+Ymb] = 1
  2214  	ycover[Ycl*Ymax+Ymb] = 1
  2215  	ycover[Yax*Ymax+Ymb] = 1
  2216  	ycover[Ycx*Ymax+Ymb] = 1
  2217  	ycover[Yrx*Ymax+Ymb] = 1
  2218  	ycover[Yrb*Ymax+Ymb] = 1
  2219  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2220  	ycover[Ym*Ymax+Ymb] = 1
  2221  
  2222  	ycover[Yax*Ymax+Yml] = 1
  2223  	ycover[Ycx*Ymax+Yml] = 1
  2224  	ycover[Yrx*Ymax+Yml] = 1
  2225  	ycover[Yrl*Ymax+Yml] = 1
  2226  	ycover[Yrl32*Ymax+Yml] = 1
  2227  	ycover[Ym*Ymax+Yml] = 1
  2228  
  2229  	ycover[Yax*Ymax+Ymm] = 1
  2230  	ycover[Ycx*Ymax+Ymm] = 1
  2231  	ycover[Yrx*Ymax+Ymm] = 1
  2232  	ycover[Yrl*Ymax+Ymm] = 1
  2233  	ycover[Yrl32*Ymax+Ymm] = 1
  2234  	ycover[Ym*Ymax+Ymm] = 1
  2235  	ycover[Ymr*Ymax+Ymm] = 1
  2236  
  2237  	ycover[Ym*Ymax+Yxm] = 1
  2238  	ycover[Yxr*Ymax+Yxm] = 1
  2239  
  2240  	ycover[Ym*Ymax+Yym] = 1
  2241  	ycover[Yyr*Ymax+Yym] = 1
  2242  
  2243  	for i := 0; i < MAXREG; i++ {
  2244  		reg[i] = -1
  2245  		if i >= REG_AL && i <= REG_R15B {
  2246  			reg[i] = (i - REG_AL) & 7
  2247  			if i >= REG_SPB && i <= REG_DIB {
  2248  				regrex[i] = 0x40
  2249  			}
  2250  			if i >= REG_R8B && i <= REG_R15B {
  2251  				regrex[i] = Rxr | Rxx | Rxb
  2252  			}
  2253  		}
  2254  
  2255  		if i >= REG_AH && i <= REG_BH {
  2256  			reg[i] = 4 + ((i - REG_AH) & 7)
  2257  		}
  2258  		if i >= REG_AX && i <= REG_R15 {
  2259  			reg[i] = (i - REG_AX) & 7
  2260  			if i >= REG_R8 {
  2261  				regrex[i] = Rxr | Rxx | Rxb
  2262  			}
  2263  		}
  2264  
  2265  		if i >= REG_F0 && i <= REG_F0+7 {
  2266  			reg[i] = (i - REG_F0) & 7
  2267  		}
  2268  		if i >= REG_M0 && i <= REG_M0+7 {
  2269  			reg[i] = (i - REG_M0) & 7
  2270  		}
  2271  		if i >= REG_X0 && i <= REG_X0+15 {
  2272  			reg[i] = (i - REG_X0) & 7
  2273  			if i >= REG_X0+8 {
  2274  				regrex[i] = Rxr | Rxx | Rxb
  2275  			}
  2276  		}
  2277  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2278  			reg[i] = (i - REG_Y0) & 7
  2279  			if i >= REG_Y0+8 {
  2280  				regrex[i] = Rxr | Rxx | Rxb
  2281  			}
  2282  		}
  2283  
  2284  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2285  			regrex[i] = Rxr
  2286  		}
  2287  	}
  2288  }
  2289  
// isAndroid records whether the target operating system (objabi.GOOS) is
// "android". prefixof consults it when choosing the TLS segment register.
var isAndroid = (objabi.GOOS == "android")
  2291  
  2292  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2293  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2294  		return 0
  2295  	}
  2296  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2297  		switch a.Reg {
  2298  		case REG_CS:
  2299  			return 0x2e
  2300  
  2301  		case REG_DS:
  2302  			return 0x3e
  2303  
  2304  		case REG_ES:
  2305  			return 0x26
  2306  
  2307  		case REG_FS:
  2308  			return 0x64
  2309  
  2310  		case REG_GS:
  2311  			return 0x65
  2312  
  2313  		case REG_TLS:
  2314  			// NOTE: Systems listed here should be only systems that
  2315  			// support direct TLS references like 8(TLS) implemented as
  2316  			// direct references from FS or GS. Systems that require
  2317  			// the initial-exec model, where you load the TLS base into
  2318  			// a register and then index from that register, do not reach
  2319  			// this code and should not be listed.
  2320  			if ctxt.Arch.Family == sys.I386 {
  2321  				switch ctxt.Headtype {
  2322  				default:
  2323  					if isAndroid {
  2324  						return 0x65 // GS
  2325  					}
  2326  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2327  
  2328  				case objabi.Hdarwin,
  2329  					objabi.Hdragonfly,
  2330  					objabi.Hfreebsd,
  2331  					objabi.Hnetbsd,
  2332  					objabi.Hopenbsd:
  2333  					return 0x65 // GS
  2334  				}
  2335  			}
  2336  
  2337  			switch ctxt.Headtype {
  2338  			default:
  2339  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2340  
  2341  			case objabi.Hlinux:
  2342  				if isAndroid {
  2343  					return 0x64 // FS
  2344  				}
  2345  
  2346  				if ctxt.Flag_shared {
  2347  					log.Fatalf("unknown TLS base register for linux with -shared")
  2348  				} else {
  2349  					return 0x64 // FS
  2350  				}
  2351  
  2352  			case objabi.Hdragonfly,
  2353  				objabi.Hfreebsd,
  2354  				objabi.Hnetbsd,
  2355  				objabi.Hopenbsd,
  2356  				objabi.Hsolaris:
  2357  				return 0x64 // FS
  2358  
  2359  			case objabi.Hdarwin:
  2360  				return 0x65 // GS
  2361  			}
  2362  		}
  2363  	}
  2364  
  2365  	if ctxt.Arch.Family == sys.I386 {
  2366  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2367  			// When building for inclusion into a shared library, an instruction of the form
  2368  			//     MOVL 0(CX)(TLS*1), AX
  2369  			// becomes
  2370  			//     mov %gs:(%ecx), %eax
  2371  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2372  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2373  			// a shared library the instruction it becomes
  2374  			//     mov 0x0(%ecx), $eax
  2375  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2376  			if a.Offset != 0 {
  2377  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2378  			}
  2379  			return 0x65 // GS
  2380  		}
  2381  		return 0
  2382  	}
  2383  
  2384  	switch a.Index {
  2385  	case REG_CS:
  2386  		return 0x2e
  2387  
  2388  	case REG_DS:
  2389  		return 0x3e
  2390  
  2391  	case REG_ES:
  2392  		return 0x26
  2393  
  2394  	case REG_TLS:
  2395  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2396  			// When building for inclusion into a shared library, an instruction of the form
  2397  			//     MOV 0(CX)(TLS*1), AX
  2398  			// becomes
  2399  			//     mov %fs:(%rcx), %rax
  2400  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2401  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2402  			// a shared library the instruction does not require a prefix.
  2403  			if a.Offset != 0 {
  2404  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2405  			}
  2406  			return 0x64
  2407  		}
  2408  
  2409  	case REG_FS:
  2410  		return 0x64
  2411  
  2412  	case REG_GS:
  2413  		return 0x65
  2414  	}
  2415  
  2416  	return 0
  2417  }
  2418  
  2419  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2420  	switch a.Type {
  2421  	case obj.TYPE_NONE:
  2422  		return Ynone
  2423  
  2424  	case obj.TYPE_BRANCH:
  2425  		return Ybr
  2426  
  2427  	case obj.TYPE_INDIR:
  2428  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2429  			return Yindir
  2430  		}
  2431  		return Yxxx
  2432  
  2433  	case obj.TYPE_MEM:
  2434  		if a.Index == REG_SP {
  2435  			// Can't use SP as the index register
  2436  			return Yxxx
  2437  		}
  2438  		if ctxt.Arch.Family == sys.AMD64 {
  2439  			// Offset must fit in a 32-bit signed field (or fit in a 32-bit unsigned field
  2440  			// where the sign extension doesn't matter).
  2441  			// Note: The latter happens only in assembly, for example crypto/sha1/sha1block_amd64.s.
  2442  			if !(a.Offset == int64(int32(a.Offset)) ||
  2443  				a.Offset == int64(uint32(a.Offset)) && p.As == ALEAL) {
  2444  				return Yxxx
  2445  			}
  2446  			switch a.Name {
  2447  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2448  				// Global variables can't use index registers and their
  2449  				// base register is %rip (%rip is encoded as REG_NONE).
  2450  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2451  					return Yxxx
  2452  				}
  2453  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2454  				// These names must have a base of SP.  The old compiler
  2455  				// uses 0 for the base register. SSA uses REG_SP.
  2456  				if a.Reg != REG_SP && a.Reg != 0 {
  2457  					return Yxxx
  2458  				}
  2459  			case obj.NAME_NONE:
  2460  				// everything is ok
  2461  			default:
  2462  				// unknown name
  2463  				return Yxxx
  2464  			}
  2465  		}
  2466  		return Ym
  2467  
  2468  	case obj.TYPE_ADDR:
  2469  		switch a.Name {
  2470  		case obj.NAME_GOTREF:
  2471  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2472  			return Yxxx
  2473  
  2474  		case obj.NAME_EXTERN,
  2475  			obj.NAME_STATIC:
  2476  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2477  				return Yi32
  2478  			}
  2479  			return Yiauto // use pc-relative addressing
  2480  
  2481  		case obj.NAME_AUTO,
  2482  			obj.NAME_PARAM:
  2483  			return Yiauto
  2484  		}
  2485  
  2486  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2487  		// and got Yi32 in an earlier version of this code.
  2488  		// Keep doing that until we fix yduff etc.
  2489  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2490  			return Yi32
  2491  		}
  2492  
  2493  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2494  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2495  		}
  2496  		fallthrough
  2497  
  2498  		// fall through
  2499  
  2500  	case obj.TYPE_CONST:
  2501  		if a.Sym != nil {
  2502  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2503  		}
  2504  
  2505  		v := a.Offset
  2506  		if ctxt.Arch.Family == sys.I386 {
  2507  			v = int64(int32(v))
  2508  		}
  2509  		if v == 0 {
  2510  			return Yi0
  2511  		}
  2512  		if v == 1 {
  2513  			return Yi1
  2514  		}
  2515  		if v >= 0 && v <= 3 {
  2516  			return Yu2
  2517  		}
  2518  		if v >= 0 && v <= 127 {
  2519  			return Yu7
  2520  		}
  2521  		if v >= 0 && v <= 255 {
  2522  			return Yu8
  2523  		}
  2524  		if v >= -128 && v <= 127 {
  2525  			return Yi8
  2526  		}
  2527  		if ctxt.Arch.Family == sys.I386 {
  2528  			return Yi32
  2529  		}
  2530  		l := int32(v)
  2531  		if int64(l) == v {
  2532  			return Ys32 /* can sign extend */
  2533  		}
  2534  		if v>>32 == 0 {
  2535  			return Yi32 /* unsigned */
  2536  		}
  2537  		return Yi64
  2538  
  2539  	case obj.TYPE_TEXTSIZE:
  2540  		return Ytextsize
  2541  	}
  2542  
  2543  	if a.Type != obj.TYPE_REG {
  2544  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2545  		return Yxxx
  2546  	}
  2547  
  2548  	switch a.Reg {
  2549  	case REG_AL:
  2550  		return Yal
  2551  
  2552  	case REG_AX:
  2553  		return Yax
  2554  
  2555  		/*
  2556  			case REG_SPB:
  2557  		*/
  2558  	case REG_BPB,
  2559  		REG_SIB,
  2560  		REG_DIB,
  2561  		REG_R8B,
  2562  		REG_R9B,
  2563  		REG_R10B,
  2564  		REG_R11B,
  2565  		REG_R12B,
  2566  		REG_R13B,
  2567  		REG_R14B,
  2568  		REG_R15B:
  2569  		if ctxt.Arch.Family == sys.I386 {
  2570  			return Yxxx
  2571  		}
  2572  		fallthrough
  2573  
  2574  	case REG_DL,
  2575  		REG_BL,
  2576  		REG_AH,
  2577  		REG_CH,
  2578  		REG_DH,
  2579  		REG_BH:
  2580  		return Yrb
  2581  
  2582  	case REG_CL:
  2583  		return Ycl
  2584  
  2585  	case REG_CX:
  2586  		return Ycx
  2587  
  2588  	case REG_DX, REG_BX:
  2589  		return Yrx
  2590  
  2591  	case REG_R8, /* not really Yrl */
  2592  		REG_R9,
  2593  		REG_R10,
  2594  		REG_R11,
  2595  		REG_R12,
  2596  		REG_R13,
  2597  		REG_R14,
  2598  		REG_R15:
  2599  		if ctxt.Arch.Family == sys.I386 {
  2600  			return Yxxx
  2601  		}
  2602  		fallthrough
  2603  
  2604  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2605  		if ctxt.Arch.Family == sys.I386 {
  2606  			return Yrl32
  2607  		}
  2608  		return Yrl
  2609  
  2610  	case REG_F0 + 0:
  2611  		return Yf0
  2612  
  2613  	case REG_F0 + 1,
  2614  		REG_F0 + 2,
  2615  		REG_F0 + 3,
  2616  		REG_F0 + 4,
  2617  		REG_F0 + 5,
  2618  		REG_F0 + 6,
  2619  		REG_F0 + 7:
  2620  		return Yrf
  2621  
  2622  	case REG_M0 + 0,
  2623  		REG_M0 + 1,
  2624  		REG_M0 + 2,
  2625  		REG_M0 + 3,
  2626  		REG_M0 + 4,
  2627  		REG_M0 + 5,
  2628  		REG_M0 + 6,
  2629  		REG_M0 + 7:
  2630  		return Ymr
  2631  
  2632  	case REG_X0 + 0,
  2633  		REG_X0 + 1,
  2634  		REG_X0 + 2,
  2635  		REG_X0 + 3,
  2636  		REG_X0 + 4,
  2637  		REG_X0 + 5,
  2638  		REG_X0 + 6,
  2639  		REG_X0 + 7,
  2640  		REG_X0 + 8,
  2641  		REG_X0 + 9,
  2642  		REG_X0 + 10,
  2643  		REG_X0 + 11,
  2644  		REG_X0 + 12,
  2645  		REG_X0 + 13,
  2646  		REG_X0 + 14,
  2647  		REG_X0 + 15:
  2648  		return Yxr
  2649  
  2650  	case REG_Y0 + 0,
  2651  		REG_Y0 + 1,
  2652  		REG_Y0 + 2,
  2653  		REG_Y0 + 3,
  2654  		REG_Y0 + 4,
  2655  		REG_Y0 + 5,
  2656  		REG_Y0 + 6,
  2657  		REG_Y0 + 7,
  2658  		REG_Y0 + 8,
  2659  		REG_Y0 + 9,
  2660  		REG_Y0 + 10,
  2661  		REG_Y0 + 11,
  2662  		REG_Y0 + 12,
  2663  		REG_Y0 + 13,
  2664  		REG_Y0 + 14,
  2665  		REG_Y0 + 15:
  2666  		return Yyr
  2667  
  2668  	case REG_CS:
  2669  		return Ycs
  2670  	case REG_SS:
  2671  		return Yss
  2672  	case REG_DS:
  2673  		return Yds
  2674  	case REG_ES:
  2675  		return Yes
  2676  	case REG_FS:
  2677  		return Yfs
  2678  	case REG_GS:
  2679  		return Ygs
  2680  	case REG_TLS:
  2681  		return Ytls
  2682  
  2683  	case REG_GDTR:
  2684  		return Ygdtr
  2685  	case REG_IDTR:
  2686  		return Yidtr
  2687  	case REG_LDTR:
  2688  		return Yldtr
  2689  	case REG_MSW:
  2690  		return Ymsw
  2691  	case REG_TASK:
  2692  		return Ytask
  2693  
  2694  	case REG_CR + 0:
  2695  		return Ycr0
  2696  	case REG_CR + 1:
  2697  		return Ycr1
  2698  	case REG_CR + 2:
  2699  		return Ycr2
  2700  	case REG_CR + 3:
  2701  		return Ycr3
  2702  	case REG_CR + 4:
  2703  		return Ycr4
  2704  	case REG_CR + 5:
  2705  		return Ycr5
  2706  	case REG_CR + 6:
  2707  		return Ycr6
  2708  	case REG_CR + 7:
  2709  		return Ycr7
  2710  	case REG_CR + 8:
  2711  		return Ycr8
  2712  
  2713  	case REG_DR + 0:
  2714  		return Ydr0
  2715  	case REG_DR + 1:
  2716  		return Ydr1
  2717  	case REG_DR + 2:
  2718  		return Ydr2
  2719  	case REG_DR + 3:
  2720  		return Ydr3
  2721  	case REG_DR + 4:
  2722  		return Ydr4
  2723  	case REG_DR + 5:
  2724  		return Ydr5
  2725  	case REG_DR + 6:
  2726  		return Ydr6
  2727  	case REG_DR + 7:
  2728  		return Ydr7
  2729  
  2730  	case REG_TR + 0:
  2731  		return Ytr0
  2732  	case REG_TR + 1:
  2733  		return Ytr1
  2734  	case REG_TR + 2:
  2735  		return Ytr2
  2736  	case REG_TR + 3:
  2737  		return Ytr3
  2738  	case REG_TR + 4:
  2739  		return Ytr4
  2740  	case REG_TR + 5:
  2741  		return Ytr5
  2742  	case REG_TR + 6:
  2743  		return Ytr6
  2744  	case REG_TR + 7:
  2745  		return Ytr7
  2746  	}
  2747  
  2748  	return Yxxx
  2749  }
  2750  
  2751  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  2752  // and hold assembly state.
  2753  type AsmBuf struct {
  2754  	buf     [100]byte
  2755  	off     int
  2756  	rexflag int
  2757  	vexflag int
  2758  	rep     int
  2759  	repn    int
  2760  	lock    bool
  2761  }
  2762  
  2763  // Put1 appends one byte to the end of the buffer.
  2764  func (a *AsmBuf) Put1(x byte) {
  2765  	a.buf[a.off] = x
  2766  	a.off++
  2767  }
  2768  
  2769  // Put2 appends two bytes to the end of the buffer.
  2770  func (a *AsmBuf) Put2(x, y byte) {
  2771  	a.buf[a.off+0] = x
  2772  	a.buf[a.off+1] = y
  2773  	a.off += 2
  2774  }
  2775  
  2776  // Put3 appends three bytes to the end of the buffer.
  2777  func (a *AsmBuf) Put3(x, y, z byte) {
  2778  	a.buf[a.off+0] = x
  2779  	a.buf[a.off+1] = y
  2780  	a.buf[a.off+2] = z
  2781  	a.off += 3
  2782  }
  2783  
  2784  // Put4 appends four bytes to the end of the buffer.
  2785  func (a *AsmBuf) Put4(x, y, z, w byte) {
  2786  	a.buf[a.off+0] = x
  2787  	a.buf[a.off+1] = y
  2788  	a.buf[a.off+2] = z
  2789  	a.buf[a.off+3] = w
  2790  	a.off += 4
  2791  }
  2792  
  2793  // PutInt16 writes v into the buffer using little-endian encoding.
  2794  func (a *AsmBuf) PutInt16(v int16) {
  2795  	a.buf[a.off+0] = byte(v)
  2796  	a.buf[a.off+1] = byte(v >> 8)
  2797  	a.off += 2
  2798  }
  2799  
  2800  // PutInt32 writes v into the buffer using little-endian encoding.
  2801  func (a *AsmBuf) PutInt32(v int32) {
  2802  	a.buf[a.off+0] = byte(v)
  2803  	a.buf[a.off+1] = byte(v >> 8)
  2804  	a.buf[a.off+2] = byte(v >> 16)
  2805  	a.buf[a.off+3] = byte(v >> 24)
  2806  	a.off += 4
  2807  }
  2808  
  2809  // PutInt64 writes v into the buffer using little-endian encoding.
  2810  func (a *AsmBuf) PutInt64(v int64) {
  2811  	a.buf[a.off+0] = byte(v)
  2812  	a.buf[a.off+1] = byte(v >> 8)
  2813  	a.buf[a.off+2] = byte(v >> 16)
  2814  	a.buf[a.off+3] = byte(v >> 24)
  2815  	a.buf[a.off+4] = byte(v >> 32)
  2816  	a.buf[a.off+5] = byte(v >> 40)
  2817  	a.buf[a.off+6] = byte(v >> 48)
  2818  	a.buf[a.off+7] = byte(v >> 56)
  2819  	a.off += 8
  2820  }
  2821  
  2822  // Put copies b into the buffer.
  2823  func (a *AsmBuf) Put(b []byte) {
  2824  	copy(a.buf[a.off:], b)
  2825  	a.off += len(b)
  2826  }
  2827  
  2828  // Insert inserts b at offset i.
  2829  func (a *AsmBuf) Insert(i int, b byte) {
  2830  	a.off++
  2831  	copy(a.buf[i+1:a.off], a.buf[i:a.off-1])
  2832  	a.buf[i] = b
  2833  }
  2834  
  2835  // Last returns the byte at the end of the buffer.
  2836  func (a *AsmBuf) Last() byte { return a.buf[a.off-1] }
  2837  
  2838  // Len returns the length of the buffer.
  2839  func (a *AsmBuf) Len() int { return a.off }
  2840  
  2841  // Bytes returns the contents of the buffer.
  2842  func (a *AsmBuf) Bytes() []byte { return a.buf[:a.off] }
  2843  
  2844  // Reset empties the buffer.
  2845  func (a *AsmBuf) Reset() { a.off = 0 }
  2846  
  2847  // At returns the byte at offset i.
  2848  func (a *AsmBuf) At(i int) byte { return a.buf[i] }
  2849  
  2850  func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2851  	var i int
  2852  
  2853  	switch index {
  2854  	default:
  2855  		goto bad
  2856  
  2857  	case REG_NONE:
  2858  		i = 4 << 3
  2859  		goto bas
  2860  
  2861  	case REG_R8,
  2862  		REG_R9,
  2863  		REG_R10,
  2864  		REG_R11,
  2865  		REG_R12,
  2866  		REG_R13,
  2867  		REG_R14,
  2868  		REG_R15:
  2869  		if ctxt.Arch.Family == sys.I386 {
  2870  			goto bad
  2871  		}
  2872  		fallthrough
  2873  
  2874  	case REG_AX,
  2875  		REG_CX,
  2876  		REG_DX,
  2877  		REG_BX,
  2878  		REG_BP,
  2879  		REG_SI,
  2880  		REG_DI:
  2881  		i = reg[index] << 3
  2882  	}
  2883  
  2884  	switch scale {
  2885  	default:
  2886  		goto bad
  2887  
  2888  	case 1:
  2889  		break
  2890  
  2891  	case 2:
  2892  		i |= 1 << 6
  2893  
  2894  	case 4:
  2895  		i |= 2 << 6
  2896  
  2897  	case 8:
  2898  		i |= 3 << 6
  2899  	}
  2900  
  2901  bas:
  2902  	switch base {
  2903  	default:
  2904  		goto bad
  2905  
  2906  	case REG_NONE: /* must be mod=00 */
  2907  		i |= 5
  2908  
  2909  	case REG_R8,
  2910  		REG_R9,
  2911  		REG_R10,
  2912  		REG_R11,
  2913  		REG_R12,
  2914  		REG_R13,
  2915  		REG_R14,
  2916  		REG_R15:
  2917  		if ctxt.Arch.Family == sys.I386 {
  2918  			goto bad
  2919  		}
  2920  		fallthrough
  2921  
  2922  	case REG_AX,
  2923  		REG_CX,
  2924  		REG_DX,
  2925  		REG_BX,
  2926  		REG_SP,
  2927  		REG_BP,
  2928  		REG_SI,
  2929  		REG_DI:
  2930  		i |= reg[base]
  2931  	}
  2932  
  2933  	asmbuf.Put1(byte(i))
  2934  	return
  2935  
  2936  bad:
  2937  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2938  	asmbuf.Put1(0)
  2939  	return
  2940  }
  2941  
  2942  func (asmbuf *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  2943  	var rel obj.Reloc
  2944  
  2945  	v := vaddr(ctxt, p, a, &rel)
  2946  	if rel.Siz != 0 {
  2947  		if rel.Siz != 4 {
  2948  			ctxt.Diag("bad reloc")
  2949  		}
  2950  		r := obj.Addrel(cursym)
  2951  		*r = rel
  2952  		r.Off = int32(p.Pc + int64(asmbuf.Len()))
  2953  	}
  2954  
  2955  	asmbuf.PutInt32(int32(v))
  2956  }
  2957  
  2958  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2959  	if r != nil {
  2960  		*r = obj.Reloc{}
  2961  	}
  2962  
  2963  	switch a.Name {
  2964  	case obj.NAME_STATIC,
  2965  		obj.NAME_GOTREF,
  2966  		obj.NAME_EXTERN:
  2967  		s := a.Sym
  2968  		if r == nil {
  2969  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2970  			log.Fatalf("reloc")
  2971  		}
  2972  
  2973  		if a.Name == obj.NAME_GOTREF {
  2974  			r.Siz = 4
  2975  			r.Type = objabi.R_GOTPCREL
  2976  		} else if useAbs(ctxt, s) {
  2977  			r.Siz = 4
  2978  			r.Type = objabi.R_ADDR
  2979  		} else {
  2980  			r.Siz = 4
  2981  			r.Type = objabi.R_PCREL
  2982  		}
  2983  
  2984  		r.Off = -1 // caller must fill in
  2985  		r.Sym = s
  2986  		r.Add = a.Offset
  2987  
  2988  		return 0
  2989  	}
  2990  
  2991  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2992  		if r == nil {
  2993  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2994  			log.Fatalf("reloc")
  2995  		}
  2996  
  2997  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  2998  			r.Type = objabi.R_TLS_LE
  2999  			r.Siz = 4
  3000  			r.Off = -1 // caller must fill in
  3001  			r.Add = a.Offset
  3002  		}
  3003  		return 0
  3004  	}
  3005  
  3006  	return a.Offset
  3007  }
  3008  
// asmandsz appends the addressing bytes for operand a: a mode byte laid out
// as mod<<6 | r<<3 | rm, followed where needed by a scale/index/base byte
// (via asmidx) and an 8-bit or 32-bit displacement.
//
// r is the value placed in bits 3-5 of the mode byte. rex is masked down to
// 0x40|Rxr and ORed, together with the bits required by a's base and index
// registers, into asmbuf.rexflag. m64 is not referenced in the body.
//
// When the address needs a relocation (as reported by vaddr, or synthesized
// for an index of REG_TLS in non-shared mode) it is added to cursym at the
// position of the 32-bit displacement. Operands that cannot be encoded are
// reported through ctxt.Diag ("asmand: bad address ..."); in that case some
// bytes may already have been appended.
func (asmbuf *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Keep only the 0x40 and Rxr bits of the caller-supplied rex value.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && asmbuf.rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement; encoding continues with the truncated value
	// even after the diagnostic above.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR operands are never encodable here; the checks only
		// add more specific diagnostics before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register operand: mod=3, no displacement allowed.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		asmbuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		asmbuf.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: rm=4 and asmidx supplies
	// the scale/index/base byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// NOTE: this base shadows the function-level base; every path
		// through this block returns or jumps to a label.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		asmbuf.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 followed by a 32-bit displacement.
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Zero displacement without relocation: mod=0, no displacement
		// bytes. BP and R13 bases are excluded and fall through to the
		// forms with an explicit displacement.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in a signed byte: mod=1.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			asmbuf.Put1(byte(v))
			return
		}

		// Otherwise mod=2 with a 32-bit displacement.
		asmbuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with index REG_TLS).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	asmbuf.rexflag |= regrex[base]&Rxb | rex
	// No general-purpose base register: absent, a segment register, or TLS.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Use the short form (mod=0, rm=5, 32-bit displacement) for
		// non-absolute symbol references with no base, and always when
		// not targeting AMD64.
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			asmbuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		asmbuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 bases are always followed by a byte from asmidx with
	// no index register.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			asmbuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			asmbuf.Put1(byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: plain mod/rm encoding.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// Move the displacement into an R_TLS_LE relocation addend
			// and encode a zero displacement.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// Same mod=0 / mod=1 / mod=2 selection as in the indexed case.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

	// putrelv records the pending relocation, if any, at the current
	// position and then appends the 32-bit displacement.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(asmbuf.Len()))
	}

	asmbuf.PutInt32(v)
	return

	// bad reports an operand that cannot be encoded.
bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  3209  
  3210  func (asmbuf *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3211  	asmbuf.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3212  }
  3213  
  3214  func (asmbuf *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3215  	asmbuf.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3216  }
  3217  
  3218  func bytereg(a *obj.Addr, t *uint8) {
  3219  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3220  		a.Reg += REG_AL - REG_AX
  3221  		*t = 0
  3222  	}
  3223  }
  3224  
  3225  func unbytereg(a *obj.Addr, t *uint8) {
  3226  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3227  		a.Reg += REG_AX - REG_AL
  3228  		*t = 0
  3229  	}
  3230  }
  3231  
  3232  const (
  3233  	E = 0xff
  3234  )
  3235  
// ymovtab is a table of special-case instruction encodings, grouped below by
// instruction family (segment-register push/pop, moves to and from segment,
// control, debug and test registers, descriptor-table loads and stores,
// double shifts, and the TLS base load).
//
// Each entry gives an instruction, three operand classes (Ynone where a slot
// is unused), a small integer code and four bytes. The table ends with an
// all-zero entry.
// NOTE(review): the field meanings come from the Movtab type, which is not
// declared in this part of the file. Presumably the three classes are the
// from, middle and to operands, the code selects the encoding routine, and
// the bytes are opcode material — confirm against Movtab and its consumer.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3375  
  3376  func isax(a *obj.Addr) bool {
  3377  	switch a.Reg {
  3378  	case REG_AX, REG_AL, REG_AH:
  3379  		return true
  3380  	}
  3381  
  3382  	if a.Index == REG_AX {
  3383  		return true
  3384  	}
  3385  	return false
  3386  }
  3387  
  3388  func subreg(p *obj.Prog, from int, to int) {
  3389  	if false { /* debug['Q'] */
  3390  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3391  	}
  3392  
  3393  	if int(p.From.Reg) == from {
  3394  		p.From.Reg = int16(to)
  3395  		p.Ft = 0
  3396  	}
  3397  
  3398  	if int(p.To.Reg) == from {
  3399  		p.To.Reg = int16(to)
  3400  		p.Tt = 0
  3401  	}
  3402  
  3403  	if int(p.From.Index) == from {
  3404  		p.From.Index = int16(to)
  3405  		p.Ft = 0
  3406  	}
  3407  
  3408  	if int(p.To.Index) == from {
  3409  		p.To.Index = int16(to)
  3410  		p.Tt = 0
  3411  	}
  3412  
  3413  	if false { /* debug['Q'] */
  3414  		fmt.Printf("%v\n", p)
  3415  	}
  3416  }
  3417  
  3418  func (asmbuf *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3419  	switch op {
  3420  	case Pm, Pe, Pf2, Pf3:
  3421  		if osize != 1 {
  3422  			if op != Pm {
  3423  				asmbuf.Put1(byte(op))
  3424  			}
  3425  			asmbuf.Put1(Pm)
  3426  			z++
  3427  			op = int(o.op[z])
  3428  			break
  3429  		}
  3430  		fallthrough
  3431  
  3432  	default:
  3433  		if asmbuf.Len() == 0 || asmbuf.Last() != Pm {
  3434  			asmbuf.Put1(Pm)
  3435  		}
  3436  	}
  3437  
  3438  	asmbuf.Put1(byte(op))
  3439  	return z
  3440  }
  3441  
  3442  var bpduff1 = []byte{
  3443  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3444  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3445  }
  3446  
  3447  var bpduff2 = []byte{
  3448  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3449  }
  3450  
  3451  // Emit VEX prefix and opcode byte.
  3452  // The three addresses are the r/m, vvvv, and reg fields.
  3453  // The reg and rm arguments appear in the same order as the
  3454  // arguments to asmand, which typically follows the call to asmvex.
  3455  // The final two arguments are the VEX prefix (see encoding above)
  3456  // and the opcode byte.
  3457  // For details about vex prefix see:
  3458  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3459  func (asmbuf *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3460  	asmbuf.vexflag = 1
  3461  	rexR := 0
  3462  	if r != nil {
  3463  		rexR = regrex[r.Reg] & Rxr
  3464  	}
  3465  	rexB := 0
  3466  	rexX := 0
  3467  	if rm != nil {
  3468  		rexB = regrex[rm.Reg] & Rxb
  3469  		rexX = regrex[rm.Index] & Rxx
  3470  	}
  3471  	vexM := (vex >> 3) & 0xF
  3472  	vexWLP := vex & 0x87
  3473  	vexV := byte(0)
  3474  	if v != nil {
  3475  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3476  	}
  3477  	vexV ^= 0xF
  3478  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3479  		// Can use 2-byte encoding.
  3480  		asmbuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3481  	} else {
  3482  		// Must use 3-byte encoding.
  3483  		asmbuf.Put3(0xc4,
  3484  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3485  			vexV<<3|vexWLP,
  3486  		)
  3487  	}
  3488  	asmbuf.Put1(opcode)
  3489  }
  3490  
  3491  func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  3492  	o := opindex[p.As&obj.AMask]
  3493  
  3494  	if o == nil {
  3495  		ctxt.Diag("asmins: missing op %v", p)
  3496  		return
  3497  	}
  3498  
  3499  	pre := prefixof(ctxt, p, &p.From)
  3500  	if pre != 0 {
  3501  		asmbuf.Put1(byte(pre))
  3502  	}
  3503  	pre = prefixof(ctxt, p, &p.To)
  3504  	if pre != 0 {
  3505  		asmbuf.Put1(byte(pre))
  3506  	}
  3507  
  3508  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3509  	// which encodes as SHRQ $32(DX*0), AX.
  3510  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3511  	// Change encoding generated by assemblers and compilers and remove.
  3512  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3513  		p.SetFrom3(obj.Addr{
  3514  			Type: obj.TYPE_REG,
  3515  			Reg:  p.From.Index,
  3516  		})
  3517  		p.From.Index = 0
  3518  	}
  3519  
  3520  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3521  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3522  	switch p.As {
  3523  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3524  		if p.From3Type() == obj.TYPE_NONE {
  3525  			p.SetFrom3(p.From)
  3526  			p.From = obj.Addr{}
  3527  			p.From.Type = obj.TYPE_CONST
  3528  			p.From.Offset = p.To.Offset
  3529  			p.To.Offset = 0
  3530  		}
  3531  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3532  		if p.From3Type() == obj.TYPE_NONE {
  3533  			p.SetFrom3(p.To)
  3534  			p.To = obj.Addr{}
  3535  			p.To.Type = obj.TYPE_CONST
  3536  			p.To.Offset = p.GetFrom3().Offset
  3537  			p.GetFrom3().Offset = 0
  3538  		}
  3539  	}
  3540  
  3541  	if p.Ft == 0 {
  3542  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3543  	}
  3544  	if p.Tt == 0 {
  3545  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3546  	}
  3547  
  3548  	ft := int(p.Ft) * Ymax
  3549  	var f3t int
  3550  	tt := int(p.Tt) * Ymax
  3551  
  3552  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3553  	z := 0
  3554  	var a *obj.Addr
  3555  	var l int
  3556  	var op int
  3557  	var q *obj.Prog
  3558  	var r *obj.Reloc
  3559  	var rel obj.Reloc
  3560  	var v int64
  3561  
  3562  	args := make([]int, 0, 6)
  3563  	if ft != Ynone*Ymax {
  3564  		args = append(args, ft)
  3565  	}
  3566  	for i := range p.RestArgs {
  3567  		args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
  3568  	}
  3569  	if tt != Ynone*Ymax {
  3570  		args = append(args, tt)
  3571  	}
  3572  
  3573  	for _, yt := range o.ytab {
  3574  		if !yt.match(args) {
  3575  			z += int(yt.zoffset) + xo
  3576  		} else {
  3577  			switch o.prefix {
  3578  			case Px1: /* first option valid only in 32-bit mode */
  3579  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  3580  					z += int(yt.zoffset) + xo
  3581  					continue
  3582  				}
  3583  			case Pq: /* 16 bit escape and opcode escape */
  3584  				asmbuf.Put2(Pe, Pm)
  3585  
  3586  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3587  				asmbuf.rexflag |= Pw
  3588  				asmbuf.Put2(Pe, Pm)
  3589  
  3590  			case Pq4: /*  66 0F 38 */
  3591  				asmbuf.Put3(0x66, 0x0F, 0x38)
  3592  
  3593  			case Pq4w: /*  66 0F 38 + REX.W */
  3594  				asmbuf.rexflag |= Pw
  3595  				asmbuf.Put3(0x66, 0x0F, 0x38)
  3596  
  3597  			case Pq5: /*  F3 0F 38 */
  3598  				asmbuf.Put3(0xF3, 0x0F, 0x38)
  3599  
  3600  			case Pq5w: /*  F3 0F 38 + REX.W */
  3601  				asmbuf.rexflag |= Pw
  3602  				asmbuf.Put3(0xF3, 0x0F, 0x38)
  3603  
  3604  			case Pf2, /* xmm opcode escape */
  3605  				Pf3:
  3606  				asmbuf.Put2(o.prefix, Pm)
  3607  
  3608  			case Pef3:
  3609  				asmbuf.Put3(Pe, Pf3, Pm)
  3610  
  3611  			case Pfw: /* xmm opcode escape + REX.W */
  3612  				asmbuf.rexflag |= Pw
  3613  				asmbuf.Put2(Pf3, Pm)
  3614  
  3615  			case Pm: /* opcode escape */
  3616  				asmbuf.Put1(Pm)
  3617  
  3618  			case Pe: /* 16 bit escape */
  3619  				asmbuf.Put1(Pe)
  3620  
  3621  			case Pw: /* 64-bit escape */
  3622  				if ctxt.Arch.Family != sys.AMD64 {
  3623  					ctxt.Diag("asmins: illegal 64: %v", p)
  3624  				}
  3625  				asmbuf.rexflag |= Pw
  3626  
  3627  			case Pw8: /* 64-bit escape if z >= 8 */
  3628  				if z >= 8 {
  3629  					if ctxt.Arch.Family != sys.AMD64 {
  3630  						ctxt.Diag("asmins: illegal 64: %v", p)
  3631  					}
  3632  					asmbuf.rexflag |= Pw
  3633  				}
  3634  
  3635  			case Pb: /* botch */
  3636  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3637  					goto bad
  3638  				}
  3639  				// NOTE(rsc): This is probably safe to do always,
  3640  				// but when enabled it chooses different encodings
  3641  				// than the old cmd/internal/obj/i386 code did,
  3642  				// which breaks our "same bits out" checks.
  3643  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3644  				// in the original obj/i386, and it would encode
  3645  				// (using a valid, shorter form) as 3c 00 if we enabled
  3646  				// the call to bytereg here.
  3647  				if ctxt.Arch.Family == sys.AMD64 {
  3648  					bytereg(&p.From, &p.Ft)
  3649  					bytereg(&p.To, &p.Tt)
  3650  				}
  3651  
  3652  			case P32: /* 32 bit but illegal if 64-bit mode */
  3653  				if ctxt.Arch.Family == sys.AMD64 {
  3654  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3655  				}
  3656  
  3657  			case Py: /* 64-bit only, no prefix */
  3658  				if ctxt.Arch.Family != sys.AMD64 {
  3659  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3660  				}
  3661  
  3662  			case Py1: /* 64-bit only if z < 1, no prefix */
  3663  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  3664  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3665  				}
  3666  
  3667  			case Py3: /* 64-bit only if z < 3, no prefix */
  3668  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  3669  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3670  				}
  3671  			}
  3672  
  3673  			if z >= len(o.op) {
  3674  				log.Fatalf("asmins bad table %v", p)
  3675  			}
  3676  			op = int(o.op[z])
  3677  			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
  3678  			if op == 0x0f && o.prefix != Pvex {
  3679  				asmbuf.Put1(byte(op))
  3680  				z++
  3681  				op = int(o.op[z])
  3682  			}
  3683  
  3684  			switch yt.zcase {
  3685  			default:
  3686  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3687  				return
  3688  
  3689  			case Zpseudo:
  3690  				break
  3691  
  3692  			case Zlit:
  3693  				for ; ; z++ {
  3694  					op = int(o.op[z])
  3695  					if op == 0 {
  3696  						break
  3697  					}
  3698  					asmbuf.Put1(byte(op))
  3699  				}
  3700  
  3701  			case Zlitm_r:
  3702  				for ; ; z++ {
  3703  					op = int(o.op[z])
  3704  					if op == 0 {
  3705  						break
  3706  					}
  3707  					asmbuf.Put1(byte(op))
  3708  				}
  3709  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3710  
  3711  			case Zmb_r:
  3712  				bytereg(&p.From, &p.Ft)
  3713  				fallthrough
  3714  
  3715  			case Zm_r:
  3716  				asmbuf.Put1(byte(op))
  3717  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3718  
  3719  			case Zm2_r:
  3720  				asmbuf.Put2(byte(op), o.op[z+1])
  3721  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3722  
  3723  			case Zm_r_xm:
  3724  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3725  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3726  
  3727  			case Zm_r_xm_nr:
  3728  				asmbuf.rexflag = 0
  3729  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3730  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3731  
  3732  			case Zm_r_i_xm:
  3733  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3734  				asmbuf.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  3735  				asmbuf.Put1(byte(p.To.Offset))
  3736  
  3737  			case Zibm_r, Zibr_m:
  3738  				for {
  3739  					tmp1 := z
  3740  					z++
  3741  					op = int(o.op[tmp1])
  3742  					if op == 0 {
  3743  						break
  3744  					}
  3745  					asmbuf.Put1(byte(op))
  3746  				}
  3747  				if yt.zcase == Zibr_m {
  3748  					asmbuf.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  3749  				} else {
  3750  					asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  3751  				}
  3752  				asmbuf.Put1(byte(p.From.Offset))
  3753  
  3754  			case Zaut_r:
  3755  				asmbuf.Put1(0x8d) // leal
  3756  				if p.From.Type != obj.TYPE_ADDR {
  3757  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3758  				}
  3759  				p.From.Type = obj.TYPE_MEM
  3760  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3761  				p.From.Type = obj.TYPE_ADDR
  3762  
  3763  			case Zm_o:
  3764  				asmbuf.Put1(byte(op))
  3765  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3766  
  3767  			case Zr_m:
  3768  				asmbuf.Put1(byte(op))
  3769  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3770  
  3771  			case Zvex:
  3772  				asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  3773  
  3774  			case Zvex_rm_v_r:
  3775  				asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  3776  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3777  
  3778  			case Zvex_rm_v_ro:
  3779  				asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  3780  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  3781  
  3782  			case Zvex_i_r_v:
  3783  				asmbuf.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  3784  				regnum := byte(0x7)
  3785  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  3786  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  3787  				} else {
  3788  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  3789  				}
  3790  				asmbuf.Put1(byte(o.op[z+2]) | regnum)
  3791  				asmbuf.Put1(byte(p.From.Offset))
  3792  
  3793  			case Zvex_i_rm_v_r:
  3794  				imm, from, from3, to := unpackOps4(p)
  3795  				asmbuf.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  3796  				asmbuf.asmand(ctxt, cursym, p, from, to)
  3797  				asmbuf.Put1(byte(imm.Offset))
  3798  
  3799  			case Zvex_i_rm_r:
  3800  				asmbuf.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  3801  				asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  3802  				asmbuf.Put1(byte(p.From.Offset))
  3803  
  3804  			case Zvex_v_rm_r:
  3805  				asmbuf.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  3806  				asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  3807  
  3808  			case Zvex_r_v_rm:
  3809  				asmbuf.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  3810  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3811  
  3812  			case Zvex_rm_r_vo:
  3813  				asmbuf.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  3814  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  3815  
  3816  			case Zvex_i_r_rm:
  3817  				asmbuf.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  3818  				asmbuf.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  3819  				asmbuf.Put1(byte(p.From.Offset))
  3820  
  3821  			case Zvex_hr_rm_v_r:
  3822  				hr, from, from3, to := unpackOps4(p)
  3823  				asmbuf.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  3824  				asmbuf.asmand(ctxt, cursym, p, from, to)
  3825  				asmbuf.Put1(byte(regrex[hr.Reg]+reg[hr.Reg]+1) << 4)
  3826  
  3827  			case Zr_m_xm:
  3828  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3829  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3830  
  3831  			case Zr_m_xm_nr:
  3832  				asmbuf.rexflag = 0
  3833  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3834  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3835  
  3836  			case Zo_m:
  3837  				asmbuf.Put1(byte(op))
  3838  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3839  
  3840  			case Zcallindreg:
  3841  				r = obj.Addrel(cursym)
  3842  				r.Off = int32(p.Pc)
  3843  				r.Type = objabi.R_CALLIND
  3844  				r.Siz = 0
  3845  				fallthrough
  3846  
  3847  			case Zo_m64:
  3848  				asmbuf.Put1(byte(op))
  3849  				asmbuf.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  3850  
  3851  			case Zm_ibo:
  3852  				asmbuf.Put1(byte(op))
  3853  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3854  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3855  
  3856  			case Zibo_m:
  3857  				asmbuf.Put1(byte(op))
  3858  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3859  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3860  
  3861  			case Zibo_m_xm:
  3862  				z = asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3863  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3864  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3865  
  3866  			case Z_ib, Zib_:
  3867  				if yt.zcase == Zib_ {
  3868  					a = &p.From
  3869  				} else {
  3870  					a = &p.To
  3871  				}
  3872  				asmbuf.Put1(byte(op))
  3873  				if p.As == AXABORT {
  3874  					asmbuf.Put1(o.op[z+1])
  3875  				}
  3876  				asmbuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3877  
  3878  			case Zib_rp:
  3879  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3880  				asmbuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3881  
  3882  			case Zil_rp:
  3883  				asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3884  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3885  				if o.prefix == Pe {
  3886  					v = vaddr(ctxt, p, &p.From, nil)
  3887  					asmbuf.PutInt16(int16(v))
  3888  				} else {
  3889  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3890  				}
  3891  
  3892  			case Zo_iw:
  3893  				asmbuf.Put1(byte(op))
  3894  				if p.From.Type != obj.TYPE_NONE {
  3895  					v = vaddr(ctxt, p, &p.From, nil)
  3896  					asmbuf.PutInt16(int16(v))
  3897  				}
  3898  
  3899  			case Ziq_rp:
  3900  				v = vaddr(ctxt, p, &p.From, &rel)
  3901  				l = int(v >> 32)
  3902  				if l == 0 && rel.Siz != 8 {
  3903  					//p->mark |= 0100;
  3904  					//print("zero: %llux %v\n", v, p);
  3905  					asmbuf.rexflag &^= (0x40 | Rxw)
  3906  
  3907  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3908  					asmbuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3909  					if rel.Type != 0 {
  3910  						r = obj.Addrel(cursym)
  3911  						*r = rel
  3912  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3913  					}
  3914  
  3915  					asmbuf.PutInt32(int32(v))
  3916  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3917  
  3918  					//p->mark |= 0100;
  3919  					//print("sign: %llux %v\n", v, p);
  3920  					asmbuf.Put1(0xc7)
  3921  					asmbuf.asmando(ctxt, cursym, p, &p.To, 0)
  3922  
  3923  					asmbuf.PutInt32(int32(v)) // need all 8
  3924  				} else {
  3925  					//print("all: %llux %v\n", v, p);
  3926  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  3927  					asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3928  					if rel.Type != 0 {
  3929  						r = obj.Addrel(cursym)
  3930  						*r = rel
  3931  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3932  					}
  3933  
  3934  					asmbuf.PutInt64(v)
  3935  				}
  3936  
  3937  			case Zib_rr:
  3938  				asmbuf.Put1(byte(op))
  3939  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3940  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3941  
  3942  			case Z_il, Zil_:
  3943  				if yt.zcase == Zil_ {
  3944  					a = &p.From
  3945  				} else {
  3946  					a = &p.To
  3947  				}
  3948  				asmbuf.Put1(byte(op))
  3949  				if o.prefix == Pe {
  3950  					v = vaddr(ctxt, p, a, nil)
  3951  					asmbuf.PutInt16(int16(v))
  3952  				} else {
  3953  					asmbuf.relput4(ctxt, cursym, p, a)
  3954  				}
  3955  
  3956  			case Zm_ilo, Zilo_m:
  3957  				asmbuf.Put1(byte(op))
  3958  				if yt.zcase == Zilo_m {
  3959  					a = &p.From
  3960  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3961  				} else {
  3962  					a = &p.To
  3963  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3964  				}
  3965  
  3966  				if o.prefix == Pe {
  3967  					v = vaddr(ctxt, p, a, nil)
  3968  					asmbuf.PutInt16(int16(v))
  3969  				} else {
  3970  					asmbuf.relput4(ctxt, cursym, p, a)
  3971  				}
  3972  
  3973  			case Zil_rr:
  3974  				asmbuf.Put1(byte(op))
  3975  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  3976  				if o.prefix == Pe {
  3977  					v = vaddr(ctxt, p, &p.From, nil)
  3978  					asmbuf.PutInt16(int16(v))
  3979  				} else {
  3980  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  3981  				}
  3982  
  3983  			case Z_rp:
  3984  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3985  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  3986  
  3987  			case Zrp_:
  3988  				asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3989  				asmbuf.Put1(byte(op + reg[p.From.Reg]))
  3990  
  3991  			case Zcallcon, Zjmpcon:
  3992  				if yt.zcase == Zcallcon {
  3993  					asmbuf.Put1(byte(op))
  3994  				} else {
  3995  					asmbuf.Put1(o.op[z+1])
  3996  				}
  3997  				r = obj.Addrel(cursym)
  3998  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3999  				r.Type = objabi.R_PCREL
  4000  				r.Siz = 4
  4001  				r.Add = p.To.Offset
  4002  				asmbuf.PutInt32(0)
  4003  
  4004  			case Zcallind:
  4005  				asmbuf.Put2(byte(op), o.op[z+1])
  4006  				r = obj.Addrel(cursym)
  4007  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4008  				if ctxt.Arch.Family == sys.AMD64 {
  4009  					r.Type = objabi.R_PCREL
  4010  				} else {
  4011  					r.Type = objabi.R_ADDR
  4012  				}
  4013  				r.Siz = 4
  4014  				r.Add = p.To.Offset
  4015  				r.Sym = p.To.Sym
  4016  				asmbuf.PutInt32(0)
  4017  
  4018  			case Zcall, Zcallduff:
  4019  				if p.To.Sym == nil {
  4020  					ctxt.Diag("call without target")
  4021  					ctxt.DiagFlush()
  4022  					log.Fatalf("bad code")
  4023  				}
  4024  
  4025  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4026  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4027  				}
  4028  
  4029  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4030  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4031  					// (the call jumps into the middle of the function).
  4032  					// This makes it possible to see call sites for duffcopy/duffzero in
  4033  					// BP-based profiling tools like Linux perf (which is the
  4034  					// whole point of obj.Framepointer_enabled).
  4035  					// MOVQ BP, -16(SP)
  4036  					// LEAQ -16(SP), BP
  4037  					asmbuf.Put(bpduff1)
  4038  				}
  4039  				asmbuf.Put1(byte(op))
  4040  				r = obj.Addrel(cursym)
  4041  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4042  				r.Sym = p.To.Sym
  4043  				r.Add = p.To.Offset
  4044  				r.Type = objabi.R_CALL
  4045  				r.Siz = 4
  4046  				asmbuf.PutInt32(0)
  4047  
  4048  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4049  					// Pop BP pushed above.
  4050  					// MOVQ 0(BP), BP
  4051  					asmbuf.Put(bpduff2)
  4052  				}
  4053  
  4054  			// TODO: jump across functions needs reloc
  4055  			case Zbr, Zjmp, Zloop:
  4056  				if p.As == AXBEGIN {
  4057  					asmbuf.Put1(byte(op))
  4058  				}
  4059  				if p.To.Sym != nil {
  4060  					if yt.zcase != Zjmp {
  4061  						ctxt.Diag("branch to ATEXT")
  4062  						ctxt.DiagFlush()
  4063  						log.Fatalf("bad code")
  4064  					}
  4065  
  4066  					asmbuf.Put1(o.op[z+1])
  4067  					r = obj.Addrel(cursym)
  4068  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4069  					r.Sym = p.To.Sym
  4070  					r.Type = objabi.R_PCREL
  4071  					r.Siz = 4
  4072  					asmbuf.PutInt32(0)
  4073  					break
  4074  				}
  4075  
  4076  				// Assumes q is in this function.
  4077  				// TODO: Check in input, preserve in brchain.
  4078  
  4079  				// Fill in backward jump now.
  4080  				q = p.Pcond
  4081  
  4082  				if q == nil {
  4083  					ctxt.Diag("jmp/branch/loop without target")
  4084  					ctxt.DiagFlush()
  4085  					log.Fatalf("bad code")
  4086  				}
  4087  
  4088  				if p.Back&1 != 0 {
  4089  					v = q.Pc - (p.Pc + 2)
  4090  					if v >= -128 && p.As != AXBEGIN {
  4091  						if p.As == AJCXZL {
  4092  							asmbuf.Put1(0x67)
  4093  						}
  4094  						asmbuf.Put2(byte(op), byte(v))
  4095  					} else if yt.zcase == Zloop {
  4096  						ctxt.Diag("loop too far: %v", p)
  4097  					} else {
  4098  						v -= 5 - 2
  4099  						if p.As == AXBEGIN {
  4100  							v--
  4101  						}
  4102  						if yt.zcase == Zbr {
  4103  							asmbuf.Put1(0x0f)
  4104  							v--
  4105  						}
  4106  
  4107  						asmbuf.Put1(o.op[z+1])
  4108  						asmbuf.PutInt32(int32(v))
  4109  					}
  4110  
  4111  					break
  4112  				}
  4113  
  4114  				// Annotate target; will fill in later.
  4115  				p.Forwd = q.Rel
  4116  
  4117  				q.Rel = p
  4118  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  4119  					if p.As == AJCXZL {
  4120  						asmbuf.Put1(0x67)
  4121  					}
  4122  					asmbuf.Put2(byte(op), 0)
  4123  				} else if yt.zcase == Zloop {
  4124  					ctxt.Diag("loop too far: %v", p)
  4125  				} else {
  4126  					if yt.zcase == Zbr {
  4127  						asmbuf.Put1(0x0f)
  4128  					}
  4129  					asmbuf.Put1(o.op[z+1])
  4130  					asmbuf.PutInt32(0)
  4131  				}
  4132  
  4133  				break
  4134  
  4135  			/*
  4136  				v = q->pc - p->pc - 2;
  4137  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  4138  					*ctxt->andptr++ = op;
  4139  					*ctxt->andptr++ = v;
  4140  				} else {
  4141  					v -= 5-2;
  4142  					if(yt.zcase == Zbr) {
  4143  						*ctxt->andptr++ = 0x0f;
  4144  						v--;
  4145  					}
  4146  					*ctxt->andptr++ = o->op[z+1];
  4147  					*ctxt->andptr++ = v;
  4148  					*ctxt->andptr++ = v>>8;
  4149  					*ctxt->andptr++ = v>>16;
  4150  					*ctxt->andptr++ = v>>24;
  4151  				}
  4152  			*/
  4153  
  4154  			case Zbyte:
  4155  				v = vaddr(ctxt, p, &p.From, &rel)
  4156  				if rel.Siz != 0 {
  4157  					rel.Siz = uint8(op)
  4158  					r = obj.Addrel(cursym)
  4159  					*r = rel
  4160  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4161  				}
  4162  
  4163  				asmbuf.Put1(byte(v))
  4164  				if op > 1 {
  4165  					asmbuf.Put1(byte(v >> 8))
  4166  					if op > 2 {
  4167  						asmbuf.PutInt16(int16(v >> 16))
  4168  						if op > 4 {
  4169  							asmbuf.PutInt32(int32(v >> 32))
  4170  						}
  4171  					}
  4172  				}
  4173  			}
  4174  
  4175  			return
  4176  		}
  4177  	}
  4178  	f3t = Ynone * Ymax
  4179  	if p.GetFrom3() != nil {
  4180  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4181  	}
  4182  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4183  		var pp obj.Prog
  4184  		var t []byte
  4185  		if p.As == mo[0].as {
  4186  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4187  				t = mo[0].op[:]
  4188  				switch mo[0].code {
  4189  				default:
  4190  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4191  
  4192  				case 0: /* lit */
  4193  					for z = 0; t[z] != E; z++ {
  4194  						asmbuf.Put1(t[z])
  4195  					}
  4196  
  4197  				case 1: /* r,m */
  4198  					asmbuf.Put1(t[0])
  4199  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4200  
  4201  				case 2: /* m,r */
  4202  					asmbuf.Put1(t[0])
  4203  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4204  
  4205  				case 3: /* r,m - 2op */
  4206  					asmbuf.Put2(t[0], t[1])
  4207  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4208  					asmbuf.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4209  
  4210  				case 4: /* m,r - 2op */
  4211  					asmbuf.Put2(t[0], t[1])
  4212  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  4213  					asmbuf.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4214  
  4215  				case 5: /* load full pointer, trash heap */
  4216  					if t[0] != 0 {
  4217  						asmbuf.Put1(t[0])
  4218  					}
  4219  					switch p.To.Index {
  4220  					default:
  4221  						goto bad
  4222  
  4223  					case REG_DS:
  4224  						asmbuf.Put1(0xc5)
  4225  
  4226  					case REG_SS:
  4227  						asmbuf.Put2(0x0f, 0xb2)
  4228  
  4229  					case REG_ES:
  4230  						asmbuf.Put1(0xc4)
  4231  
  4232  					case REG_FS:
  4233  						asmbuf.Put2(0x0f, 0xb4)
  4234  
  4235  					case REG_GS:
  4236  						asmbuf.Put2(0x0f, 0xb5)
  4237  					}
  4238  
  4239  					asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  4240  
  4241  				case 6: /* double shift */
  4242  					if t[0] == Pw {
  4243  						if ctxt.Arch.Family != sys.AMD64 {
  4244  							ctxt.Diag("asmins: illegal 64: %v", p)
  4245  						}
  4246  						asmbuf.rexflag |= Pw
  4247  						t = t[1:]
  4248  					} else if t[0] == Pe {
  4249  						asmbuf.Put1(Pe)
  4250  						t = t[1:]
  4251  					}
  4252  
  4253  					switch p.From.Type {
  4254  					default:
  4255  						goto bad
  4256  
  4257  					case obj.TYPE_CONST:
  4258  						asmbuf.Put2(0x0f, t[0])
  4259  						asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4260  						asmbuf.Put1(byte(p.From.Offset))
  4261  
  4262  					case obj.TYPE_REG:
  4263  						switch p.From.Reg {
  4264  						default:
  4265  							goto bad
  4266  
  4267  						case REG_CL, REG_CX:
  4268  							asmbuf.Put2(0x0f, t[1])
  4269  							asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4270  						}
  4271  					}
  4272  
  4273  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4274  				// where you load the TLS base register into a register and then index off that
  4275  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4276  				// are handled in prefixof above and should not be listed here.
  4277  				case 7: /* mov tls, r */
  4278  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4279  						ctxt.Diag("invalid load of TLS: %v", p)
  4280  					}
  4281  
  4282  					if ctxt.Arch.Family == sys.I386 {
  4283  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4284  						// where you load the TLS base register into a register and then index off that
  4285  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4286  						// are handled in prefixof above and should not be listed here.
  4287  						switch ctxt.Headtype {
  4288  						default:
  4289  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4290  
  4291  						case objabi.Hlinux,
  4292  							objabi.Hnacl:
  4293  							if ctxt.Flag_shared {
  4294  								// Note that this is not generating the same insns as the other cases.
  4295  								//     MOV TLS, dst
  4296  								// becomes
  4297  								//     call __x86.get_pc_thunk.dst
  4298  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4299  								// which is encoded as
  4300  								//     call __x86.get_pc_thunk.dst
  4301  								//     movq 0(dst), dst
  4302  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4303  								// is g, which we can't check here, but will when we assemble the second
  4304  								// instruction.
  4305  								dst := p.To.Reg
  4306  								asmbuf.Put1(0xe8)
  4307  								r = obj.Addrel(cursym)
  4308  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4309  								r.Type = objabi.R_CALL
  4310  								r.Siz = 4
  4311  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  4312  								asmbuf.PutInt32(0)
  4313  
  4314  								asmbuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4315  								r = obj.Addrel(cursym)
  4316  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4317  								r.Type = objabi.R_TLS_IE
  4318  								r.Siz = 4
  4319  								r.Add = 2
  4320  								asmbuf.PutInt32(0)
  4321  							} else {
  4322  								// ELF TLS base is 0(GS).
  4323  								pp.From = p.From
  4324  
  4325  								pp.From.Type = obj.TYPE_MEM
  4326  								pp.From.Reg = REG_GS
  4327  								pp.From.Offset = 0
  4328  								pp.From.Index = REG_NONE
  4329  								pp.From.Scale = 0
  4330  								asmbuf.Put2(0x65, // GS
  4331  									0x8B)
  4332  								asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4333  							}
  4334  						case objabi.Hplan9:
  4335  							pp.From = obj.Addr{}
  4336  							pp.From.Type = obj.TYPE_MEM
  4337  							pp.From.Name = obj.NAME_EXTERN
  4338  							pp.From.Sym = plan9privates
  4339  							pp.From.Offset = 0
  4340  							pp.From.Index = REG_NONE
  4341  							asmbuf.Put1(0x8B)
  4342  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4343  
  4344  						case objabi.Hwindows:
  4345  							// Windows TLS base is always 0x14(FS).
  4346  							pp.From = p.From
  4347  
  4348  							pp.From.Type = obj.TYPE_MEM
  4349  							pp.From.Reg = REG_FS
  4350  							pp.From.Offset = 0x14
  4351  							pp.From.Index = REG_NONE
  4352  							pp.From.Scale = 0
  4353  							asmbuf.Put2(0x64, // FS
  4354  								0x8B)
  4355  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4356  						}
  4357  						break
  4358  					}
  4359  
  4360  					switch ctxt.Headtype {
  4361  					default:
  4362  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4363  
  4364  					case objabi.Hlinux:
  4365  						if !ctxt.Flag_shared {
  4366  							log.Fatalf("unknown TLS base location for linux without -shared")
  4367  						}
  4368  						// Note that this is not generating the same insn as the other cases.
  4369  						//     MOV TLS, R_to
  4370  						// becomes
  4371  						//     movq g@gottpoff(%rip), R_to
  4372  						// which is encoded as
  4373  						//     movq 0(%rip), R_to
  4374  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4375  						// is g, which we can't check here, but will when we assemble the second
  4376  						// instruction.
  4377  						asmbuf.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4378  
  4379  						asmbuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4380  						r = obj.Addrel(cursym)
  4381  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4382  						r.Type = objabi.R_TLS_IE
  4383  						r.Siz = 4
  4384  						r.Add = -4
  4385  						asmbuf.PutInt32(0)
  4386  
  4387  					case objabi.Hplan9:
  4388  						pp.From = obj.Addr{}
  4389  						pp.From.Type = obj.TYPE_MEM
  4390  						pp.From.Name = obj.NAME_EXTERN
  4391  						pp.From.Sym = plan9privates
  4392  						pp.From.Offset = 0
  4393  						pp.From.Index = REG_NONE
  4394  						asmbuf.rexflag |= Pw
  4395  						asmbuf.Put1(0x8B)
  4396  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4397  
  4398  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4399  						// TLS base is 0(FS).
  4400  						pp.From = p.From
  4401  
  4402  						pp.From.Type = obj.TYPE_MEM
  4403  						pp.From.Name = obj.NAME_NONE
  4404  						pp.From.Reg = REG_NONE
  4405  						pp.From.Offset = 0
  4406  						pp.From.Index = REG_NONE
  4407  						pp.From.Scale = 0
  4408  						asmbuf.rexflag |= Pw
  4409  						asmbuf.Put2(0x64, // FS
  4410  							0x8B)
  4411  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4412  
  4413  					case objabi.Hwindows:
  4414  						// Windows TLS base is always 0x28(GS).
  4415  						pp.From = p.From
  4416  
  4417  						pp.From.Type = obj.TYPE_MEM
  4418  						pp.From.Name = obj.NAME_NONE
  4419  						pp.From.Reg = REG_GS
  4420  						pp.From.Offset = 0x28
  4421  						pp.From.Index = REG_NONE
  4422  						pp.From.Scale = 0
  4423  						asmbuf.rexflag |= Pw
  4424  						asmbuf.Put2(0x65, // GS
  4425  							0x8B)
  4426  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4427  					}
  4428  				}
  4429  				return
  4430  			}
  4431  		}
  4432  	}
  4433  	goto bad
  4434  
  4435  bad:
  4436  	if ctxt.Arch.Family != sys.AMD64 {
  4437  		/*
  4438  		 * here, the assembly has failed.
  4439  		 * if it's a byte instruction that has
  4440  		 * unaddressable registers, try to
  4441  		 * exchange registers and reissue the
  4442  		 * instruction with the operands renamed.
  4443  		 */
  4444  		pp := *p
  4445  
  4446  		unbytereg(&pp.From, &pp.Ft)
  4447  		unbytereg(&pp.To, &pp.Tt)
  4448  
  4449  		z := int(p.From.Reg)
  4450  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4451  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4452  			// For now, different to keep bit-for-bit compatibility.
  4453  			if ctxt.Arch.Family == sys.I386 {
  4454  				breg := byteswapreg(ctxt, &p.To)
  4455  				if breg != REG_AX {
  4456  					asmbuf.Put1(0x87) // xchg lhs,bx
  4457  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4458  					subreg(&pp, z, breg)
  4459  					asmbuf.doasm(ctxt, cursym, &pp)
  4460  					asmbuf.Put1(0x87) // xchg lhs,bx
  4461  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4462  				} else {
  4463  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4464  					subreg(&pp, z, REG_AX)
  4465  					asmbuf.doasm(ctxt, cursym, &pp)
  4466  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4467  				}
  4468  				return
  4469  			}
  4470  
  4471  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4472  				// We certainly don't want to exchange
  4473  				// with AX if the op is MUL or DIV.
  4474  				asmbuf.Put1(0x87) // xchg lhs,bx
  4475  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4476  				subreg(&pp, z, REG_BX)
  4477  				asmbuf.doasm(ctxt, cursym, &pp)
  4478  				asmbuf.Put1(0x87) // xchg lhs,bx
  4479  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4480  			} else {
  4481  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4482  				subreg(&pp, z, REG_AX)
  4483  				asmbuf.doasm(ctxt, cursym, &pp)
  4484  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4485  			}
  4486  			return
  4487  		}
  4488  
  4489  		z = int(p.To.Reg)
  4490  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4491  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4492  			// For now, different to keep bit-for-bit compatibility.
  4493  			if ctxt.Arch.Family == sys.I386 {
  4494  				breg := byteswapreg(ctxt, &p.From)
  4495  				if breg != REG_AX {
  4496  					asmbuf.Put1(0x87) //xchg rhs,bx
  4497  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4498  					subreg(&pp, z, breg)
  4499  					asmbuf.doasm(ctxt, cursym, &pp)
  4500  					asmbuf.Put1(0x87) // xchg rhs,bx
  4501  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4502  				} else {
  4503  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4504  					subreg(&pp, z, REG_AX)
  4505  					asmbuf.doasm(ctxt, cursym, &pp)
  4506  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4507  				}
  4508  				return
  4509  			}
  4510  
  4511  			if isax(&p.From) {
  4512  				asmbuf.Put1(0x87) // xchg rhs,bx
  4513  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4514  				subreg(&pp, z, REG_BX)
  4515  				asmbuf.doasm(ctxt, cursym, &pp)
  4516  				asmbuf.Put1(0x87) // xchg rhs,bx
  4517  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4518  			} else {
  4519  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4520  				subreg(&pp, z, REG_AX)
  4521  				asmbuf.doasm(ctxt, cursym, &pp)
  4522  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4523  			}
  4524  			return
  4525  		}
  4526  	}
  4527  
  4528  	ctxt.Diag("invalid instruction: %v", p)
  4529  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4530  	return
  4531  }
  4532  
  4533  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4534  // which is not referenced in a.
  4535  // If a is empty, it returns BX to account for MULB-like instructions
  4536  // that might use DX and AX.
  4537  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4538  	cana, canb, canc, cand := true, true, true, true
  4539  	if a.Type == obj.TYPE_NONE {
  4540  		cana, cand = false, false
  4541  	}
  4542  
  4543  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4544  		switch a.Reg {
  4545  		case REG_NONE:
  4546  			cana, cand = false, false
  4547  		case REG_AX, REG_AL, REG_AH:
  4548  			cana = false
  4549  		case REG_BX, REG_BL, REG_BH:
  4550  			canb = false
  4551  		case REG_CX, REG_CL, REG_CH:
  4552  			canc = false
  4553  		case REG_DX, REG_DL, REG_DH:
  4554  			cand = false
  4555  		}
  4556  	}
  4557  
  4558  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4559  		switch a.Index {
  4560  		case REG_AX:
  4561  			cana = false
  4562  		case REG_BX:
  4563  			canb = false
  4564  		case REG_CX:
  4565  			canc = false
  4566  		case REG_DX:
  4567  			cand = false
  4568  		}
  4569  	}
  4570  
  4571  	switch {
  4572  	case cana:
  4573  		return REG_AX
  4574  	case canb:
  4575  		return REG_BX
  4576  	case canc:
  4577  		return REG_CX
  4578  	case cand:
  4579  		return REG_DX
  4580  	default:
  4581  		ctxt.Diag("impossible byte register")
  4582  		ctxt.DiagFlush()
  4583  		log.Fatalf("bad code")
  4584  		return 0
  4585  	}
  4586  }
  4587  
  4588  func isbadbyte(a *obj.Addr) bool {
  4589  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4590  }
  4591  
  4592  var naclret = []uint8{
  4593  	0x5e, // POPL SI
  4594  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4595  	0x83,
  4596  	0xe6,
  4597  	0xe0, // ANDL $~31, SI
  4598  	0x4c,
  4599  	0x01,
  4600  	0xfe, // ADDQ R15, SI
  4601  	0xff,
  4602  	0xe6, // JMP SI
  4603  }
  4604  
  4605  var naclret8 = []uint8{
  4606  	0x5d, // POPL BP
  4607  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4608  	0x83,
  4609  	0xe5,
  4610  	0xe0, // ANDL $~31, BP
  4611  	0xff,
  4612  	0xe5, // JMP BP
  4613  }
  4614  
  4615  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4616  
  4617  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4618  
  4619  var naclmovs = []uint8{
  4620  	0x89,
  4621  	0xf6, // MOVL SI, SI
  4622  	0x49,
  4623  	0x8d,
  4624  	0x34,
  4625  	0x37, // LEAQ (R15)(SI*1), SI
  4626  	0x89,
  4627  	0xff, // MOVL DI, DI
  4628  	0x49,
  4629  	0x8d,
  4630  	0x3c,
  4631  	0x3f, // LEAQ (R15)(DI*1), DI
  4632  }
  4633  
  4634  var naclstos = []uint8{
  4635  	0x89,
  4636  	0xff, // MOVL DI, DI
  4637  	0x49,
  4638  	0x8d,
  4639  	0x3c,
  4640  	0x3f, // LEAQ (R15)(DI*1), DI
  4641  }
  4642  
  4643  func (asmbuf *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
  4644  	if reg >= REG_R8 {
  4645  		asmbuf.Put1(0x45)
  4646  	}
  4647  	reg = (reg - REG_AX) & 7
  4648  	asmbuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4649  }
  4650  
// asmins encodes the single instruction p into asmbuf.
//
// The buffer is reset first. For Native Client (Hnacl) targets, extra byte
// sequences are emitted around or instead of the instruction. The
// instruction itself is encoded by doasm; if that set rexflag without
// vexflag, a REX prefix byte is inserted after any legacy prefix bytes.
// Finally, relocations in cursym.R whose offset is at or after p.Pc are
// adjusted for the inserted REX byte and for PC-relative addend conventions.
func (asmbuf *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
	asmbuf.Reset()

	// NaCl on 386: rewrite control transfers.
	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 {
		switch p.As {
		case obj.ARET:
			// RET is replaced entirely by the naclret8 sequence.
			asmbuf.Put(naclret8)
			return

		case obj.ACALL,
			obj.AJMP:
			// Indirect call/jump through AX..DI: emit ANDL $~31, reg
			// first, then fall out of the switch to encode the
			// instruction itself.
			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
			}

		case AINT:
			// INT is replaced by the single byte 0xf4 (the x86 HLT opcode).
			asmbuf.Put1(0xf4)
			return
		}
	}

	// NaCl on amd64.
	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
		// REP, REPN and LOCK produce no bytes of their own here. They are
		// recorded on asmbuf and emitted below, after any NaCl sequence
		// generated for a later instruction.
		// NOTE(review): this relies on Reset not clearing rep/repn/lock - confirm.
		if p.As == AREP {
			asmbuf.rep++
			return
		}

		if p.As == AREPN {
			asmbuf.repn++
			return
		}

		if p.As == ALOCK {
			asmbuf.lock = true
			return
		}

		// For everything except LEAQ/LEAL, emit a nacltrunc for each
		// scaled index register used by the operands.
		if p.As != ALEAQ && p.As != ALEAL {
			if p.From.Index != REG_NONE && p.From.Scale > 0 {
				asmbuf.nacltrunc(ctxt, int(p.From.Index))
			}
			if p.To.Index != REG_NONE && p.To.Scale > 0 {
				asmbuf.nacltrunc(ctxt, int(p.To.Index))
			}
		}

		switch p.As {
		case obj.ARET:
			// RET is replaced entirely by the naclret sequence.
			asmbuf.Put(naclret)
			return

		case obj.ACALL,
			obj.AJMP:
			// Indirect call/jump through a register: mask the target and
			// add R15 before the instruction itself is encoded.
			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
				// ANDL $~31, reg
				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
				// ADDQ R15, reg
				asmbuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
			}

			// Same for R8..R15, with the extra prefix byte those
			// registers need.
			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
				// ANDL $~31, reg
				asmbuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
				// ADDQ R15, reg
				asmbuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
			}

		case AINT:
			// INT is replaced by the single byte 0xf4 (the x86 HLT opcode).
			asmbuf.Put1(0xf4)
			return

		case ASCASB,
			ASCASW,
			ASCASL,
			ASCASQ,
			ASTOSB,
			ASTOSW,
			ASTOSL,
			ASTOSQ:
			// SCAS/STOS: emit the naclstos sequence (it rewrites DI) first.
			asmbuf.Put(naclstos)

		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
			// MOVS: emit the naclmovs sequence (it rewrites SI and DI) first.
			asmbuf.Put(naclmovs)
		}

		// Emit any pending REP (0xf3), REPN (0xf2) and LOCK (0xf0)
		// prefixes now, after the NaCl sequences above and immediately
		// before the instruction encoding.
		if asmbuf.rep != 0 {
			asmbuf.Put1(0xf3)
			asmbuf.rep = 0
		}

		if asmbuf.repn != 0 {
			asmbuf.Put1(0xf2)
			asmbuf.repn = 0
		}

		if asmbuf.lock {
			asmbuf.Put1(0xf0)
			asmbuf.lock = false
		}
	}

	// Encode the instruction proper. mark is the buffer offset where its
	// bytes begin, so the REX scan below skips anything emitted above.
	asmbuf.rexflag = 0
	asmbuf.vexflag = 0
	mark := asmbuf.Len()
	asmbuf.doasm(ctxt, cursym, p)
	if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
		/*
		 * as befits the whole approach of the architecture,
		 * the rex prefix must appear before the first opcode byte
		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
		 * before the 0f opcode escape!), or it might be ignored.
		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
		 */
		if ctxt.Arch.Family != sys.AMD64 {
			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
		}
		n := asmbuf.Len()
		var np int
		// Advance np past the leading prefix bytes (f2, f3, 64..67, 2e,
		// 3e, 26); it stops at the first byte that is not one of them.
		for np = mark; np < n; np++ {
			c := asmbuf.At(np)
			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
				break
			}
		}
		// The REX byte is 0x40 combined with the accumulated flag bits.
		asmbuf.Insert(np, byte(0x40|asmbuf.rexflag))
	}

	// Fix up relocations for this instruction. The list is walked from the
	// end and the walk stops at the first relocation whose offset is
	// before p.Pc.
	n := asmbuf.Len()
	for i := len(cursym.R) - 1; i >= 0; i-- {
		r := &cursym.R[i]
		if int64(r.Off) < p.Pc {
			break
		}
		// A REX byte was inserted above, so the relocation offset moves
		// by one.
		if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
			r.Off++
		}
		if r.Type == objabi.R_PCREL {
			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
				// PC-relative addressing is relative to the end of the instruction,
				// but the relocations applied by the linker are relative to the end
				// of the relocation. Because immediate instruction
				// arguments can follow the PC-relative memory reference in the
				// instruction encoding, the two may not coincide. In this case,
				// adjust addend so that linker can keep relocating relative to the
				// end of the relocation.
				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
			} else if ctxt.Arch.Family == sys.I386 {
				// On 386 PC-relative addressing (for non-call/jmp instructions)
				// assumes that the previous instruction loaded the PC of the end
				// of that instruction into CX, so the adjustment is relative to
				// that.
				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
			}
		}
		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
		}

	}

	// NaCl on amd64: after an instruction (other than CMPL/CMPQ) whose
	// destination is the SP or BP register, append ADDQ R15, SP or
	// ADDQ R15, BP.
	if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
		switch p.To.Reg {
		case REG_SP:
			asmbuf.Put(naclspfix)
		case REG_BP:
			asmbuf.Put(naclbpfix)
		}
	}
}
  4821  
  4822  // Extract 4 operands from p.
  4823  func unpackOps4(p *obj.Prog) (*obj.Addr, *obj.Addr, *obj.Addr, *obj.Addr) {
  4824  	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
  4825  }