github.com/c9s/go@v0.0.0-20180120015821-984e81f64e0c/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"log"
    40  	"strings"
    41  )
    42  
    43  var ( // package-level symbol handles; NOTE(review): both are assigned outside this chunk — confirm where
    44  	plan9privates *obj.LSym // presumably the Plan 9 "privates" symbol — TODO confirm
    45  	deferreturn   *obj.LSym // presumably the symbol for the runtime's deferreturn — TODO confirm
    46  )
    47  
    48  // Instruction layout.
    49  
    50  const (
    51  	// Loop alignment constants:
    52  	// want to align loop entry to LoopAlign-byte boundary,
    53  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    54  	// We define a loop entry as the target of a backward jump.
    55  	//
    56  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    57  	// and it aligns all jump targets, not just backward jump targets.
    58  	//
    59  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    60  	// is very slight but negative, so the alignment is disabled by
    61  	// setting MaxLoopPad = 0. The code is here for reference and
    62  	// for future experiments.
    63  	//
    64  	LoopAlign  = 16 // byte boundary that loop entries would be aligned to
    65  	MaxLoopPad = 0 // maximum NOP bytes inserted per loop entry; 0 disables loop alignment
    66  )
    67  
    68  type Optab struct { // one instruction-table entry: opcode, operand patterns, prefix selector and encoding bytes
    69  	as     obj.As // instruction this entry describes; matched against p.As
    70  	ytab   []ytab // operand-type patterns accepted by the instruction, scanned in order
    71  	prefix uint8 // prefix selector; table examples use P* constants such as Px or Pvex
    72  	op     [23]uint8 // encoding bytes; the z position advances by each skipped ytab line's zoffset
    73  }
    74  
    75  type Movtab struct { // NOTE(review): presumably a table entry for special-case MOV encodings; consumers are outside this chunk
    76  	as   obj.As // instruction opcode
    77  	ft   uint8 // presumably the Ytype of the from operand — TODO confirm
    78  	f3t  uint8 // presumably the Ytype of a third operand — TODO confirm
    79  	tt   uint8 // presumably the Ytype of the to operand — TODO confirm
    80  	code uint8 // NOTE(review): looks like an encoding-scheme selector — confirm against the consumer
    81  	op   [4]uint8 // up to four encoding bytes
    82  }
    83  
    84  const ( // Ytypes: operand classes used in ytab argLists and as indices into ycover
    85  	Yxxx = iota // zero value of the enumeration
    86  	Ynone
    87  	Yi0 // $0
    88  	Yi1 // $1
    89  	Yu2 // $x, x fits in uint2
    90  	Yi8 // $x, x fits in int8
    91  	Yu8 // $x, x fits in uint8
    92  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
    93  	Ys32 // signed 32-bit constant
    94  	Yi32
    95  	Yi64
    96  	Yiauto
    97  	Yal
    98  	Ycl
    99  	Yax
   100  	Ycx
   101  	Yrb
   102  	Yrl
   103  	Yrl32 // Yrl on 32-bit system
   104  	Yrf
   105  	Yf0
   106  	Yrx
   107  	Ymb
   108  	Yml
   109  	Ym
   110  	Ybr
   111  	Ycs
   112  	Yss
   113  	Yds
   114  	Yes
   115  	Yfs
   116  	Ygs
   117  	Ygdtr
   118  	Yidtr
   119  	Yldtr
   120  	Ymsw
   121  	Ytask
   122  	Ycr0
   123  	Ycr1
   124  	Ycr2
   125  	Ycr3
   126  	Ycr4
   127  	Ycr5
   128  	Ycr6
   129  	Ycr7
   130  	Ycr8
   131  	Ydr0
   132  	Ydr1
   133  	Ydr2
   134  	Ydr3
   135  	Ydr4
   136  	Ydr5
   137  	Ydr6
   138  	Ydr7
   139  	Ytr0
   140  	Ytr1
   141  	Ytr2
   142  	Ytr3
   143  	Ytr4
   144  	Ytr5
   145  	Ytr6
   146  	Ytr7
   147  	Ymr
   148  	Ymm
   149  	Yxr
   150  	Yxm
   151  	Yxvm // VSIB vector array; vm32x/vm64x
   152  	Yyr
   153  	Yym
   154  	Yyvm // VSIB vector array; vm32y/vm64y
   155  	Ytls
   156  	Ytextsize
   157  	Yindir
   158  	Ymax // number of Ytypes; ycover holds Ymax*Ymax entries
   159  )
   160  
   161  const ( // Ztypes: first field of each ytab entry; together with the z bytes it determines the bytes laid down
   162  	Zxxx = iota // zero value of the enumeration
   163  	Zlit
   164  	Zlitm_r
   165  	Z_rp
   166  	Zbr
   167  	Zcall
   168  	Zcallcon
   169  	Zcallduff
   170  	Zcallind
   171  	Zcallindreg
   172  	Zib_
   173  	Zib_rp
   174  	Zibo_m
   175  	Zibo_m_xm
   176  	Zil_
   177  	Zil_rp
   178  	Ziq_rp
   179  	Zilo_m
   180  	Zjmp
   181  	Zjmpcon
   182  	Zloop
   183  	Zo_iw
   184  	Zm_o
   185  	Zm_r
   186  	Zm2_r
   187  	Zm_r_xm
   188  	Zm_r_i_xm
   189  	Zm_r_xm_nr
   190  	Zr_m_xm_nr
   191  	Zibm_r /* mmx1,mmx2/mem64,imm8 */
   192  	Zibr_m
   193  	Zmb_r
   194  	Zaut_r
   195  	Zo_m
   196  	Zo_m64
   197  	Zpseudo
   198  	Zr_m
   199  	Zr_m_xm
   200  	Zrp_
   201  	Z_ib
   202  	Z_il
   203  	Zm_ibo
   204  	Zm_ilo
   205  	Zib_rr
   206  	Zil_rr
   207  	Zbyte
   208  	Zvex_rm_v_r
   209  	Zvex_rm_v_ro
   210  	Zvex_r_v_rm
   211  	Zvex_v_rm_r
   212  	Zvex_i_rm_r
   213  	Zvex_i_r_v
   214  	Zvex_i_rm_v_r
   215  	Zvex
   216  	Zvex_rm_r_vo
   217  	Zvex_i_r_rm
   218  	Zvex_hr_rm_v_r
   219  
   220  	Zmax // number of Ztypes
   221  )
   222  
   223  const ( // prefix selectors for Optab.prefix, followed by the Rx* extension-bit masks
   224  	Px   = 0 // no prefix selector
   225  	Px1  = 1    // symbolic; exact value doesn't matter
   226  	P32  = 0x32 /* 32-bit only */
   227  	Pe   = 0x66 /* operand escape */
   228  	Pm   = 0x0f /* 2byte opcode escape */
   229  	Pq   = 0xff /* both escapes: 66 0f */
   230  	Pb   = 0xfe /* byte operands */
   231  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   232  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   233  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   234  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   235  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   236  	Pq4w = 0x69 /* Pq4 with Rex.w 66 0F 38 */
   237  	Pq5  = 0x6a /* xmm escape 5: F3 0F 38 */
   238  	Pq5w = 0x6b /* Pq5 with Rex.w F3 0F 38 */
   239  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   240  	Pw   = 0x48 /* Rex.w */
   241  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   242  	Py   = 0x80 /* defaults to 64-bit mode */
   243  	Py1  = 0x81 // symbolic; exact value doesn't matter
   244  	Py3  = 0x83 // symbolic; exact value doesn't matter
   245  	Pvex = 0x84 // symbolic: exact value doesn't matter
   246  
   247  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   248  	Rxr = 1 << 2 /* extend modrm reg */
   249  	Rxx = 1 << 1 /* extend sib index */
   250  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   251  )
   252  
   253  const (
   254  	// Encoding for VEX prefix in tables.
   255  	// The P, L, and W fields are chosen to match
   256  	// their eventual locations in the VEX prefix bytes.
   257  
   258  	// V field - 4 bits; ignored by encoder
   259  	vexNOVSR = 0 // No VEX-SPECIFIED-REGISTER
   260  	vexNDS   = 0 // all V-field names are 0; they only document the manual's notation
   261  	vexNDD   = 0
   262  	vexDDS   = 0
   263  	// P field - 2 bits
   264  	vex66 = 1 << 0
   265  	vexF3 = 2 << 0
   266  	vexF2 = 3 << 0
   267  	// L field - 1 bit
   268  	vexLZ  = 0 << 2 // vexLZ, vexLIG and vex128 all encode L=0
   269  	vexLIG = 0 << 2
   270  	vex128 = 0 << 2
   271  	vex256 = 1 << 2
   272  	// W field - 1 bit
   273  	vexWIG = 0 << 7 // vexWIG and vexW0 both encode W=0
   274  	vexW0  = 0 << 7
   275  	vexW1  = 1 << 7
   276  	// M field - 5 bits, but mostly reserved; we can store up to 4
   277  	vex0F   = 1 << 3
   278  	vex0F38 = 2 << 3
   279  	vex0F3A = 3 << 3
   280  
   281  	// Combinations used in the manual.
   282  	VEX_DDS_LIG_66_0F38_W1    = vexDDS | vexLIG | vex66 | vex0F38 | vexW1
   283  	VEX_NDD_128_66_0F_WIG     = vexNDD | vex128 | vex66 | vex0F | vexWIG
   284  	VEX_NDD_256_66_0F_WIG     = vexNDD | vex256 | vex66 | vex0F | vexWIG
   285  	VEX_NDD_LZ_F2_0F38_W0     = vexNDD | vexLZ | vexF2 | vex0F38 | vexW0
   286  	VEX_NDD_LZ_F2_0F38_W1     = vexNDD | vexLZ | vexF2 | vex0F38 | vexW1
   287  	VEX_NDS_128_66_0F_WIG     = vexNDS | vex128 | vex66 | vex0F | vexWIG
   288  	VEX_NDS_128_66_0F38_WIG   = vexNDS | vex128 | vex66 | vex0F38 | vexWIG
   289  	VEX_NDS_128_F2_0F_WIG     = vexNDS | vex128 | vexF2 | vex0F | vexWIG
   290  	VEX_NDS_256_66_0F_WIG     = vexNDS | vex256 | vex66 | vex0F | vexWIG
   291  	VEX_NDS_256_66_0F38_WIG   = vexNDS | vex256 | vex66 | vex0F38 | vexWIG
   292  	VEX_NDS_256_66_0F3A_W0    = vexNDS | vex256 | vex66 | vex0F3A | vexW0
   293  	VEX_NDS_256_66_0F3A_WIG   = vexNDS | vex256 | vex66 | vex0F3A | vexWIG
   294  	VEX_NDS_LZ_0F38_W0        = vexNDS | vexLZ | vex0F38 | vexW0
   295  	VEX_NDS_LZ_0F38_W1        = vexNDS | vexLZ | vex0F38 | vexW1
   296  	VEX_NDS_LZ_66_0F38_W0     = vexNDS | vexLZ | vex66 | vex0F38 | vexW0
   297  	VEX_NDS_LZ_66_0F38_W1     = vexNDS | vexLZ | vex66 | vex0F38 | vexW1
   298  	VEX_NDS_LZ_F2_0F38_W0     = vexNDS | vexLZ | vexF2 | vex0F38 | vexW0
   299  	VEX_NDS_LZ_F2_0F38_W1     = vexNDS | vexLZ | vexF2 | vex0F38 | vexW1
   300  	VEX_NDS_LZ_F3_0F38_W0     = vexNDS | vexLZ | vexF3 | vex0F38 | vexW0
   301  	VEX_NDS_LZ_F3_0F38_W1     = vexNDS | vexLZ | vexF3 | vex0F38 | vexW1
   302  	VEX_NOVSR_128_66_0F_WIG   = vexNOVSR | vex128 | vex66 | vex0F | vexWIG
   303  	VEX_NOVSR_128_66_0F38_W0  = vexNOVSR | vex128 | vex66 | vex0F38 | vexW0
   304  	VEX_NOVSR_128_66_0F38_WIG = vexNOVSR | vex128 | vex66 | vex0F38 | vexWIG
   305  	VEX_NOVSR_128_F2_0F_WIG   = vexNOVSR | vex128 | vexF2 | vex0F | vexWIG
   306  	VEX_NOVSR_128_F3_0F_WIG   = vexNOVSR | vex128 | vexF3 | vex0F | vexWIG
   307  	VEX_NOVSR_256_66_0F_WIG   = vexNOVSR | vex256 | vex66 | vex0F | vexWIG
   308  	VEX_NOVSR_256_66_0F38_W0  = vexNOVSR | vex256 | vex66 | vex0F38 | vexW0
   309  	VEX_NOVSR_256_66_0F38_WIG = vexNOVSR | vex256 | vex66 | vex0F38 | vexWIG
   310  	VEX_NOVSR_256_F2_0F_WIG   = vexNOVSR | vex256 | vexF2 | vex0F | vexWIG
   311  	VEX_NOVSR_256_F3_0F_WIG   = vexNOVSR | vex256 | vexF3 | vex0F | vexWIG
   312  	VEX_NOVSR_LZ_F2_0F3A_W0   = vexNOVSR | vexLZ | vexF2 | vex0F3A | vexW0
   313  	VEX_NOVSR_LZ_F2_0F3A_W1   = vexNOVSR | vexLZ | vexF2 | vex0F3A | vexW1
   314  )
   315  
   316  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] set means Ytype a also counts as Ytype b; set up in instinit
   317  
   318  var reg [MAXREG]int // per-register table; NOTE(review): populated outside this chunk — confirm contents
   319  
   320  var regrex [MAXREG + 1]int // presumably per-register Rx* extension bits — TODO confirm
   321  
   322  var ynone = []ytab{ // pattern for instructions written with no operands
   323  	{Zlit, 1, argList{}}, // entry layout: Ztype, zoffset, operand Ytypes
   324  }
   325  
   326  var ytext = []ytab{ // two- and three-operand forms, both ending in Ytextsize
   327  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   328  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   329  }
   330  
   331  var ynop = []ytab{ // NOTE(review): the Yiauto/Yml/Yrf/Yxr patterns are each listed twice — confirm the second set is intentional
   332  	{Zpseudo, 0, argList{}},
   333  	{Zpseudo, 0, argList{Yiauto}},
   334  	{Zpseudo, 0, argList{Yml}},
   335  	{Zpseudo, 0, argList{Yrf}},
   336  	{Zpseudo, 0, argList{Yxr}},
   337  	{Zpseudo, 0, argList{Yiauto}},
   338  	{Zpseudo, 0, argList{Yml}},
   339  	{Zpseudo, 0, argList{Yrf}},
   340  	{Zpseudo, 1, argList{Yxr}},
   341  }
   342  
   343  var yfuncdata = []ytab{ // constant followed by a memory operand
   344  	{Zpseudo, 0, argList{Yi32, Ym}},
   345  }
   346  
   347  var ypcdata = []ytab{ // two constant operands
   348  	{Zpseudo, 0, argList{Yi32, Yi32}},
   349  }
   350  
   351  var yxorb = []ytab{ // byte forms: immediate to Yal, immediate to Ymb, Yrb to Ymb, Ymb to Yrb
   352  	{Zib_, 1, argList{Yi32, Yal}},
   353  	{Zibo_m, 2, argList{Yi32, Ymb}},
   354  	{Zr_m, 1, argList{Yrb, Ymb}},
   355  	{Zm_r, 1, argList{Ymb, Yrb}},
   356  }
   357  
   358  var yaddl = []ytab{ // used as the worked example in the doasm comment later in this file
   359  	{Zibo_m, 2, argList{Yi8, Yml}}, // int8 immediate form is listed first, ahead of the Yi32 forms
   360  	{Zil_, 1, argList{Yi32, Yax}},
   361  	{Zilo_m, 2, argList{Yi32, Yml}},
   362  	{Zr_m, 1, argList{Yrl, Yml}},
   363  	{Zm_r, 1, argList{Yml, Yrl}},
   364  }
   365  
   366  var yincl = []ytab{ // single operand: Yrl or Yml
   367  	{Z_rp, 1, argList{Yrl}},
   368  	{Zo_m, 2, argList{Yml}},
   369  }
   370  
   371  var yincq = []ytab{ // like yincl but without the Z_rp register form
   372  	{Zo_m, 2, argList{Yml}},
   373  }
   374  
   375  var ycmpb = []ytab{ // mirrors yxorb with the operand order (and Ztypes) reversed
   376  	{Z_ib, 1, argList{Yal, Yi32}},
   377  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   378  	{Zm_r, 1, argList{Ymb, Yrb}},
   379  	{Zr_m, 1, argList{Yrb, Ymb}},
   380  }
   381  
   382  var ycmpl = []ytab{ // mirrors yaddl with the operand order (and Ztypes) reversed
   383  	{Zm_ibo, 2, argList{Yml, Yi8}},
   384  	{Z_il, 1, argList{Yax, Yi32}},
   385  	{Zm_ilo, 2, argList{Yml, Yi32}},
   386  	{Zm_r, 1, argList{Yml, Yrl}},
   387  	{Zr_m, 1, argList{Yrl, Yml}},
   388  }
   389  
   390  var yshb = []ytab{ // first operand is $1, a uint8 immediate, or Ycx; second is Ymb
   391  	{Zo_m, 2, argList{Yi1, Ymb}},
   392  	{Zibo_m, 2, argList{Yu8, Ymb}},
   393  	{Zo_m, 2, argList{Ycx, Ymb}},
   394  }
   395  
   396  var yshl = []ytab{ // first operand is $1, a uint8 immediate, Ycl, or Ycx; second is Yml
   397  	{Zo_m, 2, argList{Yi1, Yml}},
   398  	{Zibo_m, 2, argList{Yu8, Yml}},
   399  	{Zo_m, 2, argList{Ycl, Yml}},
   400  	{Zo_m, 2, argList{Ycx, Yml}},
   401  }
   402  
   403  var ytestl = []ytab{ // same entries as yaddl minus the leading Yi8 form
   404  	{Zil_, 1, argList{Yi32, Yax}},
   405  	{Zilo_m, 2, argList{Yi32, Yml}},
   406  	{Zr_m, 1, argList{Yrl, Yml}},
   407  	{Zm_r, 1, argList{Yml, Yrl}},
   408  }
   409  
   410  var ymovb = []ytab{ // byte forms: Yrb to Ymb, Ymb to Yrb, immediate to Yrb, immediate to Ymb
   411  	{Zr_m, 1, argList{Yrb, Ymb}},
   412  	{Zm_r, 1, argList{Ymb, Yrb}},
   413  	{Zib_rp, 1, argList{Yi32, Yrb}},
   414  	{Zibo_m, 2, argList{Yi32, Ymb}},
   415  }
   416  
   417  var ybtl = []ytab{ // int8 immediate or Yrl as first operand; Yml as second
   418  	{Zibo_m, 2, argList{Yi8, Yml}},
   419  	{Zr_m, 1, argList{Yrl, Yml}},
   420  }
   421  
   422  var ymovw = []ytab{ // register/memory, immediate and Yiauto source forms
   423  	{Zr_m, 1, argList{Yrl, Yml}},
   424  	{Zm_r, 1, argList{Yml, Yrl}},
   425  	{Zil_rp, 1, argList{Yi32, Yrl}},
   426  	{Zilo_m, 2, argList{Yi32, Yml}},
   427  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   428  }
   429  
   430  var ymovl = []ytab{ // the ymovw forms with MMX/XMM MOVD forms inserted before the Zaut_r entry
   431  	{Zr_m, 1, argList{Yrl, Yml}},
   432  	{Zm_r, 1, argList{Yml, Yrl}},
   433  	{Zil_rp, 1, argList{Yi32, Yrl}},
   434  	{Zilo_m, 2, argList{Yi32, Yml}},
   435  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   436  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   437  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   438  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   439  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   440  }
   441  
   442  var yret = []ytab{ // no operand, or a single constant operand
   443  	{Zo_iw, 1, argList{}},
   444  	{Zo_iw, 1, argList{Yi32}},
   445  }
   446  
   447  var ymovq = []ytab{ // MOVQ operand patterns; the per-entry comments name the prefix/opcode each form selects
   448  	// valid in 32-bit mode
   449  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   450  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   451  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   452  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   453  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   454  
   455  	// valid only in 64-bit mode, usually with 64-bit prefix
   456  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   457  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   458  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   459  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   460  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   461  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   462  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   463  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   464  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   465  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   466  }
   467  
   468  var ym_rl = []ytab{ // Ym source, Yrl destination
   469  	{Zm_r, 1, argList{Ym, Yrl}},
   470  }
   471  
   472  var yrl_m = []ytab{ // Yrl source, Ym destination
   473  	{Zr_m, 1, argList{Yrl, Ym}},
   474  }
   475  
   476  var ymb_rl = []ytab{ // Ymb source, Yrl destination
   477  	{Zmb_r, 1, argList{Ymb, Yrl}},
   478  }
   479  
   480  var yml_rl = []ytab{ // Yml source, Yrl destination
   481  	{Zm_r, 1, argList{Yml, Yrl}},
   482  }
   483  
   484  var yrl_ml = []ytab{ // Yrl source, Yml destination
   485  	{Zr_m, 1, argList{Yrl, Yml}},
   486  }
   487  
   488  var yml_mb = []ytab{ // NOTE(review): despite the name, both entries pair Yrb with Ymb
   489  	{Zr_m, 1, argList{Yrb, Ymb}},
   490  	{Zm_r, 1, argList{Ymb, Yrb}},
   491  }
   492  
   493  var yrb_mb = []ytab{ // Yrb source, Ymb destination
   494  	{Zr_m, 1, argList{Yrb, Ymb}},
   495  }
   496  
   497  var yxchg = []ytab{ // Yax paired with Yrl in either order, then the general Yrl/Yml forms
   498  	{Z_rp, 1, argList{Yax, Yrl}},
   499  	{Zrp_, 1, argList{Yrl, Yax}},
   500  	{Zr_m, 1, argList{Yrl, Yml}},
   501  	{Zm_r, 1, argList{Yml, Yrl}},
   502  }
   503  
   504  var ydivl = []ytab{ // single Yml operand
   505  	{Zm_o, 2, argList{Yml}},
   506  }
   507  
   508  var ydivb = []ytab{ // single Ymb operand
   509  	{Zm_o, 2, argList{Ymb}},
   510  }
   511  
   512  var yimul = []ytab{ // one-operand form, two immediate forms, and a Yml-to-Yrl form
   513  	{Zm_o, 2, argList{Yml}},
   514  	{Zib_rr, 1, argList{Yi8, Yrl}},
   515  	{Zil_rr, 1, argList{Yi32, Yrl}},
   516  	{Zm_r, 2, argList{Yml, Yrl}},
   517  }
   518  
   519  var yimul3 = []ytab{ // three operands: int8 immediate, Yml, Yrl
   520  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   521  }
   522  
   523  var ybyte = []ytab{ // single constant operand
   524  	{Zbyte, 1, argList{Yi64}},
   525  }
   526  
   527  var yin = []ytab{ // constant operand, or no operand
   528  	{Zib_, 1, argList{Yi32}},
   529  	{Zlit, 1, argList{}},
   530  }
   531  
   532  var yint = []ytab{ // single constant operand
   533  	{Zib_, 1, argList{Yi32}},
   534  }
   535  
   536  var ypushl = []ytab{ // register, memory, int8 immediate, or 32-bit immediate operand
   537  	{Zrp_, 1, argList{Yrl}},
   538  	{Zm_o, 2, argList{Ym}},
   539  	{Zib_, 1, argList{Yi8}},
   540  	{Zil_, 1, argList{Yi32}},
   541  }
   542  
   543  var ypopl = []ytab{ // register or memory operand
   544  	{Z_rp, 1, argList{Yrl}},
   545  	{Zo_m, 2, argList{Ym}},
   546  }
   547  
   548  var yclflush = []ytab{ // single Ym operand
   549  	{Zo_m, 2, argList{Ym}},
   550  }
   551  
   552  var ybswap = []ytab{ // single Yrl operand
   553  	{Z_rp, 2, argList{Yrl}},
   554  }
   555  
   556  var yscond = []ytab{ // single Ymb operand
   557  	{Zo_m, 2, argList{Ymb}},
   558  }
   559  
   560  var yjcond = []ytab{ // branch target, optionally preceded by the constant $0 or $1
   561  	{Zbr, 0, argList{Ybr}},
   562  	{Zbr, 0, argList{Yi0, Ybr}},
   563  	{Zbr, 1, argList{Yi1, Ybr}},
   564  }
   565  
   566  var yloop = []ytab{ // single branch-target operand
   567  	{Zloop, 1, argList{Ybr}},
   568  }
   569  
   570  var ycall = []ytab{ // indirect forms (Yml, Yrx pair, Yindir) come before the Ybr and constant forms
   571  	{Zcallindreg, 0, argList{Yml}},
   572  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   573  	{Zcallind, 2, argList{Yindir}},
   574  	{Zcall, 0, argList{Ybr}},
   575  	{Zcallcon, 1, argList{Yi32}},
   576  }
   577  
   578  var yduff = []ytab{ // single constant operand
   579  	{Zcallduff, 1, argList{Yi32}},
   580  }
   581  
   582  var yjmp = []ytab{ // Yml, branch-target, or constant operand
   583  	{Zo_m64, 2, argList{Yml}},
   584  	{Zjmp, 0, argList{Ybr}},
   585  	{Zjmpcon, 1, argList{Yi32}},
   586  }
   587  
   588  var yfmvd = []ytab{ // Ym or Yrf paired with Yf0, in both directions
   589  	{Zm_o, 2, argList{Ym, Yf0}},
   590  	{Zo_m, 2, argList{Yf0, Ym}},
   591  	{Zm_o, 2, argList{Yrf, Yf0}},
   592  	{Zo_m, 2, argList{Yf0, Yrf}},
   593  }
   594  
   595  var yfmvdp = []ytab{ // Yf0 to Ym or Yrf only
   596  	{Zo_m, 2, argList{Yf0, Ym}},
   597  	{Zo_m, 2, argList{Yf0, Yrf}},
   598  }
   599  
   600  var yfmvf = []ytab{ // Ym paired with Yf0, in both directions
   601  	{Zm_o, 2, argList{Ym, Yf0}},
   602  	{Zo_m, 2, argList{Yf0, Ym}},
   603  }
   604  
   605  var yfmvx = []ytab{ // Ym to Yf0 only
   606  	{Zm_o, 2, argList{Ym, Yf0}},
   607  }
   608  
   609  var yfmvp = []ytab{ // Yf0 to Ym only
   610  	{Zo_m, 2, argList{Yf0, Ym}},
   611  }
   612  
   613  var yfcmv = []ytab{ // Yrf to Yf0 only
   614  	{Zm_o, 2, argList{Yrf, Yf0}},
   615  }
   616  
   617  var yfadd = []ytab{ // Ym or Yrf to Yf0, or Yf0 to Yrf
   618  	{Zm_o, 2, argList{Ym, Yf0}},
   619  	{Zm_o, 2, argList{Yrf, Yf0}},
   620  	{Zo_m, 2, argList{Yf0, Yrf}},
   621  }
   622  
   623  var yfxch = []ytab{ // Yf0 and Yrf in either order
   624  	{Zo_m, 2, argList{Yf0, Yrf}},
   625  	{Zm_o, 2, argList{Yrf, Yf0}},
   626  }
   627  
   628  var ycompp = []ytab{
   629  	{Zo_m, 2, argList{Yf0, Yrf}}, /* botch is really f0,f1 */
   630  }
   631  
   632  var ystsw = []ytab{ // Ym operand, or Yax
   633  	{Zo_m, 2, argList{Ym}},
   634  	{Zlit, 1, argList{Yax}},
   635  }
   636  
   637  var ysvrs_mo = []ytab{ // single Ym operand, Zm_o form
   638  	{Zm_o, 2, argList{Ym}},
   639  }
   640  
   641  // unaryDst version of "ysvrs_mo".
   642  var ysvrs_om = []ytab{ // same operand as ysvrs_mo but with the Zo_m Ztype
   643  	{Zo_m, 2, argList{Ym}},
   644  }
   645  
   646  var ymm = []ytab{ // an MMX form (Ymm, Ymr) followed by an XMM form (Yxm, Yxr)
   647  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   648  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   649  }
   650  
   651  var yxm = []ytab{ // Yxm source, Yxr destination
   652  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   653  }
   654  
   655  var yxm_q4 = []ytab{ // same operands as yxm but with the plain Zm_r Ztype
   656  	{Zm_r, 1, argList{Yxm, Yxr}},
   657  }
   658  
   659  var yxcvm1 = []ytab{ // Yxm source; destination Yxr or Ymr
   660  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   661  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   662  }
   663  
   664  var yxcvm2 = []ytab{ // source Yxm or Ymm; Yxr destination
   665  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   666  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   667  }
   668  
   669  var yxr = []ytab{ // both operands Yxr
   670  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   671  }
   672  
   673  var yxr_ml = []ytab{ // Yxr source, Yml destination
   674  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   675  }
   676  
   677  var ymr = []ytab{ // both operands Ymr
   678  	{Zm_r, 1, argList{Ymr, Ymr}},
   679  }
   680  
   681  var ymr_ml = []ytab{ // Ymr source, Yml destination
   682  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   683  }
   684  
   685  var yxcmpi = []ytab{ // three operands with the int8 immediate last
   686  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   687  }
   688  
   689  var yxmov = []ytab{ // Yxm to Yxr, and Yxr to Yxm
   690  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   691  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   692  }
   693  
   694  var yxcvfl = []ytab{ // Yxm source, Yrl destination
   695  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   696  }
   697  
   698  var yxcvlf = []ytab{ // Yml source, Yxr destination
   699  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   700  }
   701  
   702  var yxcvfq = []ytab{ // same operands as yxcvfl with zoffset 2
   703  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   704  }
   705  
   706  var yxcvqf = []ytab{ // same operands as yxcvlf with zoffset 2
   707  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   708  }
   709  
   710  var yps = []ytab{ // MMX then XMM forms, each with a register/memory and an int8-immediate variant
   711  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   712  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   713  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   714  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   715  }
   716  
   717  var yxrrl = []ytab{ // Yxr source, Yrl destination
   718  	{Zm_r, 1, argList{Yxr, Yrl}},
   719  }
   720  
   721  var ymrxr = []ytab{ // Ymr or Yxm source; Yxr destination
   722  	{Zm_r, 1, argList{Ymr, Yxr}},
   723  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   724  }
   725  
   726  var ymshuf = []ytab{ // int8 immediate, Ymm, Ymr
   727  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   728  }
   729  
   730  var ymshufb = []ytab{ // Yxm source, Yxr destination, Zm2_r Ztype
   731  	{Zm2_r, 2, argList{Yxm, Yxr}},
   732  }
   733  
   734  // It should never have more than 1 entry,
   735  // because some optab entries use opcode sequences that
   736  // are longer than 2 bytes (zoffset=2 here),
   737  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   738  // to name a few.
   739  var yxshuf = []ytab{
   740  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}}, // uint8 immediate, Yxm source, Yxr destination
   741  }
   742  
   743  var yextrw = []ytab{ // uint8 immediate, Yxr source, Yrl destination
   744  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   745  }
   746  
   747  var yextr = []ytab{ // uint8 immediate, Yxr source, Ymm destination
   748  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   749  }
   750  
   751  var yinsrw = []ytab{ // uint8 immediate, Yml source, Yxr destination
   752  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   753  }
   754  
   755  var yinsr = []ytab{ // uint8 immediate, Ymm source, Yxr destination
   756  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   757  }
   758  
   759  var ypsdq = []ytab{ // int8 immediate applied to a Yxr operand
   760  	{Zibo_m, 2, argList{Yi8, Yxr}},
   761  }
   762  
   763  var ymskb = []ytab{ // Yxr or Ymr source; Yrl destination
   764  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   765  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   766  }
   767  
   768  var ycrc32l = []ytab{ // Yml source, Yrl destination, Zlitm_r Ztype
   769  	{Zlitm_r, 0, argList{Yml, Yrl}},
   770  }
   771  
   772  var yprefetch = []ytab{ // single Ym operand
   773  	{Zm_o, 2, argList{Ym}},
   774  }
   775  
   776  var yaes = []ytab{ // Yxm source, Yxr destination, Zlitm_r Ztype
   777  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   778  }
   779  
   780  var yxbegin = []ytab{ // single branch-target operand
   781  	{Zjmp, 1, argList{Ybr}},
   782  }
   783  
   784  var yxabort = []ytab{ // single uint8 immediate operand
   785  	{Zib_, 1, argList{Yu8}},
   786  }
   787  
   788  var ylddqu = []ytab{ // Ym source, Yxr destination
   789  	{Zm_r, 1, argList{Ym, Yxr}},
   790  }
   791  
   792  var ypalignr = []ytab{ // same entry as yxshuf: uint8 immediate, Yxm, Yxr
   793  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   794  }
   795  
   796  // VEX instructions that come in two forms:
   797  //	VTHING xmm2/m128, xmmV, xmm1
   798  //	VTHING ymm2/m256, ymmV, ymm1
   799  // The opcode array in the corresponding Optab entry
   800  // should contain the (VEX prefixes, opcode byte) pair
   801  // for each of the two forms.
   802  // For example, the entries for VPXOR are:
   803  //
   804  //	VPXOR xmm2/m128, xmmV, xmm1
   805  //	VEX.NDS.128.66.0F.WIG EF /r
   806  //
   807  //	VPXOR ymm2/m256, ymmV, ymm1
   808  //	VEX.NDS.256.66.0F.WIG EF /r
   809  //
   810  // Produce this Optab entry:
   811  //
   812  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0xEF, VEX_NDS_256_66_0F_WIG, 0xEF}}
   813  //
   814  var yvex_xy3 = []ytab{
   815  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}}, // xmm form; zoffset 2 skips its (VEX prefix, opcode) pair
   816  	{Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}}, // ymm form
   817  }
   818  
   819  var yvex_x3 = []ytab{ // xmm-only counterpart of yvex_xy3
   820  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   821  }
   822  
   823  var yvex_ri3 = []ytab{ // int8 immediate, Ymb source, Yrl destination
   824  	{Zvex_i_rm_r, 2, argList{Yi8, Ymb, Yrl}},
   825  }
   826  
   827  var yvex_xyi3 = []ytab{ // xmm and ymm forms, listed for both Yu8 and Yi8 immediates
   828  	{Zvex_i_rm_r, 2, argList{Yu8, Yxm, Yxr}},
   829  	{Zvex_i_rm_r, 2, argList{Yu8, Yym, Yyr}},
   830  	{Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}},
   831  	{Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}},
   832  }
   833  
   834  var yvex_yyi4 = []ytab{ // four operands: uint8 immediate, Yym, Yyr, Yyr
   835  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yym, Yyr, Yyr}},
   836  }
   837  
   838  var yvex_xyi4 = []ytab{ // like yvex_yyi4 but the second operand is Yxm
   839  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yyr, Yyr}},
   840  }
   841  
   842  var yvex_shift = []ytab{ // immediate forms (zoffset 3) precede the Yxm-operand forms (zoffset 2)
   843  	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
   844  	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
   845  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   846  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr, Yyr}},
   847  }
   848  
   849  var yvex_shift_dq = []ytab{ // only the immediate forms of yvex_shift
   850  	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
   851  	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
   852  }
   853  
   854  var yvex_r3 = []ytab{ // Yml, Yrl, Yrl with the Zvex_rm_v_r Ztype
   855  	{Zvex_rm_v_r, 2, argList{Yml, Yrl, Yrl}},
   856  }
   857  
   858  var yvex_vmr3 = []ytab{ // Yrl, Yml, Yrl with the Zvex_v_rm_r Ztype
   859  	{Zvex_v_rm_r, 2, argList{Yrl, Yml, Yrl}},
   860  }
   861  
   862  var yvex_xy2 = []ytab{ // two-operand xmm and ymm forms
   863  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   864  	{Zvex_rm_v_r, 2, argList{Yym, Yyr}},
   865  }
   866  
   867  var yvex_xyr2 = []ytab{ // Yxr or Yyr source; Yrl destination
   868  	{Zvex_rm_v_r, 2, argList{Yxr, Yrl}},
   869  	{Zvex_rm_v_r, 2, argList{Yyr, Yrl}},
   870  }
   871  
   872  var yvex_vmovdqa = []ytab{ // load-style and store-style entries alternate, xmm pair first and then ymm pair
   873  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   874  	{Zvex_r_v_rm, 2, argList{Yxr, Yxm}},
   875  	{Zvex_rm_v_r, 2, argList{Yym, Yyr}},
   876  	{Zvex_r_v_rm, 2, argList{Yyr, Yym}},
   877  }
   878  
   879  var yvex_vmovntdq = []ytab{ // Yxr or Yyr source; Ym destination
   880  	{Zvex_r_v_rm, 2, argList{Yxr, Ym}},
   881  	{Zvex_r_v_rm, 2, argList{Yyr, Ym}},
   882  }
   883  
   884  var yvex_vpbroadcast = []ytab{ // Yxm source for both the Yxr and Yyr destinations
   885  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   886  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr}},
   887  }
   888  
   889  var yvex_vpbroadcast_sd = []ytab{ // only the Yyr-destination form
   890  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr}},
   891  }
   892  
   893  var yvex_vpextrw = []ytab{ // int8 immediate and Yxr source; destination Yrl or Yml, with different Ztypes
   894  	{Zvex_i_rm_r, 2, argList{Yi8, Yxr, Yrl}},
   895  	{Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yml}},
   896  }
   897  
   898  var yvex_m = []ytab{ // single Ym operand
   899  	{Zvex_rm_v_ro, 3, argList{Ym}},
   900  }
   901  
   902  var yvex_xx3 = []ytab{ // three Yxr operands
   903  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}},
   904  }
   905  
   906  var yvex_yi3 = []ytab{ // int8 immediate, Yyr source, Yxm destination
   907  	{Zvex_i_r_rm, 2, argList{Yi8, Yyr, Yxm}},
   908  }
   909  
   910  var yvex_mxy = []ytab{ // Ym source; Yxr or Yyr destination
   911  	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
   912  	{Zvex_rm_v_r, 2, argList{Ym, Yyr}},
   913  }
   914  
   915  var yvex_yy3 = []ytab{ // ymm-only counterpart of yvex_xy3
   916  	{Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}},
   917  }
   918  
   919  var yvex_xi3 = []ytab{ // int8 immediate, Yxm, Yxr
   920  	{Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}},
   921  }
   922  
   923  var yvex_vpermpd = []ytab{ // int8 immediate, Yym, Yyr
   924  	{Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}},
   925  }
   926  
   927  var yvex_vpermilp = []ytab{ // immediate and three-operand entries alternate, xmm pair first and then ymm pair
   928  	{Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}},
   929  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   930  	{Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}},
   931  	{Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}},
   932  }
   933  
   934  var yvex_vcvtps2ph = []ytab{ // the Yyr-source form is listed before the Yxr-source form
   935  	{Zvex_i_r_rm, 2, argList{Yi8, Yyr, Yxm}},
   936  	{Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yxm}},
   937  }
   938  
   939  var yvex_vbroadcastf = []ytab{ // Ym source, Yyr destination
   940  	{Zvex_rm_v_r, 2, argList{Ym, Yyr}},
   941  }
   942  
   943  var yvex_vmovd = []ytab{ // Yxr to Yml, then Yml to Yxr
   944  	{Zvex_r_v_rm, 2, argList{Yxr, Yml}},
   945  	{Zvex_rm_v_r, 2, argList{Yml, Yxr}},
   946  }
   947  
   948  var yvex_x2 = []ytab{ // Yxm source, Yxr destination
   949  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
   950  }
   951  
   952  var yvex_y2 = []ytab{ // Yym source, Yxr destination
   953  	{Zvex_rm_v_r, 2, argList{Yym, Yxr}},
   954  }
   955  
   956  var yvex = []ytab{ // no operands
   957  	{Zvex, 2, argList{}},
   958  }
   959  
   960  var yvex_xx2 = []ytab{ // two Yxr operands
   961  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr}},
   962  }
   963  
   964  var yvex_vpalignr = []ytab{ // four-operand xmm and ymm forms led by a uint8 immediate
   965  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yxr, Yxr}},
   966  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yym, Yyr, Yyr}},
   967  }
   968  
   969  var yvex_rxi4 = []ytab{ // uint8 immediate, Yml, Yxr, Yxr
   970  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yml, Yxr, Yxr}},
   971  }
   972  
   973  var yvex_xxi4 = []ytab{ // uint8 immediate, Yxm, Yxr, Yxr
   974  	{Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yxr, Yxr}},
   975  }
   976  
   977  var yvex_xy4 = []ytab{ // four-operand forms whose first operand is a register (Yxr / Yyr)
   978  	{Zvex_hr_rm_v_r, 2, argList{Yxr, Yxm, Yxr, Yxr}},
   979  	{Zvex_hr_rm_v_r, 2, argList{Yyr, Yym, Yyr, Yyr}},
   980  }
   981  
   982  var yvex_vpbroadcast_ss = []ytab{ // Ym or Yxr source, for Yxr and then Yyr destinations
   983  	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
   984  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr}},
   985  	{Zvex_rm_v_r, 2, argList{Ym, Yyr}},
   986  	{Zvex_rm_v_r, 2, argList{Yxr, Yyr}},
   987  }
   988  
   989  var yvex_vblendvpd = []ytab{ // Yml-destination forms precede the Ym-source forms
   990  	{Zvex_r_v_rm, 2, argList{Yxr, Yxr, Yml}},
   991  	{Zvex_r_v_rm, 2, argList{Yyr, Yyr, Yml}},
   992  	{Zvex_rm_v_r, 2, argList{Ym, Yxr, Yxr}},
   993  	{Zvex_rm_v_r, 2, argList{Ym, Yyr, Yyr}},
   994  }
   995  
   996  var yvex_vmov = []ytab{
   997  	{Zvex_r_v_rm, 2, argList{Yxr, Ym}},
   998  	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
   999  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}},
  1000  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}}, // NOTE(review): identical to the previous entry — confirm it is intentional
  1001  }
  1002  
  1003  var yvex_vps = []ytab{ // Yxm-operand (zoffset 2) and int8-immediate (zoffset 3) entries alternate, xmm then ymm
  1004  	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
  1005  	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
  1006  	{Zvex_rm_v_r, 2, argList{Yxm, Yyr, Yyr}},
  1007  	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
  1008  }
  1009  
  1010  var yvex_r2 = []ytab{ // Yml source, Yrl destination
  1011  	{Zvex_rm_r_vo, 3, argList{Yml, Yrl}},
  1012  }
  1013  
  1014  var yvex_vpextr = []ytab{ // int8 immediate, Yxr source, Yml destination
  1015  	{Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yml}},
  1016  }
  1017  
  1018  var yvex_rx3 = []ytab{ // Yml, Yxr, Yxr
  1019  	{Zvex_rm_v_r, 2, argList{Yml, Yxr, Yxr}},
  1020  }
  1021  
  1022  var yvex_vcvtsd2si = []ytab{ // Yxm source, Yrl destination
  1023  	{Zvex_rm_v_r, 2, argList{Yxm, Yrl}},
  1024  }
  1025  
  1026  var yvex_vmovhpd = []ytab{ // two-operand form to Ym, then three-operand form from Ym
  1027  	{Zvex_r_v_rm, 2, argList{Yxr, Ym}},
  1028  	{Zvex_rm_v_r, 2, argList{Ym, Yxr, Yxr}},
  1029  }
  1030  
  1031  var yvex_vmovq = []ytab{ // five forms; each has zoffset 2
  1032  	{Zvex_r_v_rm, 2, argList{Yxr, Yml}},
  1033  	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
  1034  	{Zvex_rm_v_r, 2, argList{Yml, Yxr}},
  1035  	{Zvex_rm_v_r, 2, argList{Yxr, Yxr}},
  1036  	{Zvex_r_v_rm, 2, argList{Yxr, Yxm}},
  1037  }
  1038  
  1039  var yvpgatherdq = []ytab{ // both forms take a Yxvm (vm32x/vm64x) VSIB operand
  1040  	{Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
  1041  	{Zvex_v_rm_r, 2, argList{Yyr, Yxvm, Yyr}},
  1042  }
  1043  
  1044  var yvpgatherqq = []ytab{ // the VSIB operand class follows the register class: Yxvm with Yxr, Yyvm with Yyr
  1045  	{Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
  1046  	{Zvex_v_rm_r, 2, argList{Yyr, Yyvm, Yyr}},
  1047  }
  1048  
  1049  var yvgatherqps = []ytab{ // both forms use Yxr registers; only the VSIB operand class differs
  1050  	{Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
  1051  	{Zvex_v_rm_r, 2, argList{Yxr, Yyvm, Yxr}},
  1052  }
  1053  
  1054  var ymmxmm0f38 = []ytab{ // MMX form (zoffset 3) followed by XMM form (zoffset 5)
  1055  	{Zlitm_r, 3, argList{Ymm, Ymr}},
  1056  	{Zlitm_r, 5, argList{Yxm, Yxr}},
  1057  }
  1058  
  1059  var yextractps = []ytab{ // immediate must fit in uint2 (Yu2); Yxr source, Yml destination
  1060  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
  1061  }
  1062  
  1063  /*
  1064   * You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
  1065   * ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
  1066   * to find the entry with the given p.As and then looks through the ytable for
  1067   * that instruction (the second field in the optab struct) for a line whose
  1068   * argList matches the Ytypes of the p.From and p.To operands.  The
  1069   * function oclass computes the specific Ytype of an operand and then the set
  1070   * of more general Ytypes that it satisfies is implied by the ycover table, set
  1071   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
  1072   * from the more general 8-bit constants, but instinit says
  1073   *
  1074   *        ycover[Yi0*Ymax+Ys32] = 1
  1075   *        ycover[Yi1*Ymax+Ys32] = 1
  1076   *        ycover[Yi8*Ymax+Ys32] = 1
  1077   *
  1078   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
  1079   * if that's what an instruction can handle.
  1080   *
  1081   * In parallel with the scan through the ytable for the appropriate line, there
  1082   * is a z pointer that starts out pointing at the strange magic byte list in
  1083   * the Optab struct.  With each step past a non-matching ytable line, z
  1084   * advances by the 4th entry in the line.  When a matching line is found, that
  1085   * z pointer has the extra data to use in laying down the instruction bytes.
  1086   * The actual bytes laid down are a function of the 3rd entry in the line (that
  1087   * is, the Ztype) and the z bytes.
  1088   *
  1089   * For example, let's look at AADDL.  The optab line says:
  1090   *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1091   *
  1092   * and yaddl says
  1093   *        var yaddl = []ytab{
  1094   *                {Yi8, Ynone, Yml, Zibo_m, 2},
  1095   *                {Yi32, Ynone, Yax, Zil_, 1},
  1096   *                {Yi32, Ynone, Yml, Zilo_m, 2},
  1097   *                {Yrl, Ynone, Yml, Zr_m, 1},
  1098   *                {Yml, Ynone, Yrl, Zm_r, 1},
  1099   *        }
  1100   *
  1101   * so there are 5 possible types of ADDL instruction that can be laid down, and
  1102   * possible states used to lay them down (Ztype and z pointer, assuming z
  1103   * points at [23]uint8{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
  1104   *
  1105   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
  1106   *        Yi32, Yax -> Zil_, z+2 (0x05)
  1107   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
  1108   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
  1109   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
  1110   *
  1111   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
  1112   * relatively straightforward as this program goes.
  1113   *
  1114   * The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
  1115   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
  1116   * encoded addressing mode for the Yml arg), and then a single immediate byte.
  1117   * Zilo_m is the same but a long (32-bit) immediate.
  1118   */
  1119  var optab =
  1120  /*	as, ytab, andproto, opcode */
  1121  []Optab{
  1122  	{obj.AXXX, nil, 0, [23]uint8{}},
  1123  	{AAAA, ynone, P32, [23]uint8{0x37}},
  1124  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
  1125  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
  1126  	{AAAS, ynone, P32, [23]uint8{0x3f}},
  1127  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x12}},
  1128  	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
  1129  	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
  1130  	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
  1131  	{AADCXL, yml_rl, Pq4, [23]uint8{0xf6}},
  1132  	{AADCXQ, yml_rl, Pq4w, [23]uint8{0xf6}},
  1133  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
  1134  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1135  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
  1136  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
  1137  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1138  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
  1139  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
  1140  	{AADDSUBPD, yxm, Pq, [23]uint8{0xd0}},
  1141  	{AADDSUBPS, yxm, Pf2, [23]uint8{0xd0}},
  1142  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  1143  	{AADOXL, yml_rl, Pq5, [23]uint8{0xf6}},
  1144  	{AADOXQ, yml_rl, Pq5w, [23]uint8{0xf6}},
  1145  	{AADJSP, nil, 0, [23]uint8{}},
  1146  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
  1147  	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1148  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
  1149  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
  1150  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
  1151  	{AANDPS, yxm, Pm, [23]uint8{0x54}},
  1152  	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1153  	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  1154  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
  1155  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
  1156  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
  1157  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
  1158  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
  1159  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
  1160  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
  1161  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
  1162  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
  1163  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
  1164  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
  1165  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
  1166  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
  1167  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
  1168  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
  1169  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
  1170  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
  1171  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
  1172  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
  1173  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
  1174  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
  1175  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
  1176  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
  1177  	{ABYTE, ybyte, Px, [23]uint8{1}},
  1178  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
  1179  	{ACDQ, ynone, Px, [23]uint8{0x99}},
  1180  	{ACLC, ynone, Px, [23]uint8{0xf8}},
  1181  	{ACLD, ynone, Px, [23]uint8{0xfc}},
  1182  	{ACLFLUSH, yclflush, Pm, [23]uint8{0xae, 07}},
  1183  	{ACLI, ynone, Px, [23]uint8{0xfa}},
  1184  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
  1185  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1186  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1187  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1188  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1189  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1190  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1191  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1192  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1193  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1194  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1195  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1196  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1197  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1198  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1199  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1200  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1201  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1202  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1203  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1204  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1205  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1206  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1207  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1208  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1209  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1210  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1211  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1212  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1213  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1214  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1215  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1216  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1217  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1218  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1219  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1220  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1221  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1222  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1223  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1224  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1225  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1226  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1227  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1228  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1229  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1230  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1231  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1232  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1233  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1234  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1235  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1236  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1237  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1238  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1239  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1240  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1241  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1242  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1243  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1244  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1245  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1246  	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
  1247  	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
  1248  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1249  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1250  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1251  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1252  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1253  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1254  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1255  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1256  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1257  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1258  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1259  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1260  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1261  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1262  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1263  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1264  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1265  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1266  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1267  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1268  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1269  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1270  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1271  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1272  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1273  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1274  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1275  	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
  1276  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1277  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1278  	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
  1279  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1280  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1281  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1282  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1283  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1284  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1285  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1286  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1287  	{ADPPD, yxshuf, Pq, [23]uint8{0x3a, 0x41, 0}},
  1288  	{ADPPS, yxshuf, Pq, [23]uint8{0x3a, 0x40, 0}},
  1289  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1290  	{AEXTRACTPS, yextractps, Pq, [23]uint8{0x3a, 0x17, 0}},
  1291  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1292  	{AFXRSTOR, ysvrs_mo, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1293  	{AFXSAVE, ysvrs_om, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1294  	{AFXRSTOR64, ysvrs_mo, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1295  	{AFXSAVE64, ysvrs_om, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1296  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1297  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1298  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1299  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1300  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1301  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1302  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1303  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1304  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1305  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1306  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1307  	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
  1308  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1309  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1310  	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
  1311  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1312  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1313  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1314  	{AINSERTPS, yxshuf, Pq, [23]uint8{0x3a, 0x21, 0}},
  1315  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1316  	{AINT, yint, Px, [23]uint8{0xcd}},
  1317  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1318  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1319  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1320  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1321  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1322  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1323  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1324  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1325  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1326  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1327  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1328  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1329  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1330  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1331  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1332  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1333  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1334  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1335  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1336  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1337  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1338  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1339  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1340  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1341  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1342  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1343  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1344  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1345  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1346  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1347  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1348  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1349  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1350  	{ALDMXCSR, ysvrs_mo, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1351  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1352  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1353  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1354  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1355  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1356  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1357  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1358  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1359  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1360  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1361  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1362  	{ALONG, ybyte, Px, [23]uint8{4}},
  1363  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1364  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1365  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1366  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1367  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1368  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1369  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1370  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1371  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1372  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1373  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1374  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1375  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1376  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1377  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1378  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1379  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1380  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1381  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1382  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1383  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1384  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1385  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1386  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1387  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1388  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1389  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1390  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1391  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1392  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1393  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1394  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1395  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1396  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1397  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1398  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1399  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1400  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1401  	{AMOVNTDQA, ylddqu, Pq4, [23]uint8{0x2a}},
  1402  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1403  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1404  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1405  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1406  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1407  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1408  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1409  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1410  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1411  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1412  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1413  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1414  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1415  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1416  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1417  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1418  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1419  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1420  	{AMPSADBW, yxshuf, Pq, [23]uint8{0x3a, 0x42, 0}},
  1421  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1422  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1423  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1424  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1425  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1426  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1427  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1428  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1429  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1430  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1431  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1432  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1433  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1434  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1435  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1436  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1437  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1438  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1439  	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1440  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1441  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1442  	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1443  	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1444  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1445  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1446  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1447  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1448  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1449  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1450  	{APABSB, yxm_q4, Pq4, [23]uint8{0x1c}},
  1451  	{APABSD, yxm_q4, Pq4, [23]uint8{0x1e}},
  1452  	{APABSW, yxm_q4, Pq4, [23]uint8{0x1d}},
  1453  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1454  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1455  	{APACKUSDW, yxm_q4, Pq4, [23]uint8{0x2b}},
  1456  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1457  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1458  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1459  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1460  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1461  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1462  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1463  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1464  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1465  	{APALIGNR, ypalignr, Pq, [23]uint8{0x3a, 0x0f}},
  1466  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1467  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1468  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1469  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1470  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1471  	{APBLENDW, yxshuf, Pq, [23]uint8{0x3a, 0x0e, 0}},
  1472  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1473  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1474  	{APCMPEQQ, yxm_q4, Pq4, [23]uint8{0x29}},
  1475  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1476  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1477  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1478  	{APCMPGTQ, yxm_q4, Pq4, [23]uint8{0x37}},
  1479  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1480  	{APCMPISTRI, yxshuf, Pq, [23]uint8{0x3a, 0x63, 0}},
  1481  	{APCMPISTRM, yxshuf, Pq, [23]uint8{0x3a, 0x62, 0}},
  1482  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1483  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1484  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1485  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1486  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1487  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1488  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1489  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1490  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1491  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1492  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1493  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1494  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1495  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1496  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1497  	{APMADDUBSW, yxm_q4, Pq4, [23]uint8{0x04}},
  1498  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1499  	{APMAXSB, yxm_q4, Pq4, [23]uint8{0x3c}},
  1500  	{APMAXSD, yxm_q4, Pq4, [23]uint8{0x3d}},
  1501  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1502  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1503  	{APMAXUD, yxm_q4, Pq4, [23]uint8{0x3f}},
  1504  	{APMAXUW, yxm_q4, Pq4, [23]uint8{0x3e}},
  1505  	{APMINSB, yxm_q4, Pq4, [23]uint8{0x38}},
  1506  	{APMINSD, yxm_q4, Pq4, [23]uint8{0x39}},
  1507  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1508  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1509  	{APMINUD, yxm_q4, Pq4, [23]uint8{0x3b}},
  1510  	{APMINUW, yxm_q4, Pq4, [23]uint8{0x3a}},
  1511  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1512  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1513  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1514  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1515  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1516  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1517  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1518  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1519  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1520  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1521  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1522  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1523  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1524  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1525  	{APMULHRSW, yxm_q4, Pq4, [23]uint8{0x0b}},
  1526  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1527  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1528  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1529  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1530  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1531  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1532  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1533  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1534  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1535  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1536  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1537  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1538  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1539  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1540  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1541  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1542  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1543  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1544  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1545  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1546  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1547  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1548  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1549  	{APSIGNB, yxm_q4, Pq4, [23]uint8{0x08}},
  1550  	{APSIGND, yxm_q4, Pq4, [23]uint8{0x0a}},
  1551  	{APSIGNW, yxm_q4, Pq4, [23]uint8{0x09}},
  1552  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1553  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1554  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1555  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1556  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1557  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1558  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1559  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1560  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1561  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1562  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1563  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1564  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1565  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1566  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1567  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1568  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1569  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1570  	{APTEST, yxm_q4, Pq4, [23]uint8{0x17}},
  1571  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1572  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1573  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1574  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1575  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1576  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1577  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1578  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1579  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1580  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1581  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1582  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1583  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1584  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1585  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1586  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1587  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1588  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1589  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1590  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1591  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1592  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1593  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1594  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1595  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1596  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1597  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1598  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1599  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1600  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1601  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1602  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1603  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1604  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1605  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1606  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1607  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1608  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1609  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1610  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1611  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1612  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1613  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1614  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1615  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1616  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1617  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1618  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1619  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1620  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1621  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1622  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1623  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1624  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1625  	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1626  	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1627  	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1628  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1629  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1630  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1631  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1632  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1633  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1634  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1635  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1636  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1637  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1638  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1639  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1640  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1641  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1642  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1643  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1644  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1645  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1646  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1647  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1648  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1649  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1650  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1651  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1652  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1653  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1654  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1655  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1656  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1657  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1658  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1659  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1660  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1661  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1662  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1663  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1664  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1665  	{ASTMXCSR, ysvrs_om, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1666  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1667  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1668  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1669  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1670  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1671  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1672  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1673  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1674  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1675  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1676  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1677  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1678  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1679  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1680  	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1681  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1682  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1683  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1684  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1685  	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
  1686  	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
  1687  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1688  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1689  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1690  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1691  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1692  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1693  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1694  	{AWORD, ybyte, Px, [23]uint8{2}},
  1695  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1696  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1697  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1698  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1699  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1700  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1701  	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1702  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1703  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1704  	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1705  	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1706  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1707  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1708  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1709  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1710  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1711  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1712  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1713  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1714  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1715  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1716  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1717  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1718  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1719  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1720  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1721  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1722  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1723  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1724  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1725  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1726  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1727  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1728  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1729  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1730  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1731  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1732  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1733  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1734  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1735  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1736  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1737  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1738  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1739  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1740  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1741  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1742  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1743  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1744  	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
  1745  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1746  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1747  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1748  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1749  	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
  1750  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1751  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1752  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1753  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1754  	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
  1755  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1756  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1757  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1758  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1759  	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
  1760  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1761  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1762  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1763  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1764  	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
  1765  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1766  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1767  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1768  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1769  	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
  1770  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1771  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1772  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1773  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1774  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1775  	{AFFREE, nil, 0, [23]uint8{}},
  1776  	{AFLDCW, ysvrs_mo, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1777  	{AFLDENV, ysvrs_mo, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1778  	{AFRSTOR, ysvrs_mo, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1779  	{AFSAVE, ysvrs_om, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1780  	{AFSTCW, ysvrs_om, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1781  	{AFSTENV, ysvrs_om, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1782  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1783  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1784  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1785  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1786  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1787  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1788  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1789  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1790  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1791  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1792  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1793  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1794  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1795  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1796  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1797  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1798  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1799  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1800  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1801  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1802  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1803  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1804  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1805  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1806  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1807  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1808  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1809  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1810  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1811  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1812  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1813  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1814  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1815  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1816  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1817  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1818  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1819  	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
  1820  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1821  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1822  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1823  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1824  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1825  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1826  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1827  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1828  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1829  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1830  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1831  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1832  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1833  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1834  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1835  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1836  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1837  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1838  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1839  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1840  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1841  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1842  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1843  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1844  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1845  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1846  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1847  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1848  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1849  	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1850  	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
  1851  	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
  1852  	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1853  	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1854  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1855  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1856  	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
  1857  	{APCMPESTRM, yxshuf, Pq, [23]uint8{0x3a, 0x60, 0}},
  1858  	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
  1859  	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
  1860  	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
  1861  
  1862  	{ABLENDPD, yxshuf, Pq, [23]uint8{0x3a, 0x0d, 0}},
  1863  	{ABLENDPS, yxshuf, Pq, [23]uint8{0x3a, 0x0c, 0}},
  1864  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1865  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1866  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1867  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1868  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1869  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1870  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1871  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1872  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1873  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1874  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1875  
  1876  	// AVX2 gather instructions.
  1877  	// Added as a part of VSIB support implementation,
  1878  	// when x86avxgen will output these, they will be moved to
  1879  	// vex_optabs.go where they belong.
  1880  	{AVGATHERDPD, yvpgatherdq, Pvex, [23]uint8{
  1881  		vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x92,
  1882  		vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x92,
  1883  	}},
  1884  	{AVGATHERQPD, yvpgatherqq, Pvex, [23]uint8{
  1885  		vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x93,
  1886  		vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x93,
  1887  	}},
  1888  	{AVGATHERDPS, yvpgatherqq, Pvex, [23]uint8{
  1889  		vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x92,
  1890  		vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x92,
  1891  	}},
  1892  	{AVGATHERQPS, yvgatherqps, Pvex, [23]uint8{
  1893  		vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x93,
  1894  		vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x93,
  1895  	}},
  1896  	{AVPGATHERDD, yvpgatherqq, Pvex, [23]uint8{
  1897  		vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x90,
  1898  		vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x90,
  1899  	}},
  1900  	{AVPGATHERQD, yvgatherqps, Pvex, [23]uint8{
  1901  		vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x91,
  1902  		vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x91,
  1903  	}},
  1904  	{AVPGATHERDQ, yvpgatherdq, Pvex, [23]uint8{
  1905  		vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x90,
  1906  		vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x90,
  1907  	}},
  1908  	{AVPGATHERQQ, yvpgatherqq, Pvex, [23]uint8{
  1909  		vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x91,
  1910  		vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x91,
  1911  	}},
  1912  
  1913  	{obj.AEND, nil, 0, [23]uint8{}},
  1914  	{0, nil, 0, [23]uint8{}},
  1915  }
  1916  
// opindex maps an instruction opcode, masked with obj.AMask, to its entry
// in optab or vexOptab. It is filled in by instinit; slots for opcodes that
// appear in neither table stay nil.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1918  
  1919  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1920  // This happens on systems like Solaris that call .so functions instead of system calls.
  1921  // It does not seem to be necessary for any other systems. This is probably working
  1922  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1923  // what that bug is. And this does fix it.
  1924  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1925  	if ctxt.Headtype == objabi.Hsolaris {
  1926  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1927  		return strings.HasPrefix(s.Name, "libc_")
  1928  	}
  1929  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1930  }
  1931  
  1932  // single-instruction no-ops of various lengths.
  1933  // constructed by hand and disassembled with gdb to verify.
  1934  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1935  var nop = [][16]uint8{
  1936  	{0x90},
  1937  	{0x66, 0x90},
  1938  	{0x0F, 0x1F, 0x00},
  1939  	{0x0F, 0x1F, 0x40, 0x00},
  1940  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1941  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1942  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1943  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1944  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1945  }
  1946  
  1947  // Native Client rejects the repeated 0x66 prefix.
  1948  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1949  func fillnop(p []byte, n int) {
  1950  	var m int
  1951  
  1952  	for n > 0 {
  1953  		m = n
  1954  		if m > len(nop) {
  1955  			m = len(nop)
  1956  		}
  1957  		copy(p[:m], nop[m-1][:m])
  1958  		p = p[m:]
  1959  		n -= m
  1960  	}
  1961  }
  1962  
  1963  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1964  	s.Grow(int64(c) + int64(pad))
  1965  	fillnop(s.P[c:], int(pad))
  1966  	return c + pad
  1967  }
  1968  
  1969  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1970  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1971  		return l
  1972  	}
  1973  	return q
  1974  }
  1975  
  1976  func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
  1977  	if s.P != nil {
  1978  		return
  1979  	}
  1980  
  1981  	if ycover[0] == 0 {
  1982  		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
  1983  	}
  1984  
  1985  	var asmbuf AsmBuf
  1986  
  1987  	for p := s.Func.Text; p != nil; p = p.Link {
  1988  		if p.To.Type == obj.TYPE_BRANCH {
  1989  			if p.Pcond == nil {
  1990  				p.Pcond = p
  1991  			}
  1992  		}
  1993  		if p.As == AADJSP {
  1994  			p.To.Type = obj.TYPE_REG
  1995  			p.To.Reg = REG_SP
  1996  			v := int32(-p.From.Offset)
  1997  			p.From.Offset = int64(v)
  1998  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1999  			if v < 0 {
  2000  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  2001  				v = -v
  2002  				p.From.Offset = int64(v)
  2003  			}
  2004  
  2005  			if v == 0 {
  2006  				p.As = obj.ANOP
  2007  			}
  2008  		}
  2009  	}
  2010  
  2011  	var q *obj.Prog
  2012  	var count int64 // rough count of number of instructions
  2013  	for p := s.Func.Text; p != nil; p = p.Link {
  2014  		count++
  2015  		p.Back = 2 // use short branches first time through
  2016  		q = p.Pcond
  2017  		if q != nil && (q.Back&2 != 0) {
  2018  			p.Back |= 1 // backward jump
  2019  			q.Back |= 4 // loop head
  2020  		}
  2021  
  2022  		if p.As == AADJSP {
  2023  			p.To.Type = obj.TYPE_REG
  2024  			p.To.Reg = REG_SP
  2025  			v := int32(-p.From.Offset)
  2026  			p.From.Offset = int64(v)
  2027  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  2028  			if v < 0 {
  2029  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  2030  				v = -v
  2031  				p.From.Offset = int64(v)
  2032  			}
  2033  
  2034  			if v == 0 {
  2035  				p.As = obj.ANOP
  2036  			}
  2037  		}
  2038  	}
  2039  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  2040  
  2041  	n := 0
  2042  	var c int32
  2043  	errors := ctxt.Errors
  2044  	for {
  2045  		loop := int32(0)
  2046  		for i := range s.R {
  2047  			s.R[i] = obj.Reloc{}
  2048  		}
  2049  		s.R = s.R[:0]
  2050  		s.P = s.P[:0]
  2051  		c = 0
  2052  		for p := s.Func.Text; p != nil; p = p.Link {
  2053  			if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 {
  2054  
  2055  				// pad everything to avoid crossing 32-byte boundary
  2056  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  2057  					c = naclpad(ctxt, s, c, -c&31)
  2058  				}
  2059  
  2060  				// pad call deferreturn to start at 32-byte boundary
  2061  				// so that subtracting 5 in jmpdefer will jump back
  2062  				// to that boundary and rerun the call.
  2063  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  2064  					c = naclpad(ctxt, s, c, -c&31)
  2065  				}
  2066  
  2067  				// pad call to end at 32-byte boundary
  2068  				if p.As == obj.ACALL {
  2069  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  2070  				}
  2071  
  2072  				// the linker treats REP and STOSQ as different instructions
  2073  				// but in fact the REP is a prefix on the STOSQ.
  2074  				// make sure REP has room for 2 more bytes, so that
  2075  				// padding will not be inserted before the next instruction.
  2076  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  2077  					c = naclpad(ctxt, s, c, -c&31)
  2078  				}
  2079  
  2080  				// same for LOCK.
  2081  				// various instructions follow; the longest is 4 bytes.
  2082  				// give ourselves 8 bytes so as to avoid surprises.
  2083  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  2084  					c = naclpad(ctxt, s, c, -c&31)
  2085  				}
  2086  			}
  2087  
  2088  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  2089  				// pad with NOPs
  2090  				v := -c & (LoopAlign - 1)
  2091  
  2092  				if v <= MaxLoopPad {
  2093  					s.Grow(int64(c) + int64(v))
  2094  					fillnop(s.P[c:], int(v))
  2095  					c += v
  2096  				}
  2097  			}
  2098  
  2099  			p.Pc = int64(c)
  2100  
  2101  			// process forward jumps to p
  2102  			for q = p.Rel; q != nil; q = q.Forwd {
  2103  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  2104  				if q.Back&2 != 0 { // short
  2105  					if v > 127 {
  2106  						loop++
  2107  						q.Back ^= 2
  2108  					}
  2109  
  2110  					if q.As == AJCXZL || q.As == AXBEGIN {
  2111  						s.P[q.Pc+2] = byte(v)
  2112  					} else {
  2113  						s.P[q.Pc+1] = byte(v)
  2114  					}
  2115  				} else {
  2116  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  2117  				}
  2118  			}
  2119  
  2120  			p.Rel = nil
  2121  
  2122  			p.Pc = int64(c)
  2123  			asmbuf.asmins(ctxt, s, p)
  2124  			m := asmbuf.Len()
  2125  			if int(p.Isize) != m {
  2126  				p.Isize = uint8(m)
  2127  				loop++
  2128  			}
  2129  
  2130  			s.Grow(p.Pc + int64(m))
  2131  			copy(s.P[p.Pc:], asmbuf.Bytes())
  2132  			c += int32(m)
  2133  		}
  2134  
  2135  		n++
  2136  		if n > 20 {
  2137  			ctxt.Diag("span must be looping")
  2138  			log.Fatalf("loop")
  2139  		}
  2140  		if loop == 0 {
  2141  			break
  2142  		}
  2143  		if ctxt.Errors > errors {
  2144  			return
  2145  		}
  2146  	}
  2147  
  2148  	if ctxt.Headtype == objabi.Hnacl {
  2149  		c = naclpad(ctxt, s, c, -c&31)
  2150  	}
  2151  
  2152  	s.Size = int64(c)
  2153  
  2154  	if false { /* debug['a'] > 1 */
  2155  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  2156  		var i int
  2157  		for i = 0; i < len(s.P); i++ {
  2158  			fmt.Printf(" %.2x", s.P[i])
  2159  			if i%16 == 15 {
  2160  				fmt.Printf("\n  %.6x", uint(i+1))
  2161  			}
  2162  		}
  2163  
  2164  		if i%16 != 0 {
  2165  			fmt.Printf("\n")
  2166  		}
  2167  
  2168  		for i := 0; i < len(s.R); i++ {
  2169  			r := &s.R[i]
  2170  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  2171  		}
  2172  	}
  2173  }
  2174  
  2175  func instinit(ctxt *obj.Link) {
  2176  	if ycover[0] != 0 {
  2177  		// Already initialized; stop now.
  2178  		// This happens in the cmd/asm tests,
  2179  		// each of which re-initializes the arch.
  2180  		return
  2181  	}
  2182  
  2183  	switch ctxt.Headtype {
  2184  	case objabi.Hplan9:
  2185  		plan9privates = ctxt.Lookup("_privates")
  2186  	case objabi.Hnacl:
  2187  		deferreturn = ctxt.Lookup("runtime.deferreturn")
  2188  	}
  2189  
  2190  	for i := range vexOptab {
  2191  		c := vexOptab[i].as
  2192  		if opindex[c&obj.AMask] != nil {
  2193  			ctxt.Diag("phase error in vexOptab: %d (%v)", i, c)
  2194  		}
  2195  		opindex[c&obj.AMask] = &vexOptab[i]
  2196  	}
  2197  	for i := 1; optab[i].as != 0; i++ {
  2198  		c := optab[i].as
  2199  		if opindex[c&obj.AMask] != nil {
  2200  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2201  		}
  2202  		opindex[c&obj.AMask] = &optab[i]
  2203  	}
  2204  
  2205  	for i := 0; i < Ymax; i++ {
  2206  		ycover[i*Ymax+i] = 1
  2207  	}
  2208  
  2209  	ycover[Yi0*Ymax+Yu2] = 1
  2210  	ycover[Yi1*Ymax+Yu2] = 1
  2211  
  2212  	ycover[Yi0*Ymax+Yi8] = 1
  2213  	ycover[Yi1*Ymax+Yi8] = 1
  2214  	ycover[Yu2*Ymax+Yi8] = 1
  2215  	ycover[Yu7*Ymax+Yi8] = 1
  2216  
  2217  	ycover[Yi0*Ymax+Yu7] = 1
  2218  	ycover[Yi1*Ymax+Yu7] = 1
  2219  	ycover[Yu2*Ymax+Yu7] = 1
  2220  
  2221  	ycover[Yi0*Ymax+Yu8] = 1
  2222  	ycover[Yi1*Ymax+Yu8] = 1
  2223  	ycover[Yu2*Ymax+Yu8] = 1
  2224  	ycover[Yu7*Ymax+Yu8] = 1
  2225  
  2226  	ycover[Yi0*Ymax+Ys32] = 1
  2227  	ycover[Yi1*Ymax+Ys32] = 1
  2228  	ycover[Yu2*Ymax+Ys32] = 1
  2229  	ycover[Yu7*Ymax+Ys32] = 1
  2230  	ycover[Yu8*Ymax+Ys32] = 1
  2231  	ycover[Yi8*Ymax+Ys32] = 1
  2232  
  2233  	ycover[Yi0*Ymax+Yi32] = 1
  2234  	ycover[Yi1*Ymax+Yi32] = 1
  2235  	ycover[Yu2*Ymax+Yi32] = 1
  2236  	ycover[Yu7*Ymax+Yi32] = 1
  2237  	ycover[Yu8*Ymax+Yi32] = 1
  2238  	ycover[Yi8*Ymax+Yi32] = 1
  2239  	ycover[Ys32*Ymax+Yi32] = 1
  2240  
  2241  	ycover[Yi0*Ymax+Yi64] = 1
  2242  	ycover[Yi1*Ymax+Yi64] = 1
  2243  	ycover[Yu7*Ymax+Yi64] = 1
  2244  	ycover[Yu2*Ymax+Yi64] = 1
  2245  	ycover[Yu8*Ymax+Yi64] = 1
  2246  	ycover[Yi8*Ymax+Yi64] = 1
  2247  	ycover[Ys32*Ymax+Yi64] = 1
  2248  	ycover[Yi32*Ymax+Yi64] = 1
  2249  
  2250  	ycover[Yal*Ymax+Yrb] = 1
  2251  	ycover[Ycl*Ymax+Yrb] = 1
  2252  	ycover[Yax*Ymax+Yrb] = 1
  2253  	ycover[Ycx*Ymax+Yrb] = 1
  2254  	ycover[Yrx*Ymax+Yrb] = 1
  2255  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2256  
  2257  	ycover[Ycl*Ymax+Ycx] = 1
  2258  
  2259  	ycover[Yax*Ymax+Yrx] = 1
  2260  	ycover[Ycx*Ymax+Yrx] = 1
  2261  
  2262  	ycover[Yax*Ymax+Yrl] = 1
  2263  	ycover[Ycx*Ymax+Yrl] = 1
  2264  	ycover[Yrx*Ymax+Yrl] = 1
  2265  	ycover[Yrl32*Ymax+Yrl] = 1
  2266  
  2267  	ycover[Yf0*Ymax+Yrf] = 1
  2268  
  2269  	ycover[Yal*Ymax+Ymb] = 1
  2270  	ycover[Ycl*Ymax+Ymb] = 1
  2271  	ycover[Yax*Ymax+Ymb] = 1
  2272  	ycover[Ycx*Ymax+Ymb] = 1
  2273  	ycover[Yrx*Ymax+Ymb] = 1
  2274  	ycover[Yrb*Ymax+Ymb] = 1
  2275  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2276  	ycover[Ym*Ymax+Ymb] = 1
  2277  
  2278  	ycover[Yax*Ymax+Yml] = 1
  2279  	ycover[Ycx*Ymax+Yml] = 1
  2280  	ycover[Yrx*Ymax+Yml] = 1
  2281  	ycover[Yrl*Ymax+Yml] = 1
  2282  	ycover[Yrl32*Ymax+Yml] = 1
  2283  	ycover[Ym*Ymax+Yml] = 1
  2284  
  2285  	ycover[Yax*Ymax+Ymm] = 1
  2286  	ycover[Ycx*Ymax+Ymm] = 1
  2287  	ycover[Yrx*Ymax+Ymm] = 1
  2288  	ycover[Yrl*Ymax+Ymm] = 1
  2289  	ycover[Yrl32*Ymax+Ymm] = 1
  2290  	ycover[Ym*Ymax+Ymm] = 1
  2291  	ycover[Ymr*Ymax+Ymm] = 1
  2292  
  2293  	ycover[Ym*Ymax+Yxm] = 1
  2294  	ycover[Yxr*Ymax+Yxm] = 1
  2295  
  2296  	ycover[Ym*Ymax+Yym] = 1
  2297  	ycover[Yyr*Ymax+Yym] = 1
  2298  
  2299  	for i := 0; i < MAXREG; i++ {
  2300  		reg[i] = -1
  2301  		if i >= REG_AL && i <= REG_R15B {
  2302  			reg[i] = (i - REG_AL) & 7
  2303  			if i >= REG_SPB && i <= REG_DIB {
  2304  				regrex[i] = 0x40
  2305  			}
  2306  			if i >= REG_R8B && i <= REG_R15B {
  2307  				regrex[i] = Rxr | Rxx | Rxb
  2308  			}
  2309  		}
  2310  
  2311  		if i >= REG_AH && i <= REG_BH {
  2312  			reg[i] = 4 + ((i - REG_AH) & 7)
  2313  		}
  2314  		if i >= REG_AX && i <= REG_R15 {
  2315  			reg[i] = (i - REG_AX) & 7
  2316  			if i >= REG_R8 {
  2317  				regrex[i] = Rxr | Rxx | Rxb
  2318  			}
  2319  		}
  2320  
  2321  		if i >= REG_F0 && i <= REG_F0+7 {
  2322  			reg[i] = (i - REG_F0) & 7
  2323  		}
  2324  		if i >= REG_M0 && i <= REG_M0+7 {
  2325  			reg[i] = (i - REG_M0) & 7
  2326  		}
  2327  		if i >= REG_X0 && i <= REG_X0+15 {
  2328  			reg[i] = (i - REG_X0) & 7
  2329  			if i >= REG_X0+8 {
  2330  				regrex[i] = Rxr | Rxx | Rxb
  2331  			}
  2332  		}
  2333  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2334  			reg[i] = (i - REG_Y0) & 7
  2335  			if i >= REG_Y0+8 {
  2336  				regrex[i] = Rxr | Rxx | Rxb
  2337  			}
  2338  		}
  2339  
  2340  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2341  			regrex[i] = Rxr
  2342  		}
  2343  	}
  2344  }
  2345  
  2346  var isAndroid = (objabi.GOOS == "android")
  2347  
  2348  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2349  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2350  		return 0
  2351  	}
  2352  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2353  		switch a.Reg {
  2354  		case REG_CS:
  2355  			return 0x2e
  2356  
  2357  		case REG_DS:
  2358  			return 0x3e
  2359  
  2360  		case REG_ES:
  2361  			return 0x26
  2362  
  2363  		case REG_FS:
  2364  			return 0x64
  2365  
  2366  		case REG_GS:
  2367  			return 0x65
  2368  
  2369  		case REG_TLS:
  2370  			// NOTE: Systems listed here should be only systems that
  2371  			// support direct TLS references like 8(TLS) implemented as
  2372  			// direct references from FS or GS. Systems that require
  2373  			// the initial-exec model, where you load the TLS base into
  2374  			// a register and then index from that register, do not reach
  2375  			// this code and should not be listed.
  2376  			if ctxt.Arch.Family == sys.I386 {
  2377  				switch ctxt.Headtype {
  2378  				default:
  2379  					if isAndroid {
  2380  						return 0x65 // GS
  2381  					}
  2382  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2383  
  2384  				case objabi.Hdarwin,
  2385  					objabi.Hdragonfly,
  2386  					objabi.Hfreebsd,
  2387  					objabi.Hnetbsd,
  2388  					objabi.Hopenbsd:
  2389  					return 0x65 // GS
  2390  				}
  2391  			}
  2392  
  2393  			switch ctxt.Headtype {
  2394  			default:
  2395  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2396  
  2397  			case objabi.Hlinux:
  2398  				if isAndroid {
  2399  					return 0x64 // FS
  2400  				}
  2401  
  2402  				if ctxt.Flag_shared {
  2403  					log.Fatalf("unknown TLS base register for linux with -shared")
  2404  				} else {
  2405  					return 0x64 // FS
  2406  				}
  2407  
  2408  			case objabi.Hdragonfly,
  2409  				objabi.Hfreebsd,
  2410  				objabi.Hnetbsd,
  2411  				objabi.Hopenbsd,
  2412  				objabi.Hsolaris:
  2413  				return 0x64 // FS
  2414  
  2415  			case objabi.Hdarwin:
  2416  				return 0x65 // GS
  2417  			}
  2418  		}
  2419  	}
  2420  
  2421  	if ctxt.Arch.Family == sys.I386 {
  2422  		if a.Index == REG_TLS && ctxt.Flag_shared {
  2423  			// When building for inclusion into a shared library, an instruction of the form
  2424  			//     MOVL 0(CX)(TLS*1), AX
  2425  			// becomes
  2426  			//     mov %gs:(%ecx), %eax
  2427  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2428  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2429  			// a shared library the instruction it becomes
  2430  			//     mov 0x0(%ecx), $eax
  2431  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2432  			if a.Offset != 0 {
  2433  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2434  			}
  2435  			return 0x65 // GS
  2436  		}
  2437  		return 0
  2438  	}
  2439  
  2440  	switch a.Index {
  2441  	case REG_CS:
  2442  		return 0x2e
  2443  
  2444  	case REG_DS:
  2445  		return 0x3e
  2446  
  2447  	case REG_ES:
  2448  		return 0x26
  2449  
  2450  	case REG_TLS:
  2451  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2452  			// When building for inclusion into a shared library, an instruction of the form
  2453  			//     MOV 0(CX)(TLS*1), AX
  2454  			// becomes
  2455  			//     mov %fs:(%rcx), %rax
  2456  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2457  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2458  			// a shared library the instruction does not require a prefix.
  2459  			if a.Offset != 0 {
  2460  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2461  			}
  2462  			return 0x64
  2463  		}
  2464  
  2465  	case REG_FS:
  2466  		return 0x64
  2467  
  2468  	case REG_GS:
  2469  		return 0x65
  2470  	}
  2471  
  2472  	return 0
  2473  }
  2474  
  2475  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2476  	switch a.Type {
  2477  	case obj.TYPE_NONE:
  2478  		return Ynone
  2479  
  2480  	case obj.TYPE_BRANCH:
  2481  		return Ybr
  2482  
  2483  	case obj.TYPE_INDIR:
  2484  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2485  			return Yindir
  2486  		}
  2487  		return Yxxx
  2488  
  2489  	case obj.TYPE_MEM:
  2490  		if a.Index == REG_SP {
  2491  			// Can't use SP as the index register
  2492  			return Yxxx
  2493  		}
  2494  		if a.Index >= REG_X0 && a.Index <= REG_X15 {
  2495  			if ctxt.Arch.Family == sys.I386 && a.Index > REG_X7 {
  2496  				return Yxxx
  2497  			}
  2498  			return Yxvm
  2499  		}
  2500  		if a.Index >= REG_Y0 && a.Index <= REG_Y15 {
  2501  			if ctxt.Arch.Family == sys.I386 && a.Index > REG_Y7 {
  2502  				return Yxxx
  2503  			}
  2504  			return Yyvm
  2505  		}
  2506  		if ctxt.Arch.Family == sys.AMD64 {
  2507  			// Offset must fit in a 32-bit signed field (or fit in a 32-bit unsigned field
  2508  			// where the sign extension doesn't matter).
  2509  			// Note: The latter happens only in assembly, for example crypto/sha1/sha1block_amd64.s.
  2510  			if !(a.Offset == int64(int32(a.Offset)) ||
  2511  				a.Offset == int64(uint32(a.Offset)) && p.As == ALEAL) {
  2512  				return Yxxx
  2513  			}
  2514  			switch a.Name {
  2515  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2516  				// Global variables can't use index registers and their
  2517  				// base register is %rip (%rip is encoded as REG_NONE).
  2518  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2519  					return Yxxx
  2520  				}
  2521  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2522  				// These names must have a base of SP.  The old compiler
  2523  				// uses 0 for the base register. SSA uses REG_SP.
  2524  				if a.Reg != REG_SP && a.Reg != 0 {
  2525  					return Yxxx
  2526  				}
  2527  			case obj.NAME_NONE:
  2528  				// everything is ok
  2529  			default:
  2530  				// unknown name
  2531  				return Yxxx
  2532  			}
  2533  		}
  2534  		return Ym
  2535  
  2536  	case obj.TYPE_ADDR:
  2537  		switch a.Name {
  2538  		case obj.NAME_GOTREF:
  2539  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2540  			return Yxxx
  2541  
  2542  		case obj.NAME_EXTERN,
  2543  			obj.NAME_STATIC:
  2544  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2545  				return Yi32
  2546  			}
  2547  			return Yiauto // use pc-relative addressing
  2548  
  2549  		case obj.NAME_AUTO,
  2550  			obj.NAME_PARAM:
  2551  			return Yiauto
  2552  		}
  2553  
  2554  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2555  		// and got Yi32 in an earlier version of this code.
  2556  		// Keep doing that until we fix yduff etc.
  2557  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2558  			return Yi32
  2559  		}
  2560  
  2561  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2562  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2563  		}
  2564  		fallthrough
  2565  
  2566  		// fall through
  2567  
  2568  	case obj.TYPE_CONST:
  2569  		if a.Sym != nil {
  2570  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2571  		}
  2572  
  2573  		v := a.Offset
  2574  		if ctxt.Arch.Family == sys.I386 {
  2575  			v = int64(int32(v))
  2576  		}
  2577  		if v == 0 {
  2578  			return Yi0
  2579  		}
  2580  		if v == 1 {
  2581  			return Yi1
  2582  		}
  2583  		if v >= 0 && v <= 3 {
  2584  			return Yu2
  2585  		}
  2586  		if v >= 0 && v <= 127 {
  2587  			return Yu7
  2588  		}
  2589  		if v >= 0 && v <= 255 {
  2590  			return Yu8
  2591  		}
  2592  		if v >= -128 && v <= 127 {
  2593  			return Yi8
  2594  		}
  2595  		if ctxt.Arch.Family == sys.I386 {
  2596  			return Yi32
  2597  		}
  2598  		l := int32(v)
  2599  		if int64(l) == v {
  2600  			return Ys32 /* can sign extend */
  2601  		}
  2602  		if v>>32 == 0 {
  2603  			return Yi32 /* unsigned */
  2604  		}
  2605  		return Yi64
  2606  
  2607  	case obj.TYPE_TEXTSIZE:
  2608  		return Ytextsize
  2609  	}
  2610  
  2611  	if a.Type != obj.TYPE_REG {
  2612  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2613  		return Yxxx
  2614  	}
  2615  
  2616  	switch a.Reg {
  2617  	case REG_AL:
  2618  		return Yal
  2619  
  2620  	case REG_AX:
  2621  		return Yax
  2622  
  2623  		/*
  2624  			case REG_SPB:
  2625  		*/
  2626  	case REG_BPB,
  2627  		REG_SIB,
  2628  		REG_DIB,
  2629  		REG_R8B,
  2630  		REG_R9B,
  2631  		REG_R10B,
  2632  		REG_R11B,
  2633  		REG_R12B,
  2634  		REG_R13B,
  2635  		REG_R14B,
  2636  		REG_R15B:
  2637  		if ctxt.Arch.Family == sys.I386 {
  2638  			return Yxxx
  2639  		}
  2640  		fallthrough
  2641  
  2642  	case REG_DL,
  2643  		REG_BL,
  2644  		REG_AH,
  2645  		REG_CH,
  2646  		REG_DH,
  2647  		REG_BH:
  2648  		return Yrb
  2649  
  2650  	case REG_CL:
  2651  		return Ycl
  2652  
  2653  	case REG_CX:
  2654  		return Ycx
  2655  
  2656  	case REG_DX, REG_BX:
  2657  		return Yrx
  2658  
  2659  	case REG_R8, /* not really Yrl */
  2660  		REG_R9,
  2661  		REG_R10,
  2662  		REG_R11,
  2663  		REG_R12,
  2664  		REG_R13,
  2665  		REG_R14,
  2666  		REG_R15:
  2667  		if ctxt.Arch.Family == sys.I386 {
  2668  			return Yxxx
  2669  		}
  2670  		fallthrough
  2671  
  2672  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2673  		if ctxt.Arch.Family == sys.I386 {
  2674  			return Yrl32
  2675  		}
  2676  		return Yrl
  2677  
  2678  	case REG_F0 + 0:
  2679  		return Yf0
  2680  
  2681  	case REG_F0 + 1,
  2682  		REG_F0 + 2,
  2683  		REG_F0 + 3,
  2684  		REG_F0 + 4,
  2685  		REG_F0 + 5,
  2686  		REG_F0 + 6,
  2687  		REG_F0 + 7:
  2688  		return Yrf
  2689  
  2690  	case REG_M0 + 0,
  2691  		REG_M0 + 1,
  2692  		REG_M0 + 2,
  2693  		REG_M0 + 3,
  2694  		REG_M0 + 4,
  2695  		REG_M0 + 5,
  2696  		REG_M0 + 6,
  2697  		REG_M0 + 7:
  2698  		return Ymr
  2699  
  2700  	case REG_X0 + 0,
  2701  		REG_X0 + 1,
  2702  		REG_X0 + 2,
  2703  		REG_X0 + 3,
  2704  		REG_X0 + 4,
  2705  		REG_X0 + 5,
  2706  		REG_X0 + 6,
  2707  		REG_X0 + 7,
  2708  		REG_X0 + 8,
  2709  		REG_X0 + 9,
  2710  		REG_X0 + 10,
  2711  		REG_X0 + 11,
  2712  		REG_X0 + 12,
  2713  		REG_X0 + 13,
  2714  		REG_X0 + 14,
  2715  		REG_X0 + 15:
  2716  		return Yxr
  2717  
  2718  	case REG_Y0 + 0,
  2719  		REG_Y0 + 1,
  2720  		REG_Y0 + 2,
  2721  		REG_Y0 + 3,
  2722  		REG_Y0 + 4,
  2723  		REG_Y0 + 5,
  2724  		REG_Y0 + 6,
  2725  		REG_Y0 + 7,
  2726  		REG_Y0 + 8,
  2727  		REG_Y0 + 9,
  2728  		REG_Y0 + 10,
  2729  		REG_Y0 + 11,
  2730  		REG_Y0 + 12,
  2731  		REG_Y0 + 13,
  2732  		REG_Y0 + 14,
  2733  		REG_Y0 + 15:
  2734  		return Yyr
  2735  
  2736  	case REG_CS:
  2737  		return Ycs
  2738  	case REG_SS:
  2739  		return Yss
  2740  	case REG_DS:
  2741  		return Yds
  2742  	case REG_ES:
  2743  		return Yes
  2744  	case REG_FS:
  2745  		return Yfs
  2746  	case REG_GS:
  2747  		return Ygs
  2748  	case REG_TLS:
  2749  		return Ytls
  2750  
  2751  	case REG_GDTR:
  2752  		return Ygdtr
  2753  	case REG_IDTR:
  2754  		return Yidtr
  2755  	case REG_LDTR:
  2756  		return Yldtr
  2757  	case REG_MSW:
  2758  		return Ymsw
  2759  	case REG_TASK:
  2760  		return Ytask
  2761  
  2762  	case REG_CR + 0:
  2763  		return Ycr0
  2764  	case REG_CR + 1:
  2765  		return Ycr1
  2766  	case REG_CR + 2:
  2767  		return Ycr2
  2768  	case REG_CR + 3:
  2769  		return Ycr3
  2770  	case REG_CR + 4:
  2771  		return Ycr4
  2772  	case REG_CR + 5:
  2773  		return Ycr5
  2774  	case REG_CR + 6:
  2775  		return Ycr6
  2776  	case REG_CR + 7:
  2777  		return Ycr7
  2778  	case REG_CR + 8:
  2779  		return Ycr8
  2780  
  2781  	case REG_DR + 0:
  2782  		return Ydr0
  2783  	case REG_DR + 1:
  2784  		return Ydr1
  2785  	case REG_DR + 2:
  2786  		return Ydr2
  2787  	case REG_DR + 3:
  2788  		return Ydr3
  2789  	case REG_DR + 4:
  2790  		return Ydr4
  2791  	case REG_DR + 5:
  2792  		return Ydr5
  2793  	case REG_DR + 6:
  2794  		return Ydr6
  2795  	case REG_DR + 7:
  2796  		return Ydr7
  2797  
  2798  	case REG_TR + 0:
  2799  		return Ytr0
  2800  	case REG_TR + 1:
  2801  		return Ytr1
  2802  	case REG_TR + 2:
  2803  		return Ytr2
  2804  	case REG_TR + 3:
  2805  		return Ytr3
  2806  	case REG_TR + 4:
  2807  		return Ytr4
  2808  	case REG_TR + 5:
  2809  		return Ytr5
  2810  	case REG_TR + 6:
  2811  		return Ytr6
  2812  	case REG_TR + 7:
  2813  		return Ytr7
  2814  	}
  2815  
  2816  	return Yxxx
  2817  }
  2818  
  2819  // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
  2820  // and hold assembly state.
  2821  type AsmBuf struct {
  2822  	buf     [100]byte
  2823  	off     int
  2824  	rexflag int
  2825  	vexflag int
  2826  	rep     int
  2827  	repn    int
  2828  	lock    bool
  2829  }
  2830  
  2831  // Put1 appends one byte to the end of the buffer.
  2832  func (a *AsmBuf) Put1(x byte) {
  2833  	a.buf[a.off] = x
  2834  	a.off++
  2835  }
  2836  
  2837  // Put2 appends two bytes to the end of the buffer.
  2838  func (a *AsmBuf) Put2(x, y byte) {
  2839  	a.buf[a.off+0] = x
  2840  	a.buf[a.off+1] = y
  2841  	a.off += 2
  2842  }
  2843  
  2844  // Put3 appends three bytes to the end of the buffer.
  2845  func (a *AsmBuf) Put3(x, y, z byte) {
  2846  	a.buf[a.off+0] = x
  2847  	a.buf[a.off+1] = y
  2848  	a.buf[a.off+2] = z
  2849  	a.off += 3
  2850  }
  2851  
  2852  // Put4 appends four bytes to the end of the buffer.
  2853  func (a *AsmBuf) Put4(x, y, z, w byte) {
  2854  	a.buf[a.off+0] = x
  2855  	a.buf[a.off+1] = y
  2856  	a.buf[a.off+2] = z
  2857  	a.buf[a.off+3] = w
  2858  	a.off += 4
  2859  }
  2860  
  2861  // PutInt16 writes v into the buffer using little-endian encoding.
  2862  func (a *AsmBuf) PutInt16(v int16) {
  2863  	a.buf[a.off+0] = byte(v)
  2864  	a.buf[a.off+1] = byte(v >> 8)
  2865  	a.off += 2
  2866  }
  2867  
  2868  // PutInt32 writes v into the buffer using little-endian encoding.
  2869  func (a *AsmBuf) PutInt32(v int32) {
  2870  	a.buf[a.off+0] = byte(v)
  2871  	a.buf[a.off+1] = byte(v >> 8)
  2872  	a.buf[a.off+2] = byte(v >> 16)
  2873  	a.buf[a.off+3] = byte(v >> 24)
  2874  	a.off += 4
  2875  }
  2876  
  2877  // PutInt64 writes v into the buffer using little-endian encoding.
  2878  func (a *AsmBuf) PutInt64(v int64) {
  2879  	a.buf[a.off+0] = byte(v)
  2880  	a.buf[a.off+1] = byte(v >> 8)
  2881  	a.buf[a.off+2] = byte(v >> 16)
  2882  	a.buf[a.off+3] = byte(v >> 24)
  2883  	a.buf[a.off+4] = byte(v >> 32)
  2884  	a.buf[a.off+5] = byte(v >> 40)
  2885  	a.buf[a.off+6] = byte(v >> 48)
  2886  	a.buf[a.off+7] = byte(v >> 56)
  2887  	a.off += 8
  2888  }
  2889  
  2890  // Put copies b into the buffer.
  2891  func (a *AsmBuf) Put(b []byte) {
  2892  	copy(a.buf[a.off:], b)
  2893  	a.off += len(b)
  2894  }
  2895  
  2896  // Insert inserts b at offset i.
  2897  func (a *AsmBuf) Insert(i int, b byte) {
  2898  	a.off++
  2899  	copy(a.buf[i+1:a.off], a.buf[i:a.off-1])
  2900  	a.buf[i] = b
  2901  }
  2902  
  2903  // Last returns the byte at the end of the buffer.
  2904  func (a *AsmBuf) Last() byte { return a.buf[a.off-1] }
  2905  
  2906  // Len returns the length of the buffer.
  2907  func (a *AsmBuf) Len() int { return a.off }
  2908  
  2909  // Bytes returns the contents of the buffer.
  2910  func (a *AsmBuf) Bytes() []byte { return a.buf[:a.off] }
  2911  
  2912  // Reset empties the buffer.
  2913  func (a *AsmBuf) Reset() { a.off = 0 }
  2914  
  2915  // At returns the byte at offset i.
  2916  func (a *AsmBuf) At(i int) byte { return a.buf[i] }
  2917  
  2918  // asmidx emits SIB byte.
  2919  func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2920  	var i int
  2921  
  2922  	// X/Y index register is used in VSIB.
  2923  	switch index {
  2924  	default:
  2925  		goto bad
  2926  
  2927  	case REG_NONE:
  2928  		i = 4 << 3
  2929  		goto bas
  2930  
  2931  	case REG_R8,
  2932  		REG_R9,
  2933  		REG_R10,
  2934  		REG_R11,
  2935  		REG_R12,
  2936  		REG_R13,
  2937  		REG_R14,
  2938  		REG_R15,
  2939  		REG_X8,
  2940  		REG_X9,
  2941  		REG_X10,
  2942  		REG_X11,
  2943  		REG_X12,
  2944  		REG_X13,
  2945  		REG_X14,
  2946  		REG_X15,
  2947  		REG_Y8,
  2948  		REG_Y9,
  2949  		REG_Y10,
  2950  		REG_Y11,
  2951  		REG_Y12,
  2952  		REG_Y13,
  2953  		REG_Y14,
  2954  		REG_Y15:
  2955  		if ctxt.Arch.Family == sys.I386 {
  2956  			goto bad
  2957  		}
  2958  		fallthrough
  2959  
  2960  	case REG_AX,
  2961  		REG_CX,
  2962  		REG_DX,
  2963  		REG_BX,
  2964  		REG_BP,
  2965  		REG_SI,
  2966  		REG_DI,
  2967  		REG_X0,
  2968  		REG_X1,
  2969  		REG_X2,
  2970  		REG_X3,
  2971  		REG_X4,
  2972  		REG_X5,
  2973  		REG_X6,
  2974  		REG_X7,
  2975  		REG_Y0,
  2976  		REG_Y1,
  2977  		REG_Y2,
  2978  		REG_Y3,
  2979  		REG_Y4,
  2980  		REG_Y5,
  2981  		REG_Y6,
  2982  		REG_Y7:
  2983  		i = reg[index] << 3
  2984  	}
  2985  
  2986  	switch scale {
  2987  	default:
  2988  		goto bad
  2989  
  2990  	case 1:
  2991  		break
  2992  
  2993  	case 2:
  2994  		i |= 1 << 6
  2995  
  2996  	case 4:
  2997  		i |= 2 << 6
  2998  
  2999  	case 8:
  3000  		i |= 3 << 6
  3001  	}
  3002  
  3003  bas:
  3004  	switch base {
  3005  	default:
  3006  		goto bad
  3007  
  3008  	case REG_NONE: /* must be mod=00 */
  3009  		i |= 5
  3010  
  3011  	case REG_R8,
  3012  		REG_R9,
  3013  		REG_R10,
  3014  		REG_R11,
  3015  		REG_R12,
  3016  		REG_R13,
  3017  		REG_R14,
  3018  		REG_R15:
  3019  		if ctxt.Arch.Family == sys.I386 {
  3020  			goto bad
  3021  		}
  3022  		fallthrough
  3023  
  3024  	case REG_AX,
  3025  		REG_CX,
  3026  		REG_DX,
  3027  		REG_BX,
  3028  		REG_SP,
  3029  		REG_BP,
  3030  		REG_SI,
  3031  		REG_DI:
  3032  		i |= reg[base]
  3033  	}
  3034  
  3035  	asmbuf.Put1(byte(i))
  3036  	return
  3037  
  3038  bad:
  3039  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  3040  	asmbuf.Put1(0)
  3041  	return
  3042  }
  3043  
  3044  func (asmbuf *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3045  	var rel obj.Reloc
  3046  
  3047  	v := vaddr(ctxt, p, a, &rel)
  3048  	if rel.Siz != 0 {
  3049  		if rel.Siz != 4 {
  3050  			ctxt.Diag("bad reloc")
  3051  		}
  3052  		r := obj.Addrel(cursym)
  3053  		*r = rel
  3054  		r.Off = int32(p.Pc + int64(asmbuf.Len()))
  3055  	}
  3056  
  3057  	asmbuf.PutInt32(int32(v))
  3058  }
  3059  
  3060  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3061  	if r != nil {
  3062  		*r = obj.Reloc{}
  3063  	}
  3064  
  3065  	switch a.Name {
  3066  	case obj.NAME_STATIC,
  3067  		obj.NAME_GOTREF,
  3068  		obj.NAME_EXTERN:
  3069  		s := a.Sym
  3070  		if r == nil {
  3071  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3072  			log.Fatalf("reloc")
  3073  		}
  3074  
  3075  		if a.Name == obj.NAME_GOTREF {
  3076  			r.Siz = 4
  3077  			r.Type = objabi.R_GOTPCREL
  3078  		} else if useAbs(ctxt, s) {
  3079  			r.Siz = 4
  3080  			r.Type = objabi.R_ADDR
  3081  		} else {
  3082  			r.Siz = 4
  3083  			r.Type = objabi.R_PCREL
  3084  		}
  3085  
  3086  		r.Off = -1 // caller must fill in
  3087  		r.Sym = s
  3088  		r.Add = a.Offset
  3089  
  3090  		return 0
  3091  	}
  3092  
  3093  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3094  		if r == nil {
  3095  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3096  			log.Fatalf("reloc")
  3097  		}
  3098  
  3099  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3100  			r.Type = objabi.R_TLS_LE
  3101  			r.Siz = 4
  3102  			r.Off = -1 // caller must fill in
  3103  			r.Add = a.Offset
  3104  		}
  3105  		return 0
  3106  	}
  3107  
  3108  	return a.Offset
  3109  }
  3110  
// asmandsz emits the addressing bytes for operand a of instruction p: a
// leading byte of the form mod<<6 | r<<3 | rm (the x86 ModR/M layout),
// followed where needed by a SIB byte (via asmidx) and an 8- or 32-bit
// displacement.
//
// r is placed in bits 3-5 of the leading byte. rex supplies extra prefix
// bits, of which only 0x40 and Rxr are kept; they are OR-ed into
// asmbuf.rexflag together with the bits required by the registers of a.
// m64 is not referenced in the body. When the displacement needs a
// relocation, one is added to cursym at the position of the displacement.
//
// Operands that cannot be encoded are reported through ctxt.Diag
// ("asmand: bad address"); nothing further is emitted in that case.
func (asmbuf *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Keep only the 0x40 and Rxr bits of the caller-supplied rex.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && asmbuf.rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement, truncated to 32 bits.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR operands are never encoded here; the two checks only
		// add a more specific diagnostic before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, rm=register, no displacement.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		asmbuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		asmbuf.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: rm=4 announces a SIB byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base for the rest of
		// this block.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			// On AMD64 an indexed symbol reference is accepted only when
			// useAbs reports true for the symbol.
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		asmbuf.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 plus SIB, always followed by a
			// 32-bit displacement.
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// mod=0: no displacement. BP and R13 are excluded as base here and
		// use the 8-bit displacement form below instead.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// mod=1: displacement fits in one signed byte.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			asmbuf.Put1(byte(v))
			return
		}

		// mod=2: 32-bit displacement, possibly with a relocation.
		asmbuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without a real index register (the index is REG_NONE
	// or REG_TLS).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	asmbuf.rexflag |= regrex[base]&Rxb | rex
	// No general-purpose base register: the address is a bare 32-bit
	// displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// mod=0, rm=5 form: used for symbol references that do not use an
		// absolute address, and for everything when not targeting AMD64.
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			asmbuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		asmbuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always get a SIB byte (asmidx is called with no
	// index register); only the displacement size varies.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			asmbuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			asmbuf.Put1(byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: no SIB byte.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// The offset moves into an R_TLS_LE relocation addend and the
			// encoded displacement becomes 0; the non-zero rel.Siz then
			// forces the 32-bit displacement form below.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// mod=0: no displacement (not available with BP or R13 as base).
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		// mod=1: 8-bit displacement.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		// mod=2: 32-bit displacement.
		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

	// putrelv writes the 32-bit displacement, first recording the pending
	// relocation (if any) at the displacement's position.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(asmbuf.Len()))
	}

	asmbuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  3311  
  3312  func (asmbuf *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3313  	asmbuf.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3314  }
  3315  
  3316  func (asmbuf *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3317  	asmbuf.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3318  }
  3319  
  3320  func bytereg(a *obj.Addr, t *uint8) {
  3321  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3322  		a.Reg += REG_AL - REG_AX
  3323  		*t = 0
  3324  	}
  3325  }
  3326  
  3327  func unbytereg(a *obj.Addr, t *uint8) {
  3328  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3329  		a.Reg += REG_AX - REG_AL
  3330  		*t = 0
  3331  	}
  3332  }
  3333  
  3334  const (
  3335  	E = 0xff
  3336  )
  3337  
// ymovtab is a table of encodings for PUSH/POP of segment registers, moves
// to and from segment, control, debug, test and descriptor-table registers,
// double shifts, and loading the TLS base.
//
// Each entry lists, in order: the instruction, three operand classes (Ynone
// where a position is unused), a small integer code, and four opcode bytes.
// NOTE(review): the field meanings are read off the entries below; the
// Movtab declaration and the code that interprets the integer code are
// outside this section, so confirm against them before relying on this.
//
// The table is terminated by an all-zero entry.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3477  
  3478  func isax(a *obj.Addr) bool {
  3479  	switch a.Reg {
  3480  	case REG_AX, REG_AL, REG_AH:
  3481  		return true
  3482  	}
  3483  
  3484  	if a.Index == REG_AX {
  3485  		return true
  3486  	}
  3487  	return false
  3488  }
  3489  
  3490  func subreg(p *obj.Prog, from int, to int) {
  3491  	if false { /* debug['Q'] */
  3492  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3493  	}
  3494  
  3495  	if int(p.From.Reg) == from {
  3496  		p.From.Reg = int16(to)
  3497  		p.Ft = 0
  3498  	}
  3499  
  3500  	if int(p.To.Reg) == from {
  3501  		p.To.Reg = int16(to)
  3502  		p.Tt = 0
  3503  	}
  3504  
  3505  	if int(p.From.Index) == from {
  3506  		p.From.Index = int16(to)
  3507  		p.Ft = 0
  3508  	}
  3509  
  3510  	if int(p.To.Index) == from {
  3511  		p.To.Index = int16(to)
  3512  		p.Tt = 0
  3513  	}
  3514  
  3515  	if false { /* debug['Q'] */
  3516  		fmt.Printf("%v\n", p)
  3517  	}
  3518  }
  3519  
  3520  func (asmbuf *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3521  	switch op {
  3522  	case Pm, Pe, Pf2, Pf3:
  3523  		if osize != 1 {
  3524  			if op != Pm {
  3525  				asmbuf.Put1(byte(op))
  3526  			}
  3527  			asmbuf.Put1(Pm)
  3528  			z++
  3529  			op = int(o.op[z])
  3530  			break
  3531  		}
  3532  		fallthrough
  3533  
  3534  	default:
  3535  		if asmbuf.Len() == 0 || asmbuf.Last() != Pm {
  3536  			asmbuf.Put1(Pm)
  3537  		}
  3538  	}
  3539  
  3540  	asmbuf.Put1(byte(op))
  3541  	return z
  3542  }
  3543  
  3544  var bpduff1 = []byte{
  3545  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3546  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3547  }
  3548  
  3549  var bpduff2 = []byte{
  3550  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3551  }
  3552  
  3553  // Emit VEX prefix and opcode byte.
  3554  // The three addresses are the r/m, vvvv, and reg fields.
  3555  // The reg and rm arguments appear in the same order as the
  3556  // arguments to asmand, which typically follows the call to asmvex.
  3557  // The final two arguments are the VEX prefix (see encoding above)
  3558  // and the opcode byte.
  3559  // For details about vex prefix see:
  3560  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3561  func (asmbuf *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3562  	asmbuf.vexflag = 1
  3563  	rexR := 0
  3564  	if r != nil {
  3565  		rexR = regrex[r.Reg] & Rxr
  3566  	}
  3567  	rexB := 0
  3568  	rexX := 0
  3569  	if rm != nil {
  3570  		rexB = regrex[rm.Reg] & Rxb
  3571  		rexX = regrex[rm.Index] & Rxx
  3572  	}
  3573  	vexM := (vex >> 3) & 0xF
  3574  	vexWLP := vex & 0x87
  3575  	vexV := byte(0)
  3576  	if v != nil {
  3577  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3578  	}
  3579  	vexV ^= 0xF
  3580  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3581  		// Can use 2-byte encoding.
  3582  		asmbuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3583  	} else {
  3584  		// Must use 3-byte encoding.
  3585  		asmbuf.Put3(0xc4,
  3586  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3587  			vexV<<3|vexWLP,
  3588  		)
  3589  	}
  3590  	asmbuf.Put1(opcode)
  3591  }
  3592  
  3593  // regIndex returns register index that fits in 4 bits.
  3594  //
  3595  // Examples:
  3596  //   REG_X15 => 15
  3597  //   REG_R9  => 9
  3598  //   REG_AX  => 0
  3599  //
  3600  func regIndex(r int16) int {
  3601  	lower3bits := reg[r]
  3602  	high4bit := regrex[r] & Rxr << 1
  3603  	return lower3bits | high4bit
  3604  }
  3605  
  3606  // avx2gatherValid returns true if p satisfies AVX2 gather constraints.
  3607  // Reports errors via ctxt.
  3608  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  3609  	// If any pair of the index, mask, or destination registers
  3610  	// are the same, this instruction results a #UD fault.
  3611  	index := regIndex(p.GetFrom3().Index)
  3612  	mask := regIndex(p.From.Reg)
  3613  	dest := regIndex(p.To.Reg)
  3614  	if dest == mask || dest == index || mask == index {
  3615  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  3616  		return false
  3617  	}
  3618  
  3619  	return true
  3620  }
  3621  
  3622  func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  3623  	o := opindex[p.As&obj.AMask]
  3624  
  3625  	if o == nil {
  3626  		ctxt.Diag("asmins: missing op %v", p)
  3627  		return
  3628  	}
  3629  
  3630  	pre := prefixof(ctxt, p, &p.From)
  3631  	if pre != 0 {
  3632  		asmbuf.Put1(byte(pre))
  3633  	}
  3634  	pre = prefixof(ctxt, p, &p.To)
  3635  	if pre != 0 {
  3636  		asmbuf.Put1(byte(pre))
  3637  	}
  3638  
  3639  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3640  	// which encodes as SHRQ $32(DX*0), AX.
  3641  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3642  	// Change encoding generated by assemblers and compilers and remove.
  3643  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3644  		p.SetFrom3(obj.Addr{
  3645  			Type: obj.TYPE_REG,
  3646  			Reg:  p.From.Index,
  3647  		})
  3648  		p.From.Index = 0
  3649  	}
  3650  
  3651  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3652  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3653  	switch p.As {
  3654  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3655  		if p.From3Type() == obj.TYPE_NONE {
  3656  			p.SetFrom3(p.From)
  3657  			p.From = obj.Addr{}
  3658  			p.From.Type = obj.TYPE_CONST
  3659  			p.From.Offset = p.To.Offset
  3660  			p.To.Offset = 0
  3661  		}
  3662  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3663  		if p.From3Type() == obj.TYPE_NONE {
  3664  			p.SetFrom3(p.To)
  3665  			p.To = obj.Addr{}
  3666  			p.To.Type = obj.TYPE_CONST
  3667  			p.To.Offset = p.GetFrom3().Offset
  3668  			p.GetFrom3().Offset = 0
  3669  		}
  3670  
  3671  	case AVGATHERDPD,
  3672  		AVGATHERQPD,
  3673  		AVGATHERDPS,
  3674  		AVGATHERQPS,
  3675  		AVPGATHERDD,
  3676  		AVPGATHERQD,
  3677  		AVPGATHERDQ,
  3678  		AVPGATHERQQ:
  3679  		if !avx2gatherValid(ctxt, p) {
  3680  			return
  3681  		}
  3682  	}
  3683  
  3684  	if p.Ft == 0 {
  3685  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3686  	}
  3687  	if p.Tt == 0 {
  3688  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3689  	}
  3690  
  3691  	ft := int(p.Ft) * Ymax
  3692  	var f3t int
  3693  	tt := int(p.Tt) * Ymax
  3694  
  3695  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3696  	z := 0
  3697  	var a *obj.Addr
  3698  	var l int
  3699  	var op int
  3700  	var q *obj.Prog
  3701  	var r *obj.Reloc
  3702  	var rel obj.Reloc
  3703  	var v int64
  3704  
  3705  	args := make([]int, 0, 6)
  3706  	if ft != Ynone*Ymax {
  3707  		args = append(args, ft)
  3708  	}
  3709  	for i := range p.RestArgs {
  3710  		args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
  3711  	}
  3712  	if tt != Ynone*Ymax {
  3713  		args = append(args, tt)
  3714  	}
  3715  
  3716  	for _, yt := range o.ytab {
  3717  		if !yt.match(args) {
  3718  			z += int(yt.zoffset) + xo
  3719  		} else {
  3720  			switch o.prefix {
  3721  			case Px1: /* first option valid only in 32-bit mode */
  3722  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  3723  					z += int(yt.zoffset) + xo
  3724  					continue
  3725  				}
  3726  			case Pq: /* 16 bit escape and opcode escape */
  3727  				asmbuf.Put2(Pe, Pm)
  3728  
  3729  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3730  				asmbuf.rexflag |= Pw
  3731  				asmbuf.Put2(Pe, Pm)
  3732  
  3733  			case Pq4: /*  66 0F 38 */
  3734  				asmbuf.Put3(0x66, 0x0F, 0x38)
  3735  
  3736  			case Pq4w: /*  66 0F 38 + REX.W */
  3737  				asmbuf.rexflag |= Pw
  3738  				asmbuf.Put3(0x66, 0x0F, 0x38)
  3739  
  3740  			case Pq5: /*  F3 0F 38 */
  3741  				asmbuf.Put3(0xF3, 0x0F, 0x38)
  3742  
  3743  			case Pq5w: /*  F3 0F 38 + REX.W */
  3744  				asmbuf.rexflag |= Pw
  3745  				asmbuf.Put3(0xF3, 0x0F, 0x38)
  3746  
  3747  			case Pf2, /* xmm opcode escape */
  3748  				Pf3:
  3749  				asmbuf.Put2(o.prefix, Pm)
  3750  
  3751  			case Pef3:
  3752  				asmbuf.Put3(Pe, Pf3, Pm)
  3753  
  3754  			case Pfw: /* xmm opcode escape + REX.W */
  3755  				asmbuf.rexflag |= Pw
  3756  				asmbuf.Put2(Pf3, Pm)
  3757  
  3758  			case Pm: /* opcode escape */
  3759  				asmbuf.Put1(Pm)
  3760  
  3761  			case Pe: /* 16 bit escape */
  3762  				asmbuf.Put1(Pe)
  3763  
  3764  			case Pw: /* 64-bit escape */
  3765  				if ctxt.Arch.Family != sys.AMD64 {
  3766  					ctxt.Diag("asmins: illegal 64: %v", p)
  3767  				}
  3768  				asmbuf.rexflag |= Pw
  3769  
  3770  			case Pw8: /* 64-bit escape if z >= 8 */
  3771  				if z >= 8 {
  3772  					if ctxt.Arch.Family != sys.AMD64 {
  3773  						ctxt.Diag("asmins: illegal 64: %v", p)
  3774  					}
  3775  					asmbuf.rexflag |= Pw
  3776  				}
  3777  
  3778  			case Pb: /* botch */
  3779  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3780  					goto bad
  3781  				}
  3782  				// NOTE(rsc): This is probably safe to do always,
  3783  				// but when enabled it chooses different encodings
  3784  				// than the old cmd/internal/obj/i386 code did,
  3785  				// which breaks our "same bits out" checks.
  3786  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3787  				// in the original obj/i386, and it would encode
  3788  				// (using a valid, shorter form) as 3c 00 if we enabled
  3789  				// the call to bytereg here.
  3790  				if ctxt.Arch.Family == sys.AMD64 {
  3791  					bytereg(&p.From, &p.Ft)
  3792  					bytereg(&p.To, &p.Tt)
  3793  				}
  3794  
  3795  			case P32: /* 32 bit but illegal if 64-bit mode */
  3796  				if ctxt.Arch.Family == sys.AMD64 {
  3797  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3798  				}
  3799  
  3800  			case Py: /* 64-bit only, no prefix */
  3801  				if ctxt.Arch.Family != sys.AMD64 {
  3802  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3803  				}
  3804  
  3805  			case Py1: /* 64-bit only if z < 1, no prefix */
  3806  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  3807  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3808  				}
  3809  
  3810  			case Py3: /* 64-bit only if z < 3, no prefix */
  3811  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  3812  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  3813  				}
  3814  			}
  3815  
  3816  			if z >= len(o.op) {
  3817  				log.Fatalf("asmins bad table %v", p)
  3818  			}
  3819  			op = int(o.op[z])
  3820  			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
  3821  			if op == 0x0f && o.prefix != Pvex {
  3822  				asmbuf.Put1(byte(op))
  3823  				z++
  3824  				op = int(o.op[z])
  3825  			}
  3826  
  3827  			switch yt.zcase {
  3828  			default:
  3829  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3830  				return
  3831  
  3832  			case Zpseudo:
  3833  				break
  3834  
  3835  			case Zlit:
  3836  				for ; ; z++ {
  3837  					op = int(o.op[z])
  3838  					if op == 0 {
  3839  						break
  3840  					}
  3841  					asmbuf.Put1(byte(op))
  3842  				}
  3843  
  3844  			case Zlitm_r:
  3845  				for ; ; z++ {
  3846  					op = int(o.op[z])
  3847  					if op == 0 {
  3848  						break
  3849  					}
  3850  					asmbuf.Put1(byte(op))
  3851  				}
  3852  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3853  
  3854  			case Zmb_r:
  3855  				bytereg(&p.From, &p.Ft)
  3856  				fallthrough
  3857  
  3858  			case Zm_r:
  3859  				asmbuf.Put1(byte(op))
  3860  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3861  
  3862  			case Zm2_r:
  3863  				asmbuf.Put2(byte(op), o.op[z+1])
  3864  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3865  
  3866  			case Zm_r_xm:
  3867  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3868  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3869  
  3870  			case Zm_r_xm_nr:
  3871  				asmbuf.rexflag = 0
  3872  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3873  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3874  
  3875  			case Zm_r_i_xm:
  3876  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3877  				asmbuf.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  3878  				asmbuf.Put1(byte(p.To.Offset))
  3879  
  3880  			case Zibm_r, Zibr_m:
  3881  				for {
  3882  					tmp1 := z
  3883  					z++
  3884  					op = int(o.op[tmp1])
  3885  					if op == 0 {
  3886  						break
  3887  					}
  3888  					asmbuf.Put1(byte(op))
  3889  				}
  3890  				if yt.zcase == Zibr_m {
  3891  					asmbuf.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  3892  				} else {
  3893  					asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  3894  				}
  3895  				asmbuf.Put1(byte(p.From.Offset))
  3896  
  3897  			case Zaut_r:
  3898  				asmbuf.Put1(0x8d) // leal
  3899  				if p.From.Type != obj.TYPE_ADDR {
  3900  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3901  				}
  3902  				p.From.Type = obj.TYPE_MEM
  3903  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3904  				p.From.Type = obj.TYPE_ADDR
  3905  
  3906  			case Zm_o:
  3907  				asmbuf.Put1(byte(op))
  3908  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3909  
  3910  			case Zr_m:
  3911  				asmbuf.Put1(byte(op))
  3912  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3913  
  3914  			case Zvex:
  3915  				asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  3916  
  3917  			case Zvex_rm_v_r:
  3918  				asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  3919  				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  3920  
  3921  			case Zvex_rm_v_ro:
  3922  				asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  3923  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  3924  
  3925  			case Zvex_i_r_v:
  3926  				asmbuf.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  3927  				regnum := byte(0x7)
  3928  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  3929  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  3930  				} else {
  3931  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  3932  				}
  3933  				asmbuf.Put1(byte(o.op[z+2]) | regnum)
  3934  				asmbuf.Put1(byte(p.From.Offset))
  3935  
  3936  			case Zvex_i_rm_v_r:
  3937  				imm, from, from3, to := unpackOps4(p)
  3938  				asmbuf.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  3939  				asmbuf.asmand(ctxt, cursym, p, from, to)
  3940  				asmbuf.Put1(byte(imm.Offset))
  3941  
  3942  			case Zvex_i_rm_r:
  3943  				asmbuf.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  3944  				asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  3945  				asmbuf.Put1(byte(p.From.Offset))
  3946  
  3947  			case Zvex_v_rm_r:
  3948  				asmbuf.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  3949  				asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  3950  
  3951  			case Zvex_r_v_rm:
  3952  				asmbuf.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  3953  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3954  
  3955  			case Zvex_rm_r_vo:
  3956  				asmbuf.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  3957  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  3958  
  3959  			case Zvex_i_r_rm:
  3960  				asmbuf.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  3961  				asmbuf.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  3962  				asmbuf.Put1(byte(p.From.Offset))
  3963  
  3964  			case Zvex_hr_rm_v_r:
  3965  				hr, from, from3, to := unpackOps4(p)
  3966  				asmbuf.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  3967  				asmbuf.asmand(ctxt, cursym, p, from, to)
  3968  				asmbuf.Put1(byte(regIndex(hr.Reg) << 4))
  3969  
  3970  			case Zr_m_xm:
  3971  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3972  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3973  
  3974  			case Zr_m_xm_nr:
  3975  				asmbuf.rexflag = 0
  3976  				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  3977  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
  3978  
  3979  			case Zo_m:
  3980  				asmbuf.Put1(byte(op))
  3981  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  3982  
  3983  			case Zcallindreg:
  3984  				r = obj.Addrel(cursym)
  3985  				r.Off = int32(p.Pc)
  3986  				r.Type = objabi.R_CALLIND
  3987  				r.Siz = 0
  3988  				fallthrough
  3989  
  3990  			case Zo_m64:
  3991  				asmbuf.Put1(byte(op))
  3992  				asmbuf.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  3993  
  3994  			case Zm_ibo:
  3995  				asmbuf.Put1(byte(op))
  3996  				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  3997  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3998  
  3999  			case Zibo_m:
  4000  				asmbuf.Put1(byte(op))
  4001  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4002  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4003  
  4004  			case Zibo_m_xm:
  4005  				z = asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4006  				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4007  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4008  
  4009  			case Z_ib, Zib_:
  4010  				if yt.zcase == Zib_ {
  4011  					a = &p.From
  4012  				} else {
  4013  					a = &p.To
  4014  				}
  4015  				asmbuf.Put1(byte(op))
  4016  				if p.As == AXABORT {
  4017  					asmbuf.Put1(o.op[z+1])
  4018  				}
  4019  				asmbuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  4020  
  4021  			case Zib_rp:
  4022  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4023  				asmbuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4024  
  4025  			case Zil_rp:
  4026  				asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  4027  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  4028  				if o.prefix == Pe {
  4029  					v = vaddr(ctxt, p, &p.From, nil)
  4030  					asmbuf.PutInt16(int16(v))
  4031  				} else {
  4032  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  4033  				}
  4034  
  4035  			case Zo_iw:
  4036  				asmbuf.Put1(byte(op))
  4037  				if p.From.Type != obj.TYPE_NONE {
  4038  					v = vaddr(ctxt, p, &p.From, nil)
  4039  					asmbuf.PutInt16(int16(v))
  4040  				}
  4041  
  4042  			case Ziq_rp:
  4043  				v = vaddr(ctxt, p, &p.From, &rel)
  4044  				l = int(v >> 32)
  4045  				if l == 0 && rel.Siz != 8 {
  4046  					//p->mark |= 0100;
  4047  					//print("zero: %llux %v\n", v, p);
  4048  					asmbuf.rexflag &^= (0x40 | Rxw)
  4049  
  4050  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  4051  					asmbuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  4052  					if rel.Type != 0 {
  4053  						r = obj.Addrel(cursym)
  4054  						*r = rel
  4055  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4056  					}
  4057  
  4058  					asmbuf.PutInt32(int32(v))
  4059  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  4060  
  4061  					//p->mark |= 0100;
  4062  					//print("sign: %llux %v\n", v, p);
  4063  					asmbuf.Put1(0xc7)
  4064  					asmbuf.asmando(ctxt, cursym, p, &p.To, 0)
  4065  
  4066  					asmbuf.PutInt32(int32(v)) // need all 8
  4067  				} else {
  4068  					//print("all: %llux %v\n", v, p);
  4069  					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
  4070  					asmbuf.Put1(byte(op + reg[p.To.Reg]))
  4071  					if rel.Type != 0 {
  4072  						r = obj.Addrel(cursym)
  4073  						*r = rel
  4074  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4075  					}
  4076  
  4077  					asmbuf.PutInt64(v)
  4078  				}
  4079  
  4080  			case Zib_rr:
  4081  				asmbuf.Put1(byte(op))
  4082  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  4083  				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4084  
  4085  			case Z_il, Zil_:
  4086  				if yt.zcase == Zil_ {
  4087  					a = &p.From
  4088  				} else {
  4089  					a = &p.To
  4090  				}
  4091  				asmbuf.Put1(byte(op))
  4092  				if o.prefix == Pe {
  4093  					v = vaddr(ctxt, p, a, nil)
  4094  					asmbuf.PutInt16(int16(v))
  4095  				} else {
  4096  					asmbuf.relput4(ctxt, cursym, p, a)
  4097  				}
  4098  
  4099  			case Zm_ilo, Zilo_m:
  4100  				asmbuf.Put1(byte(op))
  4101  				if yt.zcase == Zilo_m {
  4102  					a = &p.From
  4103  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4104  				} else {
  4105  					a = &p.To
  4106  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4107  				}
  4108  
  4109  				if o.prefix == Pe {
  4110  					v = vaddr(ctxt, p, a, nil)
  4111  					asmbuf.PutInt16(int16(v))
  4112  				} else {
  4113  					asmbuf.relput4(ctxt, cursym, p, a)
  4114  				}
  4115  
  4116  			case Zil_rr:
  4117  				asmbuf.Put1(byte(op))
  4118  				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
  4119  				if o.prefix == Pe {
  4120  					v = vaddr(ctxt, p, &p.From, nil)
  4121  					asmbuf.PutInt16(int16(v))
  4122  				} else {
  4123  					asmbuf.relput4(ctxt, cursym, p, &p.From)
  4124  				}
  4125  
  4126  			case Z_rp:
  4127  				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4128  				asmbuf.Put1(byte(op + reg[p.To.Reg]))
  4129  
  4130  			case Zrp_:
  4131  				asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4132  				asmbuf.Put1(byte(op + reg[p.From.Reg]))
  4133  
  4134  			case Zcallcon, Zjmpcon:
  4135  				if yt.zcase == Zcallcon {
  4136  					asmbuf.Put1(byte(op))
  4137  				} else {
  4138  					asmbuf.Put1(o.op[z+1])
  4139  				}
  4140  				r = obj.Addrel(cursym)
  4141  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4142  				r.Type = objabi.R_PCREL
  4143  				r.Siz = 4
  4144  				r.Add = p.To.Offset
  4145  				asmbuf.PutInt32(0)
  4146  
  4147  			case Zcallind:
  4148  				asmbuf.Put2(byte(op), o.op[z+1])
  4149  				r = obj.Addrel(cursym)
  4150  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4151  				if ctxt.Arch.Family == sys.AMD64 {
  4152  					r.Type = objabi.R_PCREL
  4153  				} else {
  4154  					r.Type = objabi.R_ADDR
  4155  				}
  4156  				r.Siz = 4
  4157  				r.Add = p.To.Offset
  4158  				r.Sym = p.To.Sym
  4159  				asmbuf.PutInt32(0)
  4160  
  4161  			case Zcall, Zcallduff:
  4162  				if p.To.Sym == nil {
  4163  					ctxt.Diag("call without target")
  4164  					ctxt.DiagFlush()
  4165  					log.Fatalf("bad code")
  4166  				}
  4167  
  4168  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4169  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4170  				}
  4171  
  4172  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4173  					// Maintain BP around call, since duffcopy/duffzero can't do it
  4174  					// (the call jumps into the middle of the function).
  4175  					// This makes it possible to see call sites for duffcopy/duffzero in
  4176  					// BP-based profiling tools like Linux perf (which is the
  4177  					// whole point of obj.Framepointer_enabled).
  4178  					// MOVQ BP, -16(SP)
  4179  					// LEAQ -16(SP), BP
  4180  					asmbuf.Put(bpduff1)
  4181  				}
  4182  				asmbuf.Put1(byte(op))
  4183  				r = obj.Addrel(cursym)
  4184  				r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4185  				r.Sym = p.To.Sym
  4186  				r.Add = p.To.Offset
  4187  				r.Type = objabi.R_CALL
  4188  				r.Siz = 4
  4189  				asmbuf.PutInt32(0)
  4190  
  4191  				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
  4192  					// Pop BP pushed above.
  4193  					// MOVQ 0(BP), BP
  4194  					asmbuf.Put(bpduff2)
  4195  				}
  4196  
  4197  			// TODO: jump across functions needs reloc
  4198  			case Zbr, Zjmp, Zloop:
  4199  				if p.As == AXBEGIN {
  4200  					asmbuf.Put1(byte(op))
  4201  				}
  4202  				if p.To.Sym != nil {
  4203  					if yt.zcase != Zjmp {
  4204  						ctxt.Diag("branch to ATEXT")
  4205  						ctxt.DiagFlush()
  4206  						log.Fatalf("bad code")
  4207  					}
  4208  
  4209  					asmbuf.Put1(o.op[z+1])
  4210  					r = obj.Addrel(cursym)
  4211  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4212  					r.Sym = p.To.Sym
  4213  					r.Type = objabi.R_PCREL
  4214  					r.Siz = 4
  4215  					asmbuf.PutInt32(0)
  4216  					break
  4217  				}
  4218  
  4219  				// Assumes q is in this function.
  4220  				// TODO: Check in input, preserve in brchain.
  4221  
  4222  				// Fill in backward jump now.
  4223  				q = p.Pcond
  4224  
  4225  				if q == nil {
  4226  					ctxt.Diag("jmp/branch/loop without target")
  4227  					ctxt.DiagFlush()
  4228  					log.Fatalf("bad code")
  4229  				}
  4230  
  4231  				if p.Back&1 != 0 {
  4232  					v = q.Pc - (p.Pc + 2)
  4233  					if v >= -128 && p.As != AXBEGIN {
  4234  						if p.As == AJCXZL {
  4235  							asmbuf.Put1(0x67)
  4236  						}
  4237  						asmbuf.Put2(byte(op), byte(v))
  4238  					} else if yt.zcase == Zloop {
  4239  						ctxt.Diag("loop too far: %v", p)
  4240  					} else {
  4241  						v -= 5 - 2
  4242  						if p.As == AXBEGIN {
  4243  							v--
  4244  						}
  4245  						if yt.zcase == Zbr {
  4246  							asmbuf.Put1(0x0f)
  4247  							v--
  4248  						}
  4249  
  4250  						asmbuf.Put1(o.op[z+1])
  4251  						asmbuf.PutInt32(int32(v))
  4252  					}
  4253  
  4254  					break
  4255  				}
  4256  
  4257  				// Annotate target; will fill in later.
  4258  				p.Forwd = q.Rel
  4259  
  4260  				q.Rel = p
  4261  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  4262  					if p.As == AJCXZL {
  4263  						asmbuf.Put1(0x67)
  4264  					}
  4265  					asmbuf.Put2(byte(op), 0)
  4266  				} else if yt.zcase == Zloop {
  4267  					ctxt.Diag("loop too far: %v", p)
  4268  				} else {
  4269  					if yt.zcase == Zbr {
  4270  						asmbuf.Put1(0x0f)
  4271  					}
  4272  					asmbuf.Put1(o.op[z+1])
  4273  					asmbuf.PutInt32(0)
  4274  				}
  4275  
  4276  				break
  4277  
  4278  			/*
  4279  				v = q->pc - p->pc - 2;
  4280  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  4281  					*ctxt->andptr++ = op;
  4282  					*ctxt->andptr++ = v;
  4283  				} else {
  4284  					v -= 5-2;
  4285  					if(yt.zcase == Zbr) {
  4286  						*ctxt->andptr++ = 0x0f;
  4287  						v--;
  4288  					}
  4289  					*ctxt->andptr++ = o->op[z+1];
  4290  					*ctxt->andptr++ = v;
  4291  					*ctxt->andptr++ = v>>8;
  4292  					*ctxt->andptr++ = v>>16;
  4293  					*ctxt->andptr++ = v>>24;
  4294  				}
  4295  			*/
  4296  
  4297  			case Zbyte:
  4298  				v = vaddr(ctxt, p, &p.From, &rel)
  4299  				if rel.Siz != 0 {
  4300  					rel.Siz = uint8(op)
  4301  					r = obj.Addrel(cursym)
  4302  					*r = rel
  4303  					r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4304  				}
  4305  
  4306  				asmbuf.Put1(byte(v))
  4307  				if op > 1 {
  4308  					asmbuf.Put1(byte(v >> 8))
  4309  					if op > 2 {
  4310  						asmbuf.PutInt16(int16(v >> 16))
  4311  						if op > 4 {
  4312  							asmbuf.PutInt32(int32(v >> 32))
  4313  						}
  4314  					}
  4315  				}
  4316  			}
  4317  
  4318  			return
  4319  		}
  4320  	}
  4321  	f3t = Ynone * Ymax
  4322  	if p.GetFrom3() != nil {
  4323  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4324  	}
  4325  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4326  		var pp obj.Prog
  4327  		var t []byte
  4328  		if p.As == mo[0].as {
  4329  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4330  				t = mo[0].op[:]
  4331  				switch mo[0].code {
  4332  				default:
  4333  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4334  
  4335  				case 0: /* lit */
  4336  					for z = 0; t[z] != E; z++ {
  4337  						asmbuf.Put1(t[z])
  4338  					}
  4339  
  4340  				case 1: /* r,m */
  4341  					asmbuf.Put1(t[0])
  4342  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4343  
  4344  				case 2: /* m,r */
  4345  					asmbuf.Put1(t[0])
  4346  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4347  
  4348  				case 3: /* r,m - 2op */
  4349  					asmbuf.Put2(t[0], t[1])
  4350  					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4351  					asmbuf.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4352  
  4353  				case 4: /* m,r - 2op */
  4354  					asmbuf.Put2(t[0], t[1])
  4355  					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  4356  					asmbuf.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4357  
  4358  				case 5: /* load full pointer, trash heap */
  4359  					if t[0] != 0 {
  4360  						asmbuf.Put1(t[0])
  4361  					}
  4362  					switch p.To.Index {
  4363  					default:
  4364  						goto bad
  4365  
  4366  					case REG_DS:
  4367  						asmbuf.Put1(0xc5)
  4368  
  4369  					case REG_SS:
  4370  						asmbuf.Put2(0x0f, 0xb2)
  4371  
  4372  					case REG_ES:
  4373  						asmbuf.Put1(0xc4)
  4374  
  4375  					case REG_FS:
  4376  						asmbuf.Put2(0x0f, 0xb4)
  4377  
  4378  					case REG_GS:
  4379  						asmbuf.Put2(0x0f, 0xb5)
  4380  					}
  4381  
  4382  					asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
  4383  
  4384  				case 6: /* double shift */
  4385  					if t[0] == Pw {
  4386  						if ctxt.Arch.Family != sys.AMD64 {
  4387  							ctxt.Diag("asmins: illegal 64: %v", p)
  4388  						}
  4389  						asmbuf.rexflag |= Pw
  4390  						t = t[1:]
  4391  					} else if t[0] == Pe {
  4392  						asmbuf.Put1(Pe)
  4393  						t = t[1:]
  4394  					}
  4395  
  4396  					switch p.From.Type {
  4397  					default:
  4398  						goto bad
  4399  
  4400  					case obj.TYPE_CONST:
  4401  						asmbuf.Put2(0x0f, t[0])
  4402  						asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4403  						asmbuf.Put1(byte(p.From.Offset))
  4404  
  4405  					case obj.TYPE_REG:
  4406  						switch p.From.Reg {
  4407  						default:
  4408  							goto bad
  4409  
  4410  						case REG_CL, REG_CX:
  4411  							asmbuf.Put2(0x0f, t[1])
  4412  							asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  4413  						}
  4414  					}
  4415  
  4416  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4417  				// where you load the TLS base register into a register and then index off that
  4418  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4419  				// are handled in prefixof above and should not be listed here.
  4420  				case 7: /* mov tls, r */
  4421  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  4422  						ctxt.Diag("invalid load of TLS: %v", p)
  4423  					}
  4424  
  4425  					if ctxt.Arch.Family == sys.I386 {
  4426  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4427  						// where you load the TLS base register into a register and then index off that
  4428  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4429  						// are handled in prefixof above and should not be listed here.
  4430  						switch ctxt.Headtype {
  4431  						default:
  4432  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4433  
  4434  						case objabi.Hlinux,
  4435  							objabi.Hnacl:
  4436  							if ctxt.Flag_shared {
  4437  								// Note that this is not generating the same insns as the other cases.
  4438  								//     MOV TLS, dst
  4439  								// becomes
  4440  								//     call __x86.get_pc_thunk.dst
  4441  								//     movl (gotpc + g@gotntpoff)(dst), dst
  4442  								// which is encoded as
  4443  								//     call __x86.get_pc_thunk.dst
  4444  								//     movq 0(dst), dst
  4445  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4446  								// is g, which we can't check here, but will when we assemble the second
  4447  								// instruction.
  4448  								dst := p.To.Reg
  4449  								asmbuf.Put1(0xe8)
  4450  								r = obj.Addrel(cursym)
  4451  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4452  								r.Type = objabi.R_CALL
  4453  								r.Siz = 4
  4454  								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
  4455  								asmbuf.PutInt32(0)
  4456  
  4457  								asmbuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  4458  								r = obj.Addrel(cursym)
  4459  								r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4460  								r.Type = objabi.R_TLS_IE
  4461  								r.Siz = 4
  4462  								r.Add = 2
  4463  								asmbuf.PutInt32(0)
  4464  							} else {
  4465  								// ELF TLS base is 0(GS).
  4466  								pp.From = p.From
  4467  
  4468  								pp.From.Type = obj.TYPE_MEM
  4469  								pp.From.Reg = REG_GS
  4470  								pp.From.Offset = 0
  4471  								pp.From.Index = REG_NONE
  4472  								pp.From.Scale = 0
  4473  								asmbuf.Put2(0x65, // GS
  4474  									0x8B)
  4475  								asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4476  							}
  4477  						case objabi.Hplan9:
  4478  							pp.From = obj.Addr{}
  4479  							pp.From.Type = obj.TYPE_MEM
  4480  							pp.From.Name = obj.NAME_EXTERN
  4481  							pp.From.Sym = plan9privates
  4482  							pp.From.Offset = 0
  4483  							pp.From.Index = REG_NONE
  4484  							asmbuf.Put1(0x8B)
  4485  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4486  
  4487  						case objabi.Hwindows:
  4488  							// Windows TLS base is always 0x14(FS).
  4489  							pp.From = p.From
  4490  
  4491  							pp.From.Type = obj.TYPE_MEM
  4492  							pp.From.Reg = REG_FS
  4493  							pp.From.Offset = 0x14
  4494  							pp.From.Index = REG_NONE
  4495  							pp.From.Scale = 0
  4496  							asmbuf.Put2(0x64, // FS
  4497  								0x8B)
  4498  							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4499  						}
  4500  						break
  4501  					}
  4502  
  4503  					switch ctxt.Headtype {
  4504  					default:
  4505  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  4506  
  4507  					case objabi.Hlinux:
  4508  						if !ctxt.Flag_shared {
  4509  							log.Fatalf("unknown TLS base location for linux without -shared")
  4510  						}
  4511  						// Note that this is not generating the same insn as the other cases.
  4512  						//     MOV TLS, R_to
  4513  						// becomes
  4514  						//     movq g@gottpoff(%rip), R_to
  4515  						// which is encoded as
  4516  						//     movq 0(%rip), R_to
  4517  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4518  						// is g, which we can't check here, but will when we assemble the second
  4519  						// instruction.
  4520  						asmbuf.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4521  
  4522  						asmbuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4523  						r = obj.Addrel(cursym)
  4524  						r.Off = int32(p.Pc + int64(asmbuf.Len()))
  4525  						r.Type = objabi.R_TLS_IE
  4526  						r.Siz = 4
  4527  						r.Add = -4
  4528  						asmbuf.PutInt32(0)
  4529  
  4530  					case objabi.Hplan9:
  4531  						pp.From = obj.Addr{}
  4532  						pp.From.Type = obj.TYPE_MEM
  4533  						pp.From.Name = obj.NAME_EXTERN
  4534  						pp.From.Sym = plan9privates
  4535  						pp.From.Offset = 0
  4536  						pp.From.Index = REG_NONE
  4537  						asmbuf.rexflag |= Pw
  4538  						asmbuf.Put1(0x8B)
  4539  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4540  
  4541  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4542  						// TLS base is 0(FS).
  4543  						pp.From = p.From
  4544  
  4545  						pp.From.Type = obj.TYPE_MEM
  4546  						pp.From.Name = obj.NAME_NONE
  4547  						pp.From.Reg = REG_NONE
  4548  						pp.From.Offset = 0
  4549  						pp.From.Index = REG_NONE
  4550  						pp.From.Scale = 0
  4551  						asmbuf.rexflag |= Pw
  4552  						asmbuf.Put2(0x64, // FS
  4553  							0x8B)
  4554  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4555  
  4556  					case objabi.Hwindows:
  4557  						// Windows TLS base is always 0x28(GS).
  4558  						pp.From = p.From
  4559  
  4560  						pp.From.Type = obj.TYPE_MEM
  4561  						pp.From.Name = obj.NAME_NONE
  4562  						pp.From.Reg = REG_GS
  4563  						pp.From.Offset = 0x28
  4564  						pp.From.Index = REG_NONE
  4565  						pp.From.Scale = 0
  4566  						asmbuf.rexflag |= Pw
  4567  						asmbuf.Put2(0x65, // GS
  4568  							0x8B)
  4569  						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
  4570  					}
  4571  				}
  4572  				return
  4573  			}
  4574  		}
  4575  	}
  4576  	goto bad
  4577  
  4578  bad:
  4579  	if ctxt.Arch.Family != sys.AMD64 {
  4580  		/*
  4581  		 * here, the assembly has failed.
  4582  		 * if its a byte instruction that has
  4583  		 * unaddressable registers, try to
  4584  		 * exchange registers and reissue the
  4585  		 * instruction with the operands renamed.
  4586  		 */
  4587  		pp := *p
  4588  
  4589  		unbytereg(&pp.From, &pp.Ft)
  4590  		unbytereg(&pp.To, &pp.Tt)
  4591  
  4592  		z := int(p.From.Reg)
  4593  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4594  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4595  			// For now, different to keep bit-for-bit compatibility.
  4596  			if ctxt.Arch.Family == sys.I386 {
  4597  				breg := byteswapreg(ctxt, &p.To)
  4598  				if breg != REG_AX {
  4599  					asmbuf.Put1(0x87) // xchg lhs,bx
  4600  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4601  					subreg(&pp, z, breg)
  4602  					asmbuf.doasm(ctxt, cursym, &pp)
  4603  					asmbuf.Put1(0x87) // xchg lhs,bx
  4604  					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
  4605  				} else {
  4606  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4607  					subreg(&pp, z, REG_AX)
  4608  					asmbuf.doasm(ctxt, cursym, &pp)
  4609  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4610  				}
  4611  				return
  4612  			}
  4613  
  4614  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4615  				// We certainly don't want to exchange
  4616  				// with AX if the op is MUL or DIV.
  4617  				asmbuf.Put1(0x87) // xchg lhs,bx
  4618  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4619  				subreg(&pp, z, REG_BX)
  4620  				asmbuf.doasm(ctxt, cursym, &pp)
  4621  				asmbuf.Put1(0x87) // xchg lhs,bx
  4622  				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  4623  			} else {
  4624  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4625  				subreg(&pp, z, REG_AX)
  4626  				asmbuf.doasm(ctxt, cursym, &pp)
  4627  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4628  			}
  4629  			return
  4630  		}
  4631  
  4632  		z = int(p.To.Reg)
  4633  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4634  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4635  			// For now, different to keep bit-for-bit compatibility.
  4636  			if ctxt.Arch.Family == sys.I386 {
  4637  				breg := byteswapreg(ctxt, &p.From)
  4638  				if breg != REG_AX {
  4639  					asmbuf.Put1(0x87) //xchg rhs,bx
  4640  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4641  					subreg(&pp, z, breg)
  4642  					asmbuf.doasm(ctxt, cursym, &pp)
  4643  					asmbuf.Put1(0x87) // xchg rhs,bx
  4644  					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
  4645  				} else {
  4646  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4647  					subreg(&pp, z, REG_AX)
  4648  					asmbuf.doasm(ctxt, cursym, &pp)
  4649  					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4650  				}
  4651  				return
  4652  			}
  4653  
  4654  			if isax(&p.From) {
  4655  				asmbuf.Put1(0x87) // xchg rhs,bx
  4656  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4657  				subreg(&pp, z, REG_BX)
  4658  				asmbuf.doasm(ctxt, cursym, &pp)
  4659  				asmbuf.Put1(0x87) // xchg rhs,bx
  4660  				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  4661  			} else {
  4662  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4663  				subreg(&pp, z, REG_AX)
  4664  				asmbuf.doasm(ctxt, cursym, &pp)
  4665  				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4666  			}
  4667  			return
  4668  		}
  4669  	}
  4670  
  4671  	ctxt.Diag("invalid instruction: %v", p)
  4672  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4673  	return
  4674  }
  4675  
  4676  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4677  // which is not referenced in a.
  4678  // If a is empty, it returns BX to account for MULB-like instructions
  4679  // that might use DX and AX.
  4680  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4681  	cana, canb, canc, cand := true, true, true, true
  4682  	if a.Type == obj.TYPE_NONE {
  4683  		cana, cand = false, false
  4684  	}
  4685  
  4686  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4687  		switch a.Reg {
  4688  		case REG_NONE:
  4689  			cana, cand = false, false
  4690  		case REG_AX, REG_AL, REG_AH:
  4691  			cana = false
  4692  		case REG_BX, REG_BL, REG_BH:
  4693  			canb = false
  4694  		case REG_CX, REG_CL, REG_CH:
  4695  			canc = false
  4696  		case REG_DX, REG_DL, REG_DH:
  4697  			cand = false
  4698  		}
  4699  	}
  4700  
  4701  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4702  		switch a.Index {
  4703  		case REG_AX:
  4704  			cana = false
  4705  		case REG_BX:
  4706  			canb = false
  4707  		case REG_CX:
  4708  			canc = false
  4709  		case REG_DX:
  4710  			cand = false
  4711  		}
  4712  	}
  4713  
  4714  	switch {
  4715  	case cana:
  4716  		return REG_AX
  4717  	case canb:
  4718  		return REG_BX
  4719  	case canc:
  4720  		return REG_CX
  4721  	case cand:
  4722  		return REG_DX
  4723  	default:
  4724  		ctxt.Diag("impossible byte register")
  4725  		ctxt.DiagFlush()
  4726  		log.Fatalf("bad code")
  4727  		return 0
  4728  	}
  4729  }
  4730  
  4731  func isbadbyte(a *obj.Addr) bool {
  4732  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4733  }
  4734  
  4735  var naclret = []uint8{
  4736  	0x5e, // POPL SI
  4737  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4738  	0x83,
  4739  	0xe6,
  4740  	0xe0, // ANDL $~31, SI
  4741  	0x4c,
  4742  	0x01,
  4743  	0xfe, // ADDQ R15, SI
  4744  	0xff,
  4745  	0xe6, // JMP SI
  4746  }
  4747  
  4748  var naclret8 = []uint8{
  4749  	0x5d, // POPL BP
  4750  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4751  	0x83,
  4752  	0xe5,
  4753  	0xe0, // ANDL $~31, BP
  4754  	0xff,
  4755  	0xe5, // JMP BP
  4756  }
  4757  
  4758  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4759  
  4760  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4761  
  4762  var naclmovs = []uint8{
  4763  	0x89,
  4764  	0xf6, // MOVL SI, SI
  4765  	0x49,
  4766  	0x8d,
  4767  	0x34,
  4768  	0x37, // LEAQ (R15)(SI*1), SI
  4769  	0x89,
  4770  	0xff, // MOVL DI, DI
  4771  	0x49,
  4772  	0x8d,
  4773  	0x3c,
  4774  	0x3f, // LEAQ (R15)(DI*1), DI
  4775  }
  4776  
  4777  var naclstos = []uint8{
  4778  	0x89,
  4779  	0xff, // MOVL DI, DI
  4780  	0x49,
  4781  	0x8d,
  4782  	0x3c,
  4783  	0x3f, // LEAQ (R15)(DI*1), DI
  4784  }
  4785  
  4786  func (asmbuf *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
  4787  	if reg >= REG_R8 {
  4788  		asmbuf.Put1(0x45)
  4789  	}
  4790  	reg = (reg - REG_AX) & 7
  4791  	asmbuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4792  }
  4793  
  4794  func (asmbuf *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4795  	asmbuf.Reset()
  4796  
  4797  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 {
  4798  		switch p.As {
  4799  		case obj.ARET:
  4800  			asmbuf.Put(naclret8)
  4801  			return
  4802  
  4803  		case obj.ACALL,
  4804  			obj.AJMP:
  4805  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4806  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4807  			}
  4808  
  4809  		case AINT:
  4810  			asmbuf.Put1(0xf4)
  4811  			return
  4812  		}
  4813  	}
  4814  
  4815  	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
  4816  		if p.As == AREP {
  4817  			asmbuf.rep++
  4818  			return
  4819  		}
  4820  
  4821  		if p.As == AREPN {
  4822  			asmbuf.repn++
  4823  			return
  4824  		}
  4825  
  4826  		if p.As == ALOCK {
  4827  			asmbuf.lock = true
  4828  			return
  4829  		}
  4830  
  4831  		if p.As != ALEAQ && p.As != ALEAL {
  4832  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4833  				asmbuf.nacltrunc(ctxt, int(p.From.Index))
  4834  			}
  4835  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4836  				asmbuf.nacltrunc(ctxt, int(p.To.Index))
  4837  			}
  4838  		}
  4839  
  4840  		switch p.As {
  4841  		case obj.ARET:
  4842  			asmbuf.Put(naclret)
  4843  			return
  4844  
  4845  		case obj.ACALL,
  4846  			obj.AJMP:
  4847  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4848  				// ANDL $~31, reg
  4849  				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4850  				// ADDQ R15, reg
  4851  				asmbuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4852  			}
  4853  
  4854  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4855  				// ANDL $~31, reg
  4856  				asmbuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4857  				// ADDQ R15, reg
  4858  				asmbuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4859  			}
  4860  
  4861  		case AINT:
  4862  			asmbuf.Put1(0xf4)
  4863  			return
  4864  
  4865  		case ASCASB,
  4866  			ASCASW,
  4867  			ASCASL,
  4868  			ASCASQ,
  4869  			ASTOSB,
  4870  			ASTOSW,
  4871  			ASTOSL,
  4872  			ASTOSQ:
  4873  			asmbuf.Put(naclstos)
  4874  
  4875  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4876  			asmbuf.Put(naclmovs)
  4877  		}
  4878  
  4879  		if asmbuf.rep != 0 {
  4880  			asmbuf.Put1(0xf3)
  4881  			asmbuf.rep = 0
  4882  		}
  4883  
  4884  		if asmbuf.repn != 0 {
  4885  			asmbuf.Put1(0xf2)
  4886  			asmbuf.repn = 0
  4887  		}
  4888  
  4889  		if asmbuf.lock {
  4890  			asmbuf.Put1(0xf0)
  4891  			asmbuf.lock = false
  4892  		}
  4893  	}
  4894  
  4895  	asmbuf.rexflag = 0
  4896  	asmbuf.vexflag = 0
  4897  	mark := asmbuf.Len()
  4898  	asmbuf.doasm(ctxt, cursym, p)
  4899  	if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4900  		/*
  4901  		 * as befits the whole approach of the architecture,
  4902  		 * the rex prefix must appear before the first opcode byte
  4903  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4904  		 * before the 0f opcode escape!), or it might be ignored.
  4905  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4906  		 */
  4907  		if ctxt.Arch.Family != sys.AMD64 {
  4908  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  4909  		}
  4910  		n := asmbuf.Len()
  4911  		var np int
  4912  		for np = mark; np < n; np++ {
  4913  			c := asmbuf.At(np)
  4914  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4915  				break
  4916  			}
  4917  		}
  4918  		asmbuf.Insert(np, byte(0x40|asmbuf.rexflag))
  4919  	}
  4920  
  4921  	n := asmbuf.Len()
  4922  	for i := len(cursym.R) - 1; i >= 0; i-- {
  4923  		r := &cursym.R[i]
  4924  		if int64(r.Off) < p.Pc {
  4925  			break
  4926  		}
  4927  		if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
  4928  			r.Off++
  4929  		}
  4930  		if r.Type == objabi.R_PCREL {
  4931  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4932  				// PC-relative addressing is relative to the end of the instruction,
  4933  				// but the relocations applied by the linker are relative to the end
  4934  				// of the relocation. Because immediate instruction
  4935  				// arguments can follow the PC-relative memory reference in the
  4936  				// instruction encoding, the two may not coincide. In this case,
  4937  				// adjust addend so that linker can keep relocating relative to the
  4938  				// end of the relocation.
  4939  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4940  			} else if ctxt.Arch.Family == sys.I386 {
  4941  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4942  				// assumes that the previous instruction loaded the PC of the end
  4943  				// of that instruction into CX, so the adjustment is relative to
  4944  				// that.
  4945  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4946  			}
  4947  		}
  4948  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  4949  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4950  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4951  		}
  4952  
  4953  	}
  4954  
  4955  	if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4956  		switch p.To.Reg {
  4957  		case REG_SP:
  4958  			asmbuf.Put(naclspfix)
  4959  		case REG_BP:
  4960  			asmbuf.Put(naclbpfix)
  4961  		}
  4962  	}
  4963  }
  4964  
  4965  // Extract 4 operands from p.
  4966  func unpackOps4(p *obj.Prog) (*obj.Addr, *obj.Addr, *obj.Addr, *obj.Addr) {
  4967  	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
  4968  }