github.com/aloncn/graphics-go@v0.0.1/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"fmt"
    36  	"log"
    37  	"strings"
    38  )
    39  
    40  // Instruction layout.
    41  
    42  const (
    43  	// Loop alignment constants:
    44  	// want to align loop entry to LoopAlign-byte boundary,
    45  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    46  	// We define a loop entry as the target of a backward jump.
    47  	//
    48  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    49  	// and it aligns all jump targets, not just backward jump targets.
    50  	//
    51  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    52  	// is very slight but negative, so the alignment is disabled by
    53  	// setting MaxLoopPad = 0. The code is here for reference and
    54  	// for future experiments.
    55  	//
    56  	LoopAlign  = 16
    57  	MaxLoopPad = 0
    58  	FuncAlign  = 16
    59  )
    60  
// Optab is one row of the instruction table optab. It ties an instruction
// to the operand patterns it accepts and to the bytes used to encode it.
type Optab struct {
	as     int16     // instruction this row describes (for example AADDL)
	ytab   []ytab    // operand patterns accepted for the instruction
	prefix uint8     // one of the P* constants; controls the prefix bytes to emit
	op     [23]uint8 // opcode bytes, consumed in step with the rows of ytab
}
    67  
    68  type ytab struct {
    69  	from    uint8
    70  	from3   uint8
    71  	to      uint8
    72  	zcase   uint8
    73  	zoffset uint8
    74  }
    75  
    76  type Movtab struct {
    77  	as   int16
    78  	ft   uint8
    79  	f3t  uint8
    80  	tt   uint8
    81  	code uint8
    82  	op   [4]uint8
    83  }
    84  
// Operand classes ("Ytypes"). These are the values stored in the from, from3
// and to columns of ytab rows. The ycover table has Ymax*Ymax entries and
// records which more general classes a specific class also satisfies; it is
// indexed as ycover[specific*Ymax+general].
//
// The values are assigned by iota, so the order of the names matters.
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr
	Yxm
	Yyr
	Yym
	Ytls
	Ytextsize
	Yindir
	// Ymax is one past the last operand class; it sizes ycover.
	Ymax
)
   158  
// Encoding cases ("Ztypes"), stored in the zcase column of ytab rows. As the
// commentary preceding optab explains, doasm switches on this value to decide
// how the opcode bytes and operands are laid down. For example, Zibo_m is an
// opcode byte, then an encoded addressing mode, then a single immediate byte;
// Zilo_m is the same but with a 32-bit immediate.
//
// The values are assigned by iota, so the order of the names matters.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_3d
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zibr_m
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	Zvex_rm_v_r
	Zvex_r_v_rm
	Zvex_v_rm_r
	// Zmax is one past the last encoding case.
	Zmax
)
   213  
   214  const (
   215  	Px   = 0
   216  	Px1  = 1    // symbolic; exact value doesn't matter
   217  	P32  = 0x32 /* 32-bit only */
   218  	Pe   = 0x66 /* operand escape */
   219  	Pm   = 0x0f /* 2byte opcode escape */
   220  	Pq   = 0xff /* both escapes: 66 0f */
   221  	Pb   = 0xfe /* byte operands */
   222  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   223  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   224  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   225  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   226  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   227  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   228  	Pw   = 0x48 /* Rex.w */
   229  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   230  	Py   = 0x80 /* defaults to 64-bit mode */
   231  	Py1  = 0x81 // symbolic; exact value doesn't matter
   232  	Py3  = 0x83 // symbolic; exact value doesn't matter
   233  	Pvex = 0x84 // symbolic: exact value doesn't matter
   234  
   235  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   236  	Rxr = 1 << 2 /* extend modrm reg */
   237  	Rxx = 1 << 1 /* extend sib index */
   238  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   239  )
   240  
   241  const (
   242  	// Encoding for VEX prefix in tables.
   243  	// The P, L, and W fields are chosen to match
   244  	// their eventual locations in the VEX prefix bytes.
   245  
   246  	// P field - 2 bits
   247  	vex66 = 1 << 0
   248  	vexF3 = 2 << 0
   249  	vexF2 = 3 << 0
   250  	// L field - 1 bit
   251  	vexLZ  = 0 << 2
   252  	vexLIG = 0 << 2
   253  	vex128 = 0 << 2
   254  	vex256 = 1 << 2
   255  	// W field - 1 bit
   256  	vexWIG = 0 << 7
   257  	vexW0  = 0 << 7
   258  	vexW1  = 1 << 7
   259  	// M field - 5 bits, but mostly reserved; we can store up to 4
   260  	vex0F   = 1 << 3
   261  	vex0F38 = 2 << 3
   262  	vex0F3A = 3 << 3
   263  
   264  	// Combinations used in the manual.
   265  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   266  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   267  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   268  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   269  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   270  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   271  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   272  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   273  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   274  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   275  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   276  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   277  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   278  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   279  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   280  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   281  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   282  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   283  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   284  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   285  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   286  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   287  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   288  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   289  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   290  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   291  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   292  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   293  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   294  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   295  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   296  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   297  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   298  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   299  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   300  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   301  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   302  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   303  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   304  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   305  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   306  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   307  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   308  )
   309  
   310  var ycover [Ymax * Ymax]uint8
   311  
   312  var reg [MAXREG]int
   313  
   314  var regrex [MAXREG + 1]int
   315  
   316  var ynone = []ytab{
   317  	{Ynone, Ynone, Ynone, Zlit, 1},
   318  }
   319  
   320  var ytext = []ytab{
   321  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   322  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   323  }
   324  
   325  var ynop = []ytab{
   326  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   327  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   328  	{Ynone, Ynone, Yml, Zpseudo, 0},
   329  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   330  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   331  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   332  	{Yml, Ynone, Ynone, Zpseudo, 0},
   333  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   334  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   335  }
   336  
   337  var yfuncdata = []ytab{
   338  	{Yi32, Ynone, Ym, Zpseudo, 0},
   339  }
   340  
   341  var ypcdata = []ytab{
   342  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   343  }
   344  
   345  var yxorb = []ytab{
   346  	{Yi32, Ynone, Yal, Zib_, 1},
   347  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   348  	{Yrb, Ynone, Ymb, Zr_m, 1},
   349  	{Ymb, Ynone, Yrb, Zm_r, 1},
   350  }
   351  
   352  var yxorl = []ytab{
   353  	{Yi8, Ynone, Yml, Zibo_m, 2},
   354  	{Yi32, Ynone, Yax, Zil_, 1},
   355  	{Yi32, Ynone, Yml, Zilo_m, 2},
   356  	{Yrl, Ynone, Yml, Zr_m, 1},
   357  	{Yml, Ynone, Yrl, Zm_r, 1},
   358  }
   359  
   360  var yaddl = []ytab{
   361  	{Yi8, Ynone, Yml, Zibo_m, 2},
   362  	{Yi32, Ynone, Yax, Zil_, 1},
   363  	{Yi32, Ynone, Yml, Zilo_m, 2},
   364  	{Yrl, Ynone, Yml, Zr_m, 1},
   365  	{Yml, Ynone, Yrl, Zm_r, 1},
   366  }
   367  
   368  var yincb = []ytab{
   369  	{Ynone, Ynone, Ymb, Zo_m, 2},
   370  }
   371  
   372  var yincw = []ytab{
   373  	{Ynone, Ynone, Yml, Zo_m, 2},
   374  }
   375  
   376  var yincl = []ytab{
   377  	{Ynone, Ynone, Yrl, Z_rp, 1},
   378  	{Ynone, Ynone, Yml, Zo_m, 2},
   379  }
   380  
   381  var yincq = []ytab{
   382  	{Ynone, Ynone, Yml, Zo_m, 2},
   383  }
   384  
   385  var ycmpb = []ytab{
   386  	{Yal, Ynone, Yi32, Z_ib, 1},
   387  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   388  	{Ymb, Ynone, Yrb, Zm_r, 1},
   389  	{Yrb, Ynone, Ymb, Zr_m, 1},
   390  }
   391  
   392  var ycmpl = []ytab{
   393  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   394  	{Yax, Ynone, Yi32, Z_il, 1},
   395  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   396  	{Yml, Ynone, Yrl, Zm_r, 1},
   397  	{Yrl, Ynone, Yml, Zr_m, 1},
   398  }
   399  
   400  var yshb = []ytab{
   401  	{Yi1, Ynone, Ymb, Zo_m, 2},
   402  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   403  	{Ycx, Ynone, Ymb, Zo_m, 2},
   404  }
   405  
   406  var yshl = []ytab{
   407  	{Yi1, Ynone, Yml, Zo_m, 2},
   408  	{Yi32, Ynone, Yml, Zibo_m, 2},
   409  	{Ycl, Ynone, Yml, Zo_m, 2},
   410  	{Ycx, Ynone, Yml, Zo_m, 2},
   411  }
   412  
   413  var ytestb = []ytab{
   414  	{Yi32, Ynone, Yal, Zib_, 1},
   415  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   416  	{Yrb, Ynone, Ymb, Zr_m, 1},
   417  	{Ymb, Ynone, Yrb, Zm_r, 1},
   418  }
   419  
   420  var ytestl = []ytab{
   421  	{Yi32, Ynone, Yax, Zil_, 1},
   422  	{Yi32, Ynone, Yml, Zilo_m, 2},
   423  	{Yrl, Ynone, Yml, Zr_m, 1},
   424  	{Yml, Ynone, Yrl, Zm_r, 1},
   425  }
   426  
   427  var ymovb = []ytab{
   428  	{Yrb, Ynone, Ymb, Zr_m, 1},
   429  	{Ymb, Ynone, Yrb, Zm_r, 1},
   430  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   431  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   432  }
   433  
   434  var ymbs = []ytab{
   435  	{Ymb, Ynone, Ynone, Zm_o, 2},
   436  }
   437  
   438  var ybtl = []ytab{
   439  	{Yi8, Ynone, Yml, Zibo_m, 2},
   440  	{Yrl, Ynone, Yml, Zr_m, 1},
   441  }
   442  
   443  var ymovw = []ytab{
   444  	{Yrl, Ynone, Yml, Zr_m, 1},
   445  	{Yml, Ynone, Yrl, Zm_r, 1},
   446  	{Yi0, Ynone, Yrl, Zclr, 1},
   447  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   448  	{Yi32, Ynone, Yml, Zilo_m, 2},
   449  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   450  }
   451  
   452  var ymovl = []ytab{
   453  	{Yrl, Ynone, Yml, Zr_m, 1},
   454  	{Yml, Ynone, Yrl, Zm_r, 1},
   455  	{Yi0, Ynone, Yrl, Zclr, 1},
   456  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   457  	{Yi32, Ynone, Yml, Zilo_m, 2},
   458  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   459  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   460  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   461  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   462  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   463  }
   464  
   465  var yret = []ytab{
   466  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   467  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   468  }
   469  
   470  var ymovq = []ytab{
   471  	// valid in 32-bit mode
   472  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   473  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   474  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   475  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   476  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   477  
   478  	// valid only in 64-bit mode, usually with 64-bit prefix
   479  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   480  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   481  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   482  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   483  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   484  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   485  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   486  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   487  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   488  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   489  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   490  }
   491  
   492  var ym_rl = []ytab{
   493  	{Ym, Ynone, Yrl, Zm_r, 1},
   494  }
   495  
   496  var yrl_m = []ytab{
   497  	{Yrl, Ynone, Ym, Zr_m, 1},
   498  }
   499  
   500  var ymb_rl = []ytab{
   501  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   502  }
   503  
   504  var yml_rl = []ytab{
   505  	{Yml, Ynone, Yrl, Zm_r, 1},
   506  }
   507  
   508  var yrl_ml = []ytab{
   509  	{Yrl, Ynone, Yml, Zr_m, 1},
   510  }
   511  
   512  var yml_mb = []ytab{
   513  	{Yrb, Ynone, Ymb, Zr_m, 1},
   514  	{Ymb, Ynone, Yrb, Zm_r, 1},
   515  }
   516  
   517  var yrb_mb = []ytab{
   518  	{Yrb, Ynone, Ymb, Zr_m, 1},
   519  }
   520  
   521  var yxchg = []ytab{
   522  	{Yax, Ynone, Yrl, Z_rp, 1},
   523  	{Yrl, Ynone, Yax, Zrp_, 1},
   524  	{Yrl, Ynone, Yml, Zr_m, 1},
   525  	{Yml, Ynone, Yrl, Zm_r, 1},
   526  }
   527  
   528  var ydivl = []ytab{
   529  	{Yml, Ynone, Ynone, Zm_o, 2},
   530  }
   531  
   532  var ydivb = []ytab{
   533  	{Ymb, Ynone, Ynone, Zm_o, 2},
   534  }
   535  
   536  var yimul = []ytab{
   537  	{Yml, Ynone, Ynone, Zm_o, 2},
   538  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   539  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   540  	{Yml, Ynone, Yrl, Zm_r, 2},
   541  }
   542  
   543  var yimul3 = []ytab{
   544  	{Yi8, Yml, Yrl, Zibm_r, 2},
   545  }
   546  
   547  var ybyte = []ytab{
   548  	{Yi64, Ynone, Ynone, Zbyte, 1},
   549  }
   550  
   551  var yin = []ytab{
   552  	{Yi32, Ynone, Ynone, Zib_, 1},
   553  	{Ynone, Ynone, Ynone, Zlit, 1},
   554  }
   555  
   556  var yint = []ytab{
   557  	{Yi32, Ynone, Ynone, Zib_, 1},
   558  }
   559  
   560  var ypushl = []ytab{
   561  	{Yrl, Ynone, Ynone, Zrp_, 1},
   562  	{Ym, Ynone, Ynone, Zm_o, 2},
   563  	{Yi8, Ynone, Ynone, Zib_, 1},
   564  	{Yi32, Ynone, Ynone, Zil_, 1},
   565  }
   566  
   567  var ypopl = []ytab{
   568  	{Ynone, Ynone, Yrl, Z_rp, 1},
   569  	{Ynone, Ynone, Ym, Zo_m, 2},
   570  }
   571  
   572  var ybswap = []ytab{
   573  	{Ynone, Ynone, Yrl, Z_rp, 2},
   574  }
   575  
   576  var yscond = []ytab{
   577  	{Ynone, Ynone, Ymb, Zo_m, 2},
   578  }
   579  
   580  var yjcond = []ytab{
   581  	{Ynone, Ynone, Ybr, Zbr, 0},
   582  	{Yi0, Ynone, Ybr, Zbr, 0},
   583  	{Yi1, Ynone, Ybr, Zbr, 1},
   584  }
   585  
   586  var yloop = []ytab{
   587  	{Ynone, Ynone, Ybr, Zloop, 1},
   588  }
   589  
   590  var ycall = []ytab{
   591  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   592  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   593  	{Ynone, Ynone, Yindir, Zcallind, 2},
   594  	{Ynone, Ynone, Ybr, Zcall, 0},
   595  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   596  }
   597  
   598  var yduff = []ytab{
   599  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   600  }
   601  
   602  var yjmp = []ytab{
   603  	{Ynone, Ynone, Yml, Zo_m64, 2},
   604  	{Ynone, Ynone, Ybr, Zjmp, 0},
   605  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   606  }
   607  
   608  var yfmvd = []ytab{
   609  	{Ym, Ynone, Yf0, Zm_o, 2},
   610  	{Yf0, Ynone, Ym, Zo_m, 2},
   611  	{Yrf, Ynone, Yf0, Zm_o, 2},
   612  	{Yf0, Ynone, Yrf, Zo_m, 2},
   613  }
   614  
   615  var yfmvdp = []ytab{
   616  	{Yf0, Ynone, Ym, Zo_m, 2},
   617  	{Yf0, Ynone, Yrf, Zo_m, 2},
   618  }
   619  
   620  var yfmvf = []ytab{
   621  	{Ym, Ynone, Yf0, Zm_o, 2},
   622  	{Yf0, Ynone, Ym, Zo_m, 2},
   623  }
   624  
   625  var yfmvx = []ytab{
   626  	{Ym, Ynone, Yf0, Zm_o, 2},
   627  }
   628  
   629  var yfmvp = []ytab{
   630  	{Yf0, Ynone, Ym, Zo_m, 2},
   631  }
   632  
   633  var yfcmv = []ytab{
   634  	{Yrf, Ynone, Yf0, Zm_o, 2},
   635  }
   636  
   637  var yfadd = []ytab{
   638  	{Ym, Ynone, Yf0, Zm_o, 2},
   639  	{Yrf, Ynone, Yf0, Zm_o, 2},
   640  	{Yf0, Ynone, Yrf, Zo_m, 2},
   641  }
   642  
   643  var yfaddp = []ytab{
   644  	{Yf0, Ynone, Yrf, Zo_m, 2},
   645  }
   646  
   647  var yfxch = []ytab{
   648  	{Yf0, Ynone, Yrf, Zo_m, 2},
   649  	{Yrf, Ynone, Yf0, Zm_o, 2},
   650  }
   651  
   652  var ycompp = []ytab{
   653  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   654  }
   655  
   656  var ystsw = []ytab{
   657  	{Ynone, Ynone, Ym, Zo_m, 2},
   658  	{Ynone, Ynone, Yax, Zlit, 1},
   659  }
   660  
   661  var ystcw = []ytab{
   662  	{Ynone, Ynone, Ym, Zo_m, 2},
   663  	{Ym, Ynone, Ynone, Zm_o, 2},
   664  }
   665  
   666  var ysvrs = []ytab{
   667  	{Ynone, Ynone, Ym, Zo_m, 2},
   668  	{Ym, Ynone, Ynone, Zm_o, 2},
   669  }
   670  
   671  var ymm = []ytab{
   672  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   673  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   674  }
   675  
   676  var yxm = []ytab{
   677  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   678  }
   679  
   680  var yxm_q4 = []ytab{
   681  	{Yxm, Ynone, Yxr, Zm_r, 1},
   682  }
   683  
   684  var yxcvm1 = []ytab{
   685  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   686  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   687  }
   688  
   689  var yxcvm2 = []ytab{
   690  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   691  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   692  }
   693  
   694  /*
   695  var yxmq = []ytab{
   696  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   697  }
   698  */
   699  
   700  var yxr = []ytab{
   701  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   702  }
   703  
   704  var yxr_ml = []ytab{
   705  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   706  }
   707  
   708  var ymr = []ytab{
   709  	{Ymr, Ynone, Ymr, Zm_r, 1},
   710  }
   711  
   712  var ymr_ml = []ytab{
   713  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   714  }
   715  
   716  var yxcmp = []ytab{
   717  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   718  }
   719  
   720  var yxcmpi = []ytab{
   721  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   722  }
   723  
   724  var yxmov = []ytab{
   725  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   726  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   727  }
   728  
   729  var yxcvfl = []ytab{
   730  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   731  }
   732  
   733  var yxcvlf = []ytab{
   734  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   735  }
   736  
   737  var yxcvfq = []ytab{
   738  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   739  }
   740  
   741  var yxcvqf = []ytab{
   742  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   743  }
   744  
   745  var yps = []ytab{
   746  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   747  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   748  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   749  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   750  }
   751  
   752  var yxrrl = []ytab{
   753  	{Yxr, Ynone, Yrl, Zm_r, 1},
   754  }
   755  
   756  var ymfp = []ytab{
   757  	{Ymm, Ynone, Ymr, Zm_r_3d, 1},
   758  }
   759  
   760  var ymrxr = []ytab{
   761  	{Ymr, Ynone, Yxr, Zm_r, 1},
   762  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   763  }
   764  
   765  var ymshuf = []ytab{
   766  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   767  }
   768  
   769  var ymshufb = []ytab{
   770  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   771  }
   772  
   773  var yxshuf = []ytab{
   774  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   775  }
   776  
   777  var yextrw = []ytab{
   778  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   779  }
   780  
   781  var yextr = []ytab{
   782  	{Yu8, Yxr, Ymm, Zibr_m, 3},
   783  }
   784  
   785  var yinsrw = []ytab{
   786  	{Yu8, Yml, Yxr, Zibm_r, 2},
   787  }
   788  
   789  var yinsr = []ytab{
   790  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   791  }
   792  
   793  var ypsdq = []ytab{
   794  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   795  }
   796  
   797  var ymskb = []ytab{
   798  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   799  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   800  }
   801  
   802  var ycrc32l = []ytab{
   803  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   804  }
   805  
   806  var yprefetch = []ytab{
   807  	{Ym, Ynone, Ynone, Zm_o, 2},
   808  }
   809  
   810  var yaes = []ytab{
   811  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   812  }
   813  
   814  var yaes2 = []ytab{
   815  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   816  }
   817  
   818  var yxbegin = []ytab{
   819  	{Ynone, Ynone, Ybr, Zjmp, 1},
   820  }
   821  
   822  var yxabort = []ytab{
   823  	{Yu8, Ynone, Ynone, Zib_, 1},
   824  }
   825  
   826  var ylddqu = []ytab{
   827  	{Ym, Ynone, Yxr, Zm_r, 1},
   828  }
   829  
   830  // VEX instructions that come in two forms:
   831  //	VTHING xmm2/m128, xmmV, xmm1
   832  //	VTHING ymm2/m256, ymmV, ymm1
   833  // The opcode array in the corresponding Optab entry
   834  // should contain the (VEX prefixes, opcode byte) pair
   835  // for each of the two forms.
   836  // For example, the entries for VPXOR are:
   837  //
   838  //	VPXOR xmm2/m128, xmmV, xmm1
   839  //	VEX.NDS.128.66.0F.WIG EF /r
   840  //
   841  //	VPXOR ymm2/m256, ymmV, ymm1
   842  //	VEX.NDS.256.66.0F.WIG EF /r
   843  //
   844  // The NDS/NDD/DDS part can be dropped, producing this
   845  // Optab entry:
   846  //
   847  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
   848  //
   849  var yvex_xy3 = []ytab{
   850  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   851  	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
   852  }
   853  
   854  var yvex_r3 = []ytab{
   855  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   856  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   857  }
   858  
   859  var yvex_vmr3 = []ytab{
   860  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   861  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   862  }
   863  
   864  var yvex_xy2 = []ytab{
   865  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   866  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   867  }
   868  
   869  var yvex_xyr2 = []ytab{
   870  	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
   871  	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
   872  }
   873  
   874  var yvex_vmovdqa = []ytab{
   875  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   876  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   877  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   878  	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
   879  }
   880  
   881  var yvex_vmovntdq = []ytab{
   882  	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
   883  	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
   884  }
   885  
   886  var yvex_vpbroadcast = []ytab{
   887  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   888  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   889  }
   890  
   891  var yvex_xxmyxm = []ytab{
   892  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   893  	{Yyr, Ynone, Yxm, Zvex_r_v_rm, 2},
   894  }
   895  
   896  var ymmxmm0f38 = []ytab{
   897  	{Ymm, Ynone, Ymr, Zlitm_r, 3},
   898  	{Yxm, Ynone, Yxr, Zlitm_r, 5},
   899  }
   900  
   901  /*
   902   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   903   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   904   * the entry with the given p->as and then looks through the ytable for that
   905   * instruction (the second field in the optab struct) for a line whose first
   906   * two values match the Ytypes of the p->from and p->to operands.  The function
   907   * oclass in span.c computes the specific Ytype of an operand and then the set
   908   * of more general Ytypes that it satisfies is implied by the ycover table, set
   909   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   910   * from the more general 8-bit constants, but instinit says
   911   *
   912   *        ycover[Yi0*Ymax + Ys32] = 1;
   913   *        ycover[Yi1*Ymax + Ys32] = 1;
   914   *        ycover[Yi8*Ymax + Ys32] = 1;
   915   *
   916   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   917   * if that's what an instruction can handle.
   918   *
 * In parallel with the scan through the ytable for the appropriate line, there
 * is a z pointer that starts out pointing at the strange magic byte list in
 * the Optab struct.  With each step past a non-matching ytable line, z
 * advances by the last entry in the line (the zoffset field of ytab; this was
 * the 4th entry in the original C layout shown below).  When a matching line
 * is found, that z pointer has the extra data to use in laying down the
 * instruction bytes.  The actual bytes laid down are a function of the zcase
 * entry in the line (that is, the Ztype; the 3rd entry in the C layout) and
 * the z bytes.
   926   *
   927   * For example, let's look at AADDL.  The optab line says:
   928   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   929   *
   930   * and yaddl says
   931   *        uchar   yaddl[] =
   932   *        {
   933   *                Yi8,    Yml,    Zibo_m, 2,
   934   *                Yi32,   Yax,    Zil_,   1,
   935   *                Yi32,   Yml,    Zilo_m, 2,
   936   *                Yrl,    Yml,    Zr_m,   1,
   937   *                Yml,    Yrl,    Zm_r,   1,
   938   *                0
   939   *        };
   940   *
   941   * so there are 5 possible types of ADDL instruction that can be laid down, and
   942   * possible states used to lay them down (Ztype and z pointer, assuming z
   943   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   944   *
   945   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   946   *        Yi32, Yax -> Zil_, z+2 (0x05)
   947   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   948   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   949   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   950   *
   951   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   952   * relatively straightforward as this program goes.
   953   *
   954   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   955   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   956   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   957   * Zilo_m is the same but a long (32-bit) immediate.
   958   */
   959  var optab =
   960  /*	as, ytab, andproto, opcode */
   961  []Optab{
   962  	{obj.AXXX, nil, 0, [23]uint8{}},
   963  	{AAAA, ynone, P32, [23]uint8{0x37}},
   964  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   965  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   966  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   967  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   968  	{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   969  	{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   970  	{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   971  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   972  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   973  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   974  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   975  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   976  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   977  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   978  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   979  	{AADJSP, nil, 0, [23]uint8{}},
   980  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   981  	{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   982  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   983  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   984  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   985  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
   986  	{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   987  	{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   988  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   989  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   990  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   991  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   992  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   993  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   994  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   995  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   996  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   997  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   998  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   999  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
  1000  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
  1001  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
  1002  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
  1003  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
  1004  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
  1005  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
  1006  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
  1007  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
  1008  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
  1009  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
  1010  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
  1011  	{ABYTE, ybyte, Px, [23]uint8{1}},
  1012  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
  1013  	{ACDQ, ynone, Px, [23]uint8{0x99}},
  1014  	{ACLC, ynone, Px, [23]uint8{0xf8}},
  1015  	{ACLD, ynone, Px, [23]uint8{0xfc}},
  1016  	{ACLI, ynone, Px, [23]uint8{0xfa}},
  1017  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
  1018  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1019  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1020  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1021  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1022  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1023  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1024  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1025  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1026  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1027  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1028  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1029  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1030  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1031  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1032  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1033  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1034  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1035  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1036  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1037  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1038  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1039  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1040  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1041  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1042  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1043  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1044  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1045  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1046  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1047  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1048  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1049  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1050  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1051  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1052  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1053  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1054  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1055  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1056  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1057  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1058  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1059  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1060  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1061  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1062  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1063  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1064  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1065  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1066  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1067  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1068  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1069  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1070  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1071  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1072  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1073  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1074  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1075  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1076  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1077  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1078  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1079  	{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
  1080  	{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
  1081  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1082  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1083  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1084  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1085  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1086  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1087  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1088  	{API2FW, ymfp, Px, [23]uint8{0x0c}},
  1089  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1090  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1091  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1092  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1093  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1094  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1095  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1096  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1097  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1098  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1099  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1100  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1101  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1102  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1103  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1104  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1105  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1106  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1107  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1108  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1109  	{obj.ADATA, nil, 0, [23]uint8{}},
  1110  	{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
  1111  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1112  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1113  	{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
  1114  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1115  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1116  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1117  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1118  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1119  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1120  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1121  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1122  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1123  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1124  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1125  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1126  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1127  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1128  	{obj.AGLOBL, nil, 0, [23]uint8{}},
  1129  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1130  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1131  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1132  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1133  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1134  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1135  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1136  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1137  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1138  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1139  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1140  	{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
  1141  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1142  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1143  	{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
  1144  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1145  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1146  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1147  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1148  	{AINT, yint, Px, [23]uint8{0xcd}},
  1149  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1150  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1151  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1152  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1153  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1154  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1155  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1156  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1157  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1158  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1159  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1160  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1161  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1162  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1163  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1164  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1165  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1166  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1167  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1168  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1169  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1170  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1171  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1172  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1173  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1174  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1175  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1176  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1177  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1178  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1179  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1180  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1181  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1182  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1183  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1184  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1185  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1186  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1187  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1188  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1189  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1190  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1191  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1192  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1193  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1194  	{ALONG, ybyte, Px, [23]uint8{4}},
  1195  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1196  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1197  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1198  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1199  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1200  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1201  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1202  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1203  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1204  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1205  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1206  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1207  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1208  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1209  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1210  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1211  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1212  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1213  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1214  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1215  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1216  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1217  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1218  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1219  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1220  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1221  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1222  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1223  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1224  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1225  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1226  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1227  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1228  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1229  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1230  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1231  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1232  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1233  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1234  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1235  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1236  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1237  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1238  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1239  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1240  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1241  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1242  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1243  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1244  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1245  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1246  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1247  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1248  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1249  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1250  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1251  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1252  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1253  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1254  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1255  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1256  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1257  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1258  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1259  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1260  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1261  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1262  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1263  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1264  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1265  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1266  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1267  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1268  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1269  	{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1270  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1271  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1272  	{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1273  	{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1274  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1275  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1276  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1277  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1278  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1279  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1280  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1281  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1282  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1283  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1284  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1285  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1286  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1287  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1288  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1289  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1290  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1291  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1292  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1293  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1294  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1295  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1296  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1297  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1298  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1299  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1300  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1301  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1302  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1303  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1304  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1305  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1306  	{APF2IL, ymfp, Px, [23]uint8{0x1d}},
  1307  	{APF2IW, ymfp, Px, [23]uint8{0x1c}},
  1308  	{API2FL, ymfp, Px, [23]uint8{0x0d}},
  1309  	{APFACC, ymfp, Px, [23]uint8{0xae}},
  1310  	{APFADD, ymfp, Px, [23]uint8{0x9e}},
  1311  	{APFCMPEQ, ymfp, Px, [23]uint8{0xb0}},
  1312  	{APFCMPGE, ymfp, Px, [23]uint8{0x90}},
  1313  	{APFCMPGT, ymfp, Px, [23]uint8{0xa0}},
  1314  	{APFMAX, ymfp, Px, [23]uint8{0xa4}},
  1315  	{APFMIN, ymfp, Px, [23]uint8{0x94}},
  1316  	{APFMUL, ymfp, Px, [23]uint8{0xb4}},
  1317  	{APFNACC, ymfp, Px, [23]uint8{0x8a}},
  1318  	{APFPNACC, ymfp, Px, [23]uint8{0x8e}},
  1319  	{APFRCP, ymfp, Px, [23]uint8{0x96}},
  1320  	{APFRCPIT1, ymfp, Px, [23]uint8{0xa6}},
  1321  	{APFRCPI2T, ymfp, Px, [23]uint8{0xb6}},
  1322  	{APFRSQIT1, ymfp, Px, [23]uint8{0xa7}},
  1323  	{APFRSQRT, ymfp, Px, [23]uint8{0x97}},
  1324  	{APFSUB, ymfp, Px, [23]uint8{0x9a}},
  1325  	{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
  1326  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1327  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1328  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1329  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1330  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1331  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1332  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1333  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1334  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1335  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1336  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1337  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1338  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1339  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1340  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1341  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1342  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1343  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1344  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1345  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1346  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1347  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1348  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1349  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1350  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1351  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1352  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1353  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1354  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1355  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1356  	{APMULHRW, ymfp, Px, [23]uint8{0xb7}},
  1357  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1358  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1359  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1360  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1361  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1362  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1363  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1364  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1365  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1366  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1367  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1368  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1369  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1370  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1371  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1372  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1373  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1374  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1375  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1376  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1377  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1378  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1379  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1380  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1381  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1382  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1383  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1384  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1385  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1386  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1387  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1388  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1389  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1390  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1391  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1392  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1393  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1394  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1395  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1396  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1397  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1398  	{APSWAPL, ymfp, Px, [23]uint8{0xbb}},
  1399  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1400  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1401  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1402  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1403  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1404  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1405  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1406  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1407  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1408  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1409  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1410  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1411  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1412  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1413  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1414  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1415  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1416  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1417  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1418  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1419  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1420  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1421  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1422  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1423  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1424  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1425  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1426  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1427  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1428  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1429  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1430  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1431  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1432  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1433  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1434  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1435  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1436  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1437  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1438  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1439  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1440  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1441  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1442  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1443  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1444  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1445  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1446  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1447  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1448  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1449  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1450  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1451  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1452  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1453  	{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1454  	{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1455  	{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1456  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1457  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1458  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1459  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1460  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1461  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1462  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1463  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1464  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1465  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1466  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1467  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1468  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1469  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1470  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1471  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1472  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1473  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1474  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1475  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1476  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1477  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1478  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1479  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1480  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1481  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1482  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1483  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1484  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1485  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1486  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1487  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1488  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1489  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1490  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1491  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1492  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1493  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1494  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1495  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1496  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1497  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1498  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1499  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1500  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1501  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1502  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1503  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1504  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1505  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1506  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1507  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1508  	{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1509  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1510  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1511  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1512  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1513  	{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1514  	{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1515  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1516  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1517  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1518  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1519  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1520  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1521  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1522  	{AWORD, ybyte, Px, [23]uint8{2}},
  1523  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1524  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1525  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1526  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1527  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1528  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1529  	{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1530  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1531  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1532  	{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1533  	{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1534  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1535  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1536  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1537  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1538  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1539  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1540  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1541  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1542  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1543  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1544  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1545  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1546  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1547  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1548  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1549  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1550  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1551  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1552  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1553  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1554  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1555  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1556  	{AFCOMB, nil, 0, [23]uint8{}},
  1557  	{AFCOMBP, nil, 0, [23]uint8{}},
  1558  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1559  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1560  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1561  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1562  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1563  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1564  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1565  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1566  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1567  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1568  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1569  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1570  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1571  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1572  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1573  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1574  	{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1575  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1576  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1577  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1578  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1579  	{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1580  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1581  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1582  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1583  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1584  	{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1585  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1586  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1587  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1588  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1589  	{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1590  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1591  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1592  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1593  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1594  	{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1595  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1596  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1597  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1598  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1599  	{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1600  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1601  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1602  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1603  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1604  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1605  	{AFFREE, nil, 0, [23]uint8{}},
  1606  	{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1607  	{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1608  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1609  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1610  	{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1611  	{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1612  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1613  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1614  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1615  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1616  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1617  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1618  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1619  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1620  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1621  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1622  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1623  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1624  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1625  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1626  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1627  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1628  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1629  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1630  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1631  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1632  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1633  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1634  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1635  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1636  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1637  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1638  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1639  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1640  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1641  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1642  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1643  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1644  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1645  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1646  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1647  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1648  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1649  	{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1650  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1651  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1652  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1653  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1654  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1655  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1656  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1657  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1658  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1659  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1660  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1661  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1662  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1663  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1664  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1665  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1666  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1667  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1668  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1669  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1670  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1671  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1672  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1673  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1674  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1675  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1676  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1677  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1678  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1679  	{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1680  	{AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
  1681  	{AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
  1682  	{AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1683  	{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1684  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1685  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1686  
  1687  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1688  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1689  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1690  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1691  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1692  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1693  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1694  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1695  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1696  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1697  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1698  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1699  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1700  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1701  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1702  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1703  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1704  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1705  
  1706  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1707  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1708  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1709  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1710  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1711  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1712  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1713  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1714  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1715  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1716  
  1717  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1718  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1719  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1720  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1721  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1722  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1723  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1724  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1725  	{obj.ATYPE, nil, 0, [23]uint8{}},
  1726  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1727  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1728  	{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1729  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1730  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1731  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1732  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1733  	{obj.AEND, nil, 0, [23]uint8{}},
  1734  	{0, nil, 0, [23]uint8{}},
  1735  }
  1736  
// opindex maps an instruction opcode, masked with obj.AMask, to its optab
// entry. It is filled in by instinit, which aborts if two optab entries
// land on the same slot.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1738  
  1739  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1740  // This happens on systems like Solaris that call .so functions instead of system calls.
  1741  // It does not seem to be necessary for any other systems. This is probably working
  1742  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1743  // what that bug is. And this does fix it.
  1744  func isextern(s *obj.LSym) bool {
  1745  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1746  	return strings.HasPrefix(s.Name, "libc_")
  1747  }
  1748  
  1749  // single-instruction no-ops of various lengths.
  1750  // constructed by hand and disassembled with gdb to verify.
  1751  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1752  var nop = [][16]uint8{
  1753  	{0x90},
  1754  	{0x66, 0x90},
  1755  	{0x0F, 0x1F, 0x00},
  1756  	{0x0F, 0x1F, 0x40, 0x00},
  1757  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1758  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1759  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1760  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1761  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1762  }
  1763  
  1764  // Native Client rejects the repeated 0x66 prefix.
  1765  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1766  func fillnop(p []byte, n int) {
  1767  	var m int
  1768  
  1769  	for n > 0 {
  1770  		m = n
  1771  		if m > len(nop) {
  1772  			m = len(nop)
  1773  		}
  1774  		copy(p[:m], nop[m-1][:m])
  1775  		p = p[m:]
  1776  		n -= m
  1777  	}
  1778  }
  1779  
  1780  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1781  	obj.Symgrow(ctxt, s, int64(c)+int64(pad))
  1782  	fillnop(s.P[c:], int(pad))
  1783  	return c + pad
  1784  }
  1785  
  1786  func spadjop(ctxt *obj.Link, p *obj.Prog, l int, q int) int {
  1787  	if p.Mode != 64 || ctxt.Arch.Ptrsize == 4 {
  1788  		return l
  1789  	}
  1790  	return q
  1791  }
  1792  
// span6 assembles the instruction list s.Text into machine code stored in
// s.P and records the final size in s.Size.
//
// It first normalizes the instruction list (self-targets for branches
// without a destination, ADJSP rewritten to ADD/SUB on SP), then encodes
// the whole list repeatedly. Every pass starts by assuming the sizes from
// the previous pass; whenever an instruction's encoded size changes or a
// short forward branch turns out not to reach, another pass is requested.
// The loop ends when a pass makes no such change, and aborts after more
// than 20 passes. Finally the code is padded out to FuncAlign with INT $3.
//
// If s.P is already non-nil the symbol is left untouched.
func span6(ctxt *obj.Link, s *obj.LSym) {
	ctxt.Cursym = s

	if s.P != nil {
		return
	}

	// instinit sets ycover[0] to 1, so a zero here means the lookup
	// tables have not been built yet.
	if ycover[0] == 0 {
		instinit()
	}

	// Pass over the list once to normalize it before any encoding.
	var v int32
	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
		// A branch with no resolved target is made to target itself.
		if p.To.Type == obj.TYPE_BRANCH {
			if p.Pcond == nil {
				p.Pcond = p
			}
		}
		// Rewrite ADJSP $n as an ADD of -n to SP, as a SUB when -n is
		// negative, or as a NOP when the adjustment is zero.
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			v = int32(-p.From.Offset)
			p.From.Offset = int64(v)
			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
			if v < 0 {
				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
				v = -v
				p.From.Offset = int64(v)
			}

			if v == 0 {
				p.As = obj.ANOP
			}
		}
	}

	// Initialize the per-instruction Back flags:
	//   1 = backward jump, 2 = short branch encoding, 4 = loop head.
	// A target that already carries bit 2 was visited earlier in this
	// walk, so the branch to it goes backwards.
	var q *obj.Prog
	for p := s.Text; p != nil; p = p.Link {
		p.Back = 2 // use short branches first time through
		q = p.Pcond
		if q != nil && (q.Back&2 != 0) {
			p.Back |= 1 // backward jump
			q.Back |= 4 // loop head
		}

		// NOTE(review): the loop above already rewrote every AADJSP, so
		// this branch looks unreachable — confirm before removing.
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			v = int32(-p.From.Offset)
			p.From.Offset = int64(v)
			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
			if v < 0 {
				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
				v = -v
				p.From.Offset = int64(v)
			}

			if v == 0 {
				p.As = obj.ANOP
			}
		}
	}

	n := 0 // number of encoding passes made so far
	var bp []byte
	var c int32 // current offset into s.P
	var i int
	var loop int32 // number of changes in this pass that force another pass
	var m int      // encoded size of the current instruction
	var p *obj.Prog
	errors := ctxt.Errors // error count on entry, to notice new diagnostics
	for {
		loop = 0
		// Discard the relocations and bytes produced by the previous pass.
		for i = 0; i < len(s.R); i++ {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		for p = s.Text; p != nil; p = p.Link {
			// Native Client alignment rules; they need the size from the
			// previous pass, so they only apply once p.Isize is known.
			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
				// NOTE(review): deferreturn is declared inside the loop
				// body, so it is nil on every iteration and the lookup
				// below runs each time.
				var deferreturn *obj.LSym

				if deferreturn == nil {
					deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
				}

				// pad everything to avoid crossing 32-byte boundary
				if c>>5 != (c+int32(p.Isize)-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// pad call deferreturn to start at 32-byte boundary
				// so that subtracting 5 in jmpdefer will jump back
				// to that boundary and rerun the call.
				if p.As == obj.ACALL && p.To.Sym == deferreturn {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// pad call to end at 32-byte boundary
				if p.As == obj.ACALL {
					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
				}

				// the linker treats REP and STOSQ as different instructions
				// but in fact the REP is a prefix on the STOSQ.
				// make sure REP has room for 2 more bytes, so that
				// padding will not be inserted before the next instruction.
				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// same for LOCK.
				// various instructions follow; the longest is 4 bytes.
				// give ourselves 8 bytes so as to avoid surprises.
				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}
			}

			// Align loop heads to LoopAlign, but only when that costs at
			// most MaxLoopPad bytes of padding.
			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
				// pad with NOPs
				v = -c & (LoopAlign - 1)

				if v <= MaxLoopPad {
					obj.Symgrow(ctxt, s, int64(c)+int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q = p.Rel; q != nil; q = q.Forwd {
				// Displacement measured from the end of the branch q
				// (q.Mark is the size q was encoded with) to p.
				v = int32(p.Pc - (q.Pc + int64(q.Mark)))
				if q.Back&2 != 0 { // short
					// Out of range for an 8-bit displacement: switch q to
					// the long form and schedule another pass.
					if v > 127 {
						loop++
						q.Back ^= 2
					}

					// These two opcodes keep their displacement byte at
					// offset 2 instead of offset 1.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: store the displacement little-endian in
					// the last four bytes of q's encoding.
					bp = s.P[q.Pc+int64(q.Mark)-4:]
					bp[0] = byte(v)
					bp = bp[1:]
					bp[0] = byte(v >> 8)
					bp = bp[1:]
					bp[0] = byte(v >> 16)
					bp = bp[1:]
					bp[0] = byte(v >> 24)
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			asmins(ctxt, p)
			// Size of what asmins produced, taken from how far ctxt.Andptr
			// has advanced (presumably from the start of ctxt.And — confirm
			// in asmins).
			m = -cap(ctxt.Andptr) + cap(ctxt.And[:])
			// A size different from the previous pass invalidates offsets
			// computed so far, so ask for another pass.
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				loop++
			}

			obj.Symgrow(ctxt, s, p.Pc+int64(m))
			copy(s.P[p.Pc:][:m], ctxt.And[:m])
			p.Mark = uint16(m)
			c += int32(m)
		}

		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if loop == 0 {
			break
		}
		// Give up without setting s.Size if this pass reported new errors.
		if ctxt.Errors > errors {
			return
		}
	}

	// Native Client: round the function up to a 32-byte boundary with NOPs.
	if ctxt.Headtype == obj.Hnacl {
		c = naclpad(ctxt, s, c, -c&31)
	}

	// Pad functions with trap instruction, to catch invalid jumps
	if c&(FuncAlign-1) != 0 {
		v = -c & (FuncAlign - 1)
		obj.Symgrow(ctxt, s, int64(c)+int64(v))
		for i := c; i < c+v; i++ {
			// 0xCC is INT $3 - breakpoint instruction
			s.P[i] = uint8(0xCC)
		}
		c += v
	}
	s.Size = int64(c)

	// Disabled debugging aid: hex dump of the generated bytes, 16 per
	// line, followed by the relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}
}
  2017  
  2018  func instinit() {
  2019  	var c int
  2020  
  2021  	for i := 1; optab[i].as != 0; i++ {
  2022  		c = int(optab[i].as)
  2023  		if opindex[c&obj.AMask] != nil {
  2024  			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
  2025  		}
  2026  		opindex[c&obj.AMask] = &optab[i]
  2027  	}
  2028  
  2029  	for i := 0; i < Ymax; i++ {
  2030  		ycover[i*Ymax+i] = 1
  2031  	}
  2032  
  2033  	ycover[Yi0*Ymax+Yi8] = 1
  2034  	ycover[Yi1*Ymax+Yi8] = 1
  2035  	ycover[Yu7*Ymax+Yi8] = 1
  2036  
  2037  	ycover[Yi0*Ymax+Yu7] = 1
  2038  	ycover[Yi1*Ymax+Yu7] = 1
  2039  
  2040  	ycover[Yi0*Ymax+Yu8] = 1
  2041  	ycover[Yi1*Ymax+Yu8] = 1
  2042  	ycover[Yu7*Ymax+Yu8] = 1
  2043  
  2044  	ycover[Yi0*Ymax+Ys32] = 1
  2045  	ycover[Yi1*Ymax+Ys32] = 1
  2046  	ycover[Yu7*Ymax+Ys32] = 1
  2047  	ycover[Yu8*Ymax+Ys32] = 1
  2048  	ycover[Yi8*Ymax+Ys32] = 1
  2049  
  2050  	ycover[Yi0*Ymax+Yi32] = 1
  2051  	ycover[Yi1*Ymax+Yi32] = 1
  2052  	ycover[Yu7*Ymax+Yi32] = 1
  2053  	ycover[Yu8*Ymax+Yi32] = 1
  2054  	ycover[Yi8*Ymax+Yi32] = 1
  2055  	ycover[Ys32*Ymax+Yi32] = 1
  2056  
  2057  	ycover[Yi0*Ymax+Yi64] = 1
  2058  	ycover[Yi1*Ymax+Yi64] = 1
  2059  	ycover[Yu7*Ymax+Yi64] = 1
  2060  	ycover[Yu8*Ymax+Yi64] = 1
  2061  	ycover[Yi8*Ymax+Yi64] = 1
  2062  	ycover[Ys32*Ymax+Yi64] = 1
  2063  	ycover[Yi32*Ymax+Yi64] = 1
  2064  
  2065  	ycover[Yal*Ymax+Yrb] = 1
  2066  	ycover[Ycl*Ymax+Yrb] = 1
  2067  	ycover[Yax*Ymax+Yrb] = 1
  2068  	ycover[Ycx*Ymax+Yrb] = 1
  2069  	ycover[Yrx*Ymax+Yrb] = 1
  2070  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2071  
  2072  	ycover[Ycl*Ymax+Ycx] = 1
  2073  
  2074  	ycover[Yax*Ymax+Yrx] = 1
  2075  	ycover[Ycx*Ymax+Yrx] = 1
  2076  
  2077  	ycover[Yax*Ymax+Yrl] = 1
  2078  	ycover[Ycx*Ymax+Yrl] = 1
  2079  	ycover[Yrx*Ymax+Yrl] = 1
  2080  	ycover[Yrl32*Ymax+Yrl] = 1
  2081  
  2082  	ycover[Yf0*Ymax+Yrf] = 1
  2083  
  2084  	ycover[Yal*Ymax+Ymb] = 1
  2085  	ycover[Ycl*Ymax+Ymb] = 1
  2086  	ycover[Yax*Ymax+Ymb] = 1
  2087  	ycover[Ycx*Ymax+Ymb] = 1
  2088  	ycover[Yrx*Ymax+Ymb] = 1
  2089  	ycover[Yrb*Ymax+Ymb] = 1
  2090  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2091  	ycover[Ym*Ymax+Ymb] = 1
  2092  
  2093  	ycover[Yax*Ymax+Yml] = 1
  2094  	ycover[Ycx*Ymax+Yml] = 1
  2095  	ycover[Yrx*Ymax+Yml] = 1
  2096  	ycover[Yrl*Ymax+Yml] = 1
  2097  	ycover[Yrl32*Ymax+Yml] = 1
  2098  	ycover[Ym*Ymax+Yml] = 1
  2099  
  2100  	ycover[Yax*Ymax+Ymm] = 1
  2101  	ycover[Ycx*Ymax+Ymm] = 1
  2102  	ycover[Yrx*Ymax+Ymm] = 1
  2103  	ycover[Yrl*Ymax+Ymm] = 1
  2104  	ycover[Yrl32*Ymax+Ymm] = 1
  2105  	ycover[Ym*Ymax+Ymm] = 1
  2106  	ycover[Ymr*Ymax+Ymm] = 1
  2107  
  2108  	ycover[Ym*Ymax+Yxm] = 1
  2109  	ycover[Yxr*Ymax+Yxm] = 1
  2110  
  2111  	ycover[Ym*Ymax+Yym] = 1
  2112  	ycover[Yyr*Ymax+Yym] = 1
  2113  
  2114  	for i := 0; i < MAXREG; i++ {
  2115  		reg[i] = -1
  2116  		if i >= REG_AL && i <= REG_R15B {
  2117  			reg[i] = (i - REG_AL) & 7
  2118  			if i >= REG_SPB && i <= REG_DIB {
  2119  				regrex[i] = 0x40
  2120  			}
  2121  			if i >= REG_R8B && i <= REG_R15B {
  2122  				regrex[i] = Rxr | Rxx | Rxb
  2123  			}
  2124  		}
  2125  
  2126  		if i >= REG_AH && i <= REG_BH {
  2127  			reg[i] = 4 + ((i - REG_AH) & 7)
  2128  		}
  2129  		if i >= REG_AX && i <= REG_R15 {
  2130  			reg[i] = (i - REG_AX) & 7
  2131  			if i >= REG_R8 {
  2132  				regrex[i] = Rxr | Rxx | Rxb
  2133  			}
  2134  		}
  2135  
  2136  		if i >= REG_F0 && i <= REG_F0+7 {
  2137  			reg[i] = (i - REG_F0) & 7
  2138  		}
  2139  		if i >= REG_M0 && i <= REG_M0+7 {
  2140  			reg[i] = (i - REG_M0) & 7
  2141  		}
  2142  		if i >= REG_X0 && i <= REG_X0+15 {
  2143  			reg[i] = (i - REG_X0) & 7
  2144  			if i >= REG_X0+8 {
  2145  				regrex[i] = Rxr | Rxx | Rxb
  2146  			}
  2147  		}
  2148  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2149  			reg[i] = (i - REG_Y0) & 7
  2150  			if i >= REG_Y0+8 {
  2151  				regrex[i] = Rxr | Rxx | Rxb
  2152  			}
  2153  		}
  2154  
  2155  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2156  			regrex[i] = Rxr
  2157  		}
  2158  	}
  2159  }
  2160  
// isAndroid records whether obj.Getgoos() reported "android" when this
// package was initialized. prefixof consults it when choosing the segment
// prefix for TLS references.
var isAndroid = (obj.Getgoos() == "android")
  2162  
  2163  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2164  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2165  		return 0
  2166  	}
  2167  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2168  		switch a.Reg {
  2169  		case REG_CS:
  2170  			return 0x2e
  2171  
  2172  		case REG_DS:
  2173  			return 0x3e
  2174  
  2175  		case REG_ES:
  2176  			return 0x26
  2177  
  2178  		case REG_FS:
  2179  			return 0x64
  2180  
  2181  		case REG_GS:
  2182  			return 0x65
  2183  
  2184  		case REG_TLS:
  2185  			// NOTE: Systems listed here should be only systems that
  2186  			// support direct TLS references like 8(TLS) implemented as
  2187  			// direct references from FS or GS. Systems that require
  2188  			// the initial-exec model, where you load the TLS base into
  2189  			// a register and then index from that register, do not reach
  2190  			// this code and should not be listed.
  2191  			if p.Mode == 32 {
  2192  				switch ctxt.Headtype {
  2193  				default:
  2194  					if isAndroid {
  2195  						return 0x65 // GS
  2196  					}
  2197  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2198  
  2199  				case obj.Hdarwin,
  2200  					obj.Hdragonfly,
  2201  					obj.Hfreebsd,
  2202  					obj.Hnetbsd,
  2203  					obj.Hopenbsd:
  2204  					return 0x65 // GS
  2205  				}
  2206  			}
  2207  
  2208  			switch ctxt.Headtype {
  2209  			default:
  2210  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2211  
  2212  			case obj.Hlinux:
  2213  				if isAndroid {
  2214  					return 0x64 // FS
  2215  				}
  2216  
  2217  				if ctxt.Flag_shared != 0 {
  2218  					log.Fatalf("unknown TLS base register for linux with -shared")
  2219  				} else {
  2220  					return 0x64 // FS
  2221  				}
  2222  
  2223  			case obj.Hdragonfly,
  2224  				obj.Hfreebsd,
  2225  				obj.Hnetbsd,
  2226  				obj.Hopenbsd,
  2227  				obj.Hsolaris:
  2228  				return 0x64 // FS
  2229  
  2230  			case obj.Hdarwin:
  2231  				return 0x65 // GS
  2232  			}
  2233  		}
  2234  	}
  2235  
  2236  	if p.Mode == 32 {
  2237  		if a.Index == REG_TLS && ctxt.Flag_shared != 0 {
  2238  			// When building for inclusion into a shared library, an instruction of the form
  2239  			//     MOVL 0(CX)(TLS*1), AX
  2240  			// becomes
  2241  			//     mov %gs:(%ecx), %eax
  2242  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2243  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2244  			// a shared library the instruction it becomes
  2245  			//     mov 0x0(%ecx), $eax
  2246  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2247  			if a.Offset != 0 {
  2248  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2249  			}
  2250  			return 0x65 // GS
  2251  		}
  2252  		return 0
  2253  	}
  2254  
  2255  	switch a.Index {
  2256  	case REG_CS:
  2257  		return 0x2e
  2258  
  2259  	case REG_DS:
  2260  		return 0x3e
  2261  
  2262  	case REG_ES:
  2263  		return 0x26
  2264  
  2265  	case REG_TLS:
  2266  		if ctxt.Flag_shared != 0 {
  2267  			// When building for inclusion into a shared library, an instruction of the form
  2268  			//     MOV 0(CX)(TLS*1), AX
  2269  			// becomes
  2270  			//     mov %fs:(%rcx), %rax
  2271  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2272  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2273  			// a shared library the instruction does not require a prefix.
  2274  			if a.Offset != 0 {
  2275  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2276  			}
  2277  			return 0x64
  2278  		}
  2279  
  2280  	case REG_FS:
  2281  		return 0x64
  2282  
  2283  	case REG_GS:
  2284  		return 0x65
  2285  	}
  2286  
  2287  	return 0
  2288  }
  2289  
  2290  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2291  	switch a.Type {
  2292  	case obj.TYPE_NONE:
  2293  		return Ynone
  2294  
  2295  	case obj.TYPE_BRANCH:
  2296  		return Ybr
  2297  
  2298  	case obj.TYPE_INDIR:
  2299  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2300  			return Yindir
  2301  		}
  2302  		return Yxxx
  2303  
  2304  	case obj.TYPE_MEM:
  2305  		if a.Name != obj.NAME_NONE {
  2306  			if ctxt.Asmode == 64 && (a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0) {
  2307  				return Yxxx
  2308  			}
  2309  		}
  2310  		return Ym
  2311  
  2312  	case obj.TYPE_ADDR:
  2313  		switch a.Name {
  2314  		case obj.NAME_GOTREF:
  2315  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2316  			return Yxxx
  2317  
  2318  		case obj.NAME_EXTERN,
  2319  			obj.NAME_STATIC:
  2320  			if a.Sym != nil && isextern(a.Sym) || (p.Mode == 32 && ctxt.Flag_shared == 0) {
  2321  				return Yi32
  2322  			}
  2323  			return Yiauto // use pc-relative addressing
  2324  
  2325  		case obj.NAME_AUTO,
  2326  			obj.NAME_PARAM:
  2327  			return Yiauto
  2328  		}
  2329  
  2330  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2331  		// and got Yi32 in an earlier version of this code.
  2332  		// Keep doing that until we fix yduff etc.
  2333  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2334  			return Yi32
  2335  		}
  2336  
  2337  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2338  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2339  		}
  2340  		fallthrough
  2341  
  2342  		// fall through
  2343  
  2344  	case obj.TYPE_CONST:
  2345  		if a.Sym != nil {
  2346  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2347  		}
  2348  
  2349  		v := a.Offset
  2350  		if p.Mode == 32 {
  2351  			v = int64(int32(v))
  2352  		}
  2353  		if v == 0 {
  2354  			return Yi0
  2355  		}
  2356  		if v == 1 {
  2357  			return Yi1
  2358  		}
  2359  		if v >= 0 && v <= 127 {
  2360  			return Yu7
  2361  		}
  2362  		if v >= 0 && v <= 255 {
  2363  			return Yu8
  2364  		}
  2365  		if v >= -128 && v <= 127 {
  2366  			return Yi8
  2367  		}
  2368  		if p.Mode == 32 {
  2369  			return Yi32
  2370  		}
  2371  		l := int32(v)
  2372  		if int64(l) == v {
  2373  			return Ys32 /* can sign extend */
  2374  		}
  2375  		if v>>32 == 0 {
  2376  			return Yi32 /* unsigned */
  2377  		}
  2378  		return Yi64
  2379  
  2380  	case obj.TYPE_TEXTSIZE:
  2381  		return Ytextsize
  2382  	}
  2383  
  2384  	if a.Type != obj.TYPE_REG {
  2385  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2386  		return Yxxx
  2387  	}
  2388  
  2389  	switch a.Reg {
  2390  	case REG_AL:
  2391  		return Yal
  2392  
  2393  	case REG_AX:
  2394  		return Yax
  2395  
  2396  		/*
  2397  			case REG_SPB:
  2398  		*/
  2399  	case REG_BPB,
  2400  		REG_SIB,
  2401  		REG_DIB,
  2402  		REG_R8B,
  2403  		REG_R9B,
  2404  		REG_R10B,
  2405  		REG_R11B,
  2406  		REG_R12B,
  2407  		REG_R13B,
  2408  		REG_R14B,
  2409  		REG_R15B:
  2410  		if ctxt.Asmode != 64 {
  2411  			return Yxxx
  2412  		}
  2413  		fallthrough
  2414  
  2415  	case REG_DL,
  2416  		REG_BL,
  2417  		REG_AH,
  2418  		REG_CH,
  2419  		REG_DH,
  2420  		REG_BH:
  2421  		return Yrb
  2422  
  2423  	case REG_CL:
  2424  		return Ycl
  2425  
  2426  	case REG_CX:
  2427  		return Ycx
  2428  
  2429  	case REG_DX, REG_BX:
  2430  		return Yrx
  2431  
  2432  	case REG_R8, /* not really Yrl */
  2433  		REG_R9,
  2434  		REG_R10,
  2435  		REG_R11,
  2436  		REG_R12,
  2437  		REG_R13,
  2438  		REG_R14,
  2439  		REG_R15:
  2440  		if ctxt.Asmode != 64 {
  2441  			return Yxxx
  2442  		}
  2443  		fallthrough
  2444  
  2445  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2446  		if p.Mode == 32 {
  2447  			return Yrl32
  2448  		}
  2449  		return Yrl
  2450  
  2451  	case REG_F0 + 0:
  2452  		return Yf0
  2453  
  2454  	case REG_F0 + 1,
  2455  		REG_F0 + 2,
  2456  		REG_F0 + 3,
  2457  		REG_F0 + 4,
  2458  		REG_F0 + 5,
  2459  		REG_F0 + 6,
  2460  		REG_F0 + 7:
  2461  		return Yrf
  2462  
  2463  	case REG_M0 + 0,
  2464  		REG_M0 + 1,
  2465  		REG_M0 + 2,
  2466  		REG_M0 + 3,
  2467  		REG_M0 + 4,
  2468  		REG_M0 + 5,
  2469  		REG_M0 + 6,
  2470  		REG_M0 + 7:
  2471  		return Ymr
  2472  
  2473  	case REG_X0 + 0,
  2474  		REG_X0 + 1,
  2475  		REG_X0 + 2,
  2476  		REG_X0 + 3,
  2477  		REG_X0 + 4,
  2478  		REG_X0 + 5,
  2479  		REG_X0 + 6,
  2480  		REG_X0 + 7,
  2481  		REG_X0 + 8,
  2482  		REG_X0 + 9,
  2483  		REG_X0 + 10,
  2484  		REG_X0 + 11,
  2485  		REG_X0 + 12,
  2486  		REG_X0 + 13,
  2487  		REG_X0 + 14,
  2488  		REG_X0 + 15:
  2489  		return Yxr
  2490  
  2491  	case REG_Y0 + 0,
  2492  		REG_Y0 + 1,
  2493  		REG_Y0 + 2,
  2494  		REG_Y0 + 3,
  2495  		REG_Y0 + 4,
  2496  		REG_Y0 + 5,
  2497  		REG_Y0 + 6,
  2498  		REG_Y0 + 7,
  2499  		REG_Y0 + 8,
  2500  		REG_Y0 + 9,
  2501  		REG_Y0 + 10,
  2502  		REG_Y0 + 11,
  2503  		REG_Y0 + 12,
  2504  		REG_Y0 + 13,
  2505  		REG_Y0 + 14,
  2506  		REG_Y0 + 15:
  2507  		return Yyr
  2508  
  2509  	case REG_CS:
  2510  		return Ycs
  2511  	case REG_SS:
  2512  		return Yss
  2513  	case REG_DS:
  2514  		return Yds
  2515  	case REG_ES:
  2516  		return Yes
  2517  	case REG_FS:
  2518  		return Yfs
  2519  	case REG_GS:
  2520  		return Ygs
  2521  	case REG_TLS:
  2522  		return Ytls
  2523  
  2524  	case REG_GDTR:
  2525  		return Ygdtr
  2526  	case REG_IDTR:
  2527  		return Yidtr
  2528  	case REG_LDTR:
  2529  		return Yldtr
  2530  	case REG_MSW:
  2531  		return Ymsw
  2532  	case REG_TASK:
  2533  		return Ytask
  2534  
  2535  	case REG_CR + 0:
  2536  		return Ycr0
  2537  	case REG_CR + 1:
  2538  		return Ycr1
  2539  	case REG_CR + 2:
  2540  		return Ycr2
  2541  	case REG_CR + 3:
  2542  		return Ycr3
  2543  	case REG_CR + 4:
  2544  		return Ycr4
  2545  	case REG_CR + 5:
  2546  		return Ycr5
  2547  	case REG_CR + 6:
  2548  		return Ycr6
  2549  	case REG_CR + 7:
  2550  		return Ycr7
  2551  	case REG_CR + 8:
  2552  		return Ycr8
  2553  
  2554  	case REG_DR + 0:
  2555  		return Ydr0
  2556  	case REG_DR + 1:
  2557  		return Ydr1
  2558  	case REG_DR + 2:
  2559  		return Ydr2
  2560  	case REG_DR + 3:
  2561  		return Ydr3
  2562  	case REG_DR + 4:
  2563  		return Ydr4
  2564  	case REG_DR + 5:
  2565  		return Ydr5
  2566  	case REG_DR + 6:
  2567  		return Ydr6
  2568  	case REG_DR + 7:
  2569  		return Ydr7
  2570  
  2571  	case REG_TR + 0:
  2572  		return Ytr0
  2573  	case REG_TR + 1:
  2574  		return Ytr1
  2575  	case REG_TR + 2:
  2576  		return Ytr2
  2577  	case REG_TR + 3:
  2578  		return Ytr3
  2579  	case REG_TR + 4:
  2580  		return Ytr4
  2581  	case REG_TR + 5:
  2582  		return Ytr5
  2583  	case REG_TR + 6:
  2584  		return Ytr6
  2585  	case REG_TR + 7:
  2586  		return Ytr7
  2587  	}
  2588  
  2589  	return Yxxx
  2590  }
  2591  
  2592  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2593  	var i int
  2594  
  2595  	switch index {
  2596  	default:
  2597  		goto bad
  2598  
  2599  	case REG_NONE:
  2600  		i = 4 << 3
  2601  		goto bas
  2602  
  2603  	case REG_R8,
  2604  		REG_R9,
  2605  		REG_R10,
  2606  		REG_R11,
  2607  		REG_R12,
  2608  		REG_R13,
  2609  		REG_R14,
  2610  		REG_R15:
  2611  		if ctxt.Asmode != 64 {
  2612  			goto bad
  2613  		}
  2614  		fallthrough
  2615  
  2616  	case REG_AX,
  2617  		REG_CX,
  2618  		REG_DX,
  2619  		REG_BX,
  2620  		REG_BP,
  2621  		REG_SI,
  2622  		REG_DI:
  2623  		i = reg[index] << 3
  2624  	}
  2625  
  2626  	switch scale {
  2627  	default:
  2628  		goto bad
  2629  
  2630  	case 1:
  2631  		break
  2632  
  2633  	case 2:
  2634  		i |= 1 << 6
  2635  
  2636  	case 4:
  2637  		i |= 2 << 6
  2638  
  2639  	case 8:
  2640  		i |= 3 << 6
  2641  	}
  2642  
  2643  bas:
  2644  	switch base {
  2645  	default:
  2646  		goto bad
  2647  
  2648  	case REG_NONE: /* must be mod=00 */
  2649  		i |= 5
  2650  
  2651  	case REG_R8,
  2652  		REG_R9,
  2653  		REG_R10,
  2654  		REG_R11,
  2655  		REG_R12,
  2656  		REG_R13,
  2657  		REG_R14,
  2658  		REG_R15:
  2659  		if ctxt.Asmode != 64 {
  2660  			goto bad
  2661  		}
  2662  		fallthrough
  2663  
  2664  	case REG_AX,
  2665  		REG_CX,
  2666  		REG_DX,
  2667  		REG_BX,
  2668  		REG_SP,
  2669  		REG_BP,
  2670  		REG_SI,
  2671  		REG_DI:
  2672  		i |= reg[base]
  2673  	}
  2674  
  2675  	ctxt.Andptr[0] = byte(i)
  2676  	ctxt.Andptr = ctxt.Andptr[1:]
  2677  	return
  2678  
  2679  bad:
  2680  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2681  	ctxt.Andptr[0] = 0
  2682  	ctxt.Andptr = ctxt.Andptr[1:]
  2683  	return
  2684  }
  2685  
  2686  func put4(ctxt *obj.Link, v int32) {
  2687  	ctxt.Andptr[0] = byte(v)
  2688  	ctxt.Andptr[1] = byte(v >> 8)
  2689  	ctxt.Andptr[2] = byte(v >> 16)
  2690  	ctxt.Andptr[3] = byte(v >> 24)
  2691  	ctxt.Andptr = ctxt.Andptr[4:]
  2692  }
  2693  
  2694  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2695  	var rel obj.Reloc
  2696  
  2697  	v := vaddr(ctxt, p, a, &rel)
  2698  	if rel.Siz != 0 {
  2699  		if rel.Siz != 4 {
  2700  			ctxt.Diag("bad reloc")
  2701  		}
  2702  		r := obj.Addrel(ctxt.Cursym)
  2703  		*r = rel
  2704  		r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  2705  	}
  2706  
  2707  	put4(ctxt, int32(v))
  2708  }
  2709  
  2710  func put8(ctxt *obj.Link, v int64) {
  2711  	ctxt.Andptr[0] = byte(v)
  2712  	ctxt.Andptr[1] = byte(v >> 8)
  2713  	ctxt.Andptr[2] = byte(v >> 16)
  2714  	ctxt.Andptr[3] = byte(v >> 24)
  2715  	ctxt.Andptr[4] = byte(v >> 32)
  2716  	ctxt.Andptr[5] = byte(v >> 40)
  2717  	ctxt.Andptr[6] = byte(v >> 48)
  2718  	ctxt.Andptr[7] = byte(v >> 56)
  2719  	ctxt.Andptr = ctxt.Andptr[8:]
  2720  }
  2721  
  2722  /*
  2723  static void
  2724  relput8(Prog *p, Addr *a)
  2725  {
  2726  	vlong v;
  2727  	Reloc rel, *r;
  2728  
  2729  	v = vaddr(ctxt, p, a, &rel);
  2730  	if(rel.siz != 0) {
  2731  		r = addrel(ctxt->cursym);
  2732  		*r = rel;
  2733  		r->siz = 8;
  2734  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2735  	}
  2736  	put8(ctxt, v);
  2737  }
  2738  */
  2739  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2740  	if r != nil {
  2741  		*r = obj.Reloc{}
  2742  	}
  2743  
  2744  	switch a.Name {
  2745  	case obj.NAME_STATIC,
  2746  		obj.NAME_GOTREF,
  2747  		obj.NAME_EXTERN:
  2748  		s := a.Sym
  2749  		if r == nil {
  2750  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2751  			log.Fatalf("reloc")
  2752  		}
  2753  
  2754  		if a.Name == obj.NAME_GOTREF {
  2755  			r.Siz = 4
  2756  			r.Type = obj.R_GOTPCREL
  2757  		} else if isextern(s) || (p.Mode != 64 && ctxt.Flag_shared == 0) {
  2758  			r.Siz = 4
  2759  			r.Type = obj.R_ADDR
  2760  		} else {
  2761  			r.Siz = 4
  2762  			r.Type = obj.R_PCREL
  2763  		}
  2764  
  2765  		r.Off = -1 // caller must fill in
  2766  		r.Sym = s
  2767  		r.Add = a.Offset
  2768  
  2769  		return 0
  2770  	}
  2771  
  2772  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2773  		if r == nil {
  2774  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2775  			log.Fatalf("reloc")
  2776  		}
  2777  
  2778  		if ctxt.Flag_shared == 0 || isAndroid {
  2779  			r.Type = obj.R_TLS_LE
  2780  			r.Siz = 4
  2781  			r.Off = -1 // caller must fill in
  2782  			r.Add = a.Offset
  2783  		}
  2784  		return 0
  2785  	}
  2786  
  2787  	return a.Offset
  2788  }
  2789  
// asmandsz emits the operand-addressing bytes for a into ctxt.Andptr: a byte
// of the form mod<<6 | rm | r<<3, optionally followed by a byte produced by
// asmidx and by an 8-bit or 32-bit displacement. r is the value placed in
// bits 3-5 of the first byte. Of the caller's rex value only the 0x40 and Rxr
// bits are kept; they are OR-ed into ctxt.Rexflag together with the bits
// required by the registers used in a.
//
// When the displacement needs a relocation (as reported by vaddr, or for a
// TLS index in non-shared mode) the relocation is added to ctxt.Cursym at the
// current output offset and a 32-bit displacement is always used.
//
// Operands that cannot be encoded are reported with ctxt.Diag; no panic or
// error value is produced. The m64 parameter is not read by this function.
func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Keep only the 0x40 and Rxr bits of the rex value passed in.
	rex &= 0x40 | Rxr
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here; the two checks only add a more
		// specific diagnostic before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, no displacement allowed.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ctxt.Andptr[0] = byte(3<<6 | reg[a.Reg]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register (REG_TLS as index is handled
	// further down): rm=4 selects the extra byte written by asmidx.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			// Indexed symbol references are rejected in 64-bit mode unless
			// isextern reports true for the symbol.
			if !isextern(a.Sym) && p.Mode == 64 {
				goto bad
			}
			if p.Mode == 32 && ctxt.Flag_shared != 0 {
				base = REG_CX
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 followed by a 32-bit displacement.
			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Zero displacement can be omitted, except with BP/R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in a signed byte and needs no relocation: mod=1.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.Andptr[0] = byte(1<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ctxt.Andptr[0] = byte(v)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		// Otherwise a full 32-bit displacement: mod=2.
		ctxt.Andptr[0] = byte(2<<6 | 4<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with REG_TLS as index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if p.Mode == 32 && ctxt.Flag_shared != 0 {
			base = REG_CX
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ctxt.Rexflag |= regrex[base]&Rxb | rex
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// && binds tighter than ||, so this branch is taken for every
		// p.Mode != 64, and in 64-bit mode only for base-less references to
		// symbols for which isextern is false (or whose Sym is nil).
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			// mod=0, rm=5 followed by a 32-bit displacement.
			ctxt.Andptr[0] = byte(0<<6 | 5<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			goto putrelv
		}

		/* temporary */
		ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:] /* sib present */
		ctxt.Andptr[0] = 0<<6 | 4<<3 | 5<<0
		ctxt.Andptr = ctxt.Andptr[1:] /* DS:d32 */
		goto putrelv
	}

	// SP and R12 as base always go through asmidx with no index register.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ctxt.Andptr[0] = byte(v)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general register as base.
	if REG_AX <= base && base <= REG_R15 {
		// A TLS index in non-shared mode turns the displacement into an
		// R_TLS_LE relocation whose addend is the original offset.
		if a.Index == REG_TLS && ctxt.Flag_shared == 0 {
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// Zero displacement can be omitted, except with BP/R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		// Signed 8-bit displacement.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr[1] = byte(v)
			ctxt.Andptr = ctxt.Andptr[2:]
			return
		}

		// 32-bit displacement (always used when a relocation is pending).
		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		goto putrelv
	}

	goto bad

	// putrelv records the pending relocation, if any, at the current output
	// offset and then emits v as the 32-bit displacement.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// Note: this r shadows the parameter r within the block.
		r := obj.Addrel(ctxt.Cursym)
		*r = rel
		r.Off = int32(ctxt.Curp.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
	}

	put4(ctxt, v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  2991  
  2992  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2993  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2994  }
  2995  
  2996  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2997  	asmandsz(ctxt, p, a, o, 0, 0)
  2998  }
  2999  
  3000  func bytereg(a *obj.Addr, t *uint8) {
  3001  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3002  		a.Reg += REG_AL - REG_AX
  3003  		*t = 0
  3004  	}
  3005  }
  3006  
  3007  func unbytereg(a *obj.Addr, t *uint8) {
  3008  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3009  		a.Reg += REG_AX - REG_AL
  3010  		*t = 0
  3011  	}
  3012  }
  3013  
  3014  const (
  3015  	E = 0xff
  3016  )
  3017  
// ymovtab lists special-case encodings selected by instruction and operand
// classes: pushes and pops of segment registers, moves to and from segment,
// control, debug and test registers, descriptor-table / machine-status /
// task-register loads and stores, double shifts, and the TLS base load.
//
// Each entry holds, in order: the instruction, three operand classes (the
// middle one is Ynone everywhere except the double shifts), a small integer
// code, and four opcode bytes. The table is terminated by an all-zero entry.
//
// NOTE(review): the Movtab field names and the code that consumes this table
// are outside this chunk; presumably the integer code selects the encoding
// routine and E (0xff) marks the end of the opcode bytes — confirm against
// the Movtab users.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3157  
  3158  func isax(a *obj.Addr) bool {
  3159  	switch a.Reg {
  3160  	case REG_AX, REG_AL, REG_AH:
  3161  		return true
  3162  	}
  3163  
  3164  	if a.Index == REG_AX {
  3165  		return true
  3166  	}
  3167  	return false
  3168  }
  3169  
  3170  func subreg(p *obj.Prog, from int, to int) {
  3171  	if false { /* debug['Q'] */
  3172  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  3173  	}
  3174  
  3175  	if int(p.From.Reg) == from {
  3176  		p.From.Reg = int16(to)
  3177  		p.Ft = 0
  3178  	}
  3179  
  3180  	if int(p.To.Reg) == from {
  3181  		p.To.Reg = int16(to)
  3182  		p.Tt = 0
  3183  	}
  3184  
  3185  	if int(p.From.Index) == from {
  3186  		p.From.Index = int16(to)
  3187  		p.Ft = 0
  3188  	}
  3189  
  3190  	if int(p.To.Index) == from {
  3191  		p.To.Index = int16(to)
  3192  		p.Tt = 0
  3193  	}
  3194  
  3195  	if false { /* debug['Q'] */
  3196  		fmt.Printf("%v\n", p)
  3197  	}
  3198  }
  3199  
  3200  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3201  	switch op {
  3202  	case Pm, Pe, Pf2, Pf3:
  3203  		if osize != 1 {
  3204  			if op != Pm {
  3205  				ctxt.Andptr[0] = byte(op)
  3206  				ctxt.Andptr = ctxt.Andptr[1:]
  3207  			}
  3208  			ctxt.Andptr[0] = Pm
  3209  			ctxt.Andptr = ctxt.Andptr[1:]
  3210  			z++
  3211  			op = int(o.op[z])
  3212  			break
  3213  		}
  3214  		fallthrough
  3215  
  3216  	default:
  3217  		if -cap(ctxt.Andptr) == -cap(ctxt.And) || ctxt.And[-cap(ctxt.Andptr)+cap(ctxt.And[:])-1] != Pm {
  3218  			ctxt.Andptr[0] = Pm
  3219  			ctxt.Andptr = ctxt.Andptr[1:]
  3220  		}
  3221  	}
  3222  
  3223  	ctxt.Andptr[0] = byte(op)
  3224  	ctxt.Andptr = ctxt.Andptr[1:]
  3225  	return z
  3226  }
  3227  
  3228  var bpduff1 = []byte{
  3229  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3230  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3231  }
  3232  
  3233  var bpduff2 = []byte{
  3234  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3235  }
  3236  
  3237  // Emit VEX prefix and opcode byte.
  3238  // The three addresses are the r/m, vvvv, and reg fields.
  3239  // The reg and rm arguments appear in the same order as the
  3240  // arguments to asmand, which typically follows the call to asmvex.
  3241  // The final two arguments are the VEX prefix (see encoding above)
  3242  // and the opcode byte.
  3243  // For details about vex prefix see:
  3244  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3245  func asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3246  	ctxt.Vexflag = 1
  3247  	rexR := regrex[r.Reg] & Rxr
  3248  	rexB := regrex[rm.Reg] & Rxb
  3249  	rexX := regrex[rm.Index] & Rxx
  3250  	vexM := (vex >> 3) & 0xF
  3251  	vexWLP := vex & 0x87
  3252  	vexV := byte(0)
  3253  	if v != nil {
  3254  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3255  	}
  3256  	vexV ^= 0xF
  3257  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3258  		// Can use 2-byte encoding.
  3259  		ctxt.Andptr[0] = 0xc5
  3260  		ctxt.Andptr[1] = byte(rexR<<5) ^ 0x80 | vexV<<3 | vexWLP
  3261  		ctxt.Andptr = ctxt.Andptr[2:]
  3262  	} else {
  3263  		// Must use 3-byte encoding.
  3264  		ctxt.Andptr[0] = 0xc4
  3265  		ctxt.Andptr[1] = (byte(rexR|rexX|rexB) << 5) ^ 0xE0 | vexM
  3266  		ctxt.Andptr[2] = vexV<<3 | vexWLP
  3267  		ctxt.Andptr = ctxt.Andptr[3:]
  3268  	}
  3269  	ctxt.Andptr[0] = opcode
  3270  	ctxt.Andptr = ctxt.Andptr[1:]
  3271  }
  3272  
  3273  func doasm(ctxt *obj.Link, p *obj.Prog) {
  3274  	ctxt.Curp = p // TODO
  3275  
  3276  	o := opindex[p.As&obj.AMask]
  3277  
  3278  	if o == nil {
  3279  		ctxt.Diag("asmins: missing op %v", p)
  3280  		return
  3281  	}
  3282  
  3283  	pre := prefixof(ctxt, p, &p.From)
  3284  	if pre != 0 {
  3285  		ctxt.Andptr[0] = byte(pre)
  3286  		ctxt.Andptr = ctxt.Andptr[1:]
  3287  	}
  3288  	pre = prefixof(ctxt, p, &p.To)
  3289  	if pre != 0 {
  3290  		ctxt.Andptr[0] = byte(pre)
  3291  		ctxt.Andptr = ctxt.Andptr[1:]
  3292  	}
  3293  
  3294  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3295  	// which encodes as SHRQ $32(DX*0), AX.
  3296  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3297  	// Change encoding generated by assemblers and compilers and remove.
  3298  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3299  		p.From3 = new(obj.Addr)
  3300  		p.From3.Type = obj.TYPE_REG
  3301  		p.From3.Reg = p.From.Index
  3302  		p.From.Index = 0
  3303  	}
  3304  
  3305  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3306  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3307  	switch p.As {
  3308  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3309  		if p.From3Type() == obj.TYPE_NONE {
  3310  			p.From3 = new(obj.Addr)
  3311  			*p.From3 = p.From
  3312  			p.From = obj.Addr{}
  3313  			p.From.Type = obj.TYPE_CONST
  3314  			p.From.Offset = p.To.Offset
  3315  			p.To.Offset = 0
  3316  		}
  3317  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3318  		if p.From3Type() == obj.TYPE_NONE {
  3319  			p.From3 = new(obj.Addr)
  3320  			*p.From3 = p.To
  3321  			p.To = obj.Addr{}
  3322  			p.To.Type = obj.TYPE_CONST
  3323  			p.To.Offset = p.From3.Offset
  3324  			p.From3.Offset = 0
  3325  		}
  3326  	}
  3327  
  3328  	if p.Ft == 0 {
  3329  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3330  	}
  3331  	if p.Tt == 0 {
  3332  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3333  	}
  3334  
  3335  	ft := int(p.Ft) * Ymax
  3336  	f3t := Ynone * Ymax
  3337  	if p.From3 != nil {
  3338  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3339  	}
  3340  	tt := int(p.Tt) * Ymax
  3341  
  3342  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3343  	z := 0
  3344  	var a *obj.Addr
  3345  	var l int
  3346  	var op int
  3347  	var q *obj.Prog
  3348  	var r *obj.Reloc
  3349  	var rel obj.Reloc
  3350  	var v int64
  3351  	for i := range o.ytab {
  3352  		yt := &o.ytab[i]
  3353  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3354  			switch o.prefix {
  3355  			case Px1: /* first option valid only in 32-bit mode */
  3356  				if ctxt.Mode == 64 && z == 0 {
  3357  					z += int(yt.zoffset) + xo
  3358  					continue
  3359  				}
  3360  			case Pq: /* 16 bit escape and opcode escape */
  3361  				ctxt.Andptr[0] = Pe
  3362  				ctxt.Andptr = ctxt.Andptr[1:]
  3363  
  3364  				ctxt.Andptr[0] = Pm
  3365  				ctxt.Andptr = ctxt.Andptr[1:]
  3366  
  3367  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3368  				ctxt.Rexflag |= Pw
  3369  				ctxt.Andptr[0] = Pe
  3370  				ctxt.Andptr = ctxt.Andptr[1:]
  3371  				ctxt.Andptr[0] = Pm
  3372  				ctxt.Andptr = ctxt.Andptr[1:]
  3373  
  3374  			case Pq4: /*  66 0F 38 */
  3375  				ctxt.Andptr[0] = 0x66
  3376  				ctxt.Andptr[1] = 0x0F
  3377  				ctxt.Andptr[2] = 0x38
  3378  				ctxt.Andptr = ctxt.Andptr[3:]
  3379  
  3380  			case Pf2, /* xmm opcode escape */
  3381  				Pf3:
  3382  				ctxt.Andptr[0] = byte(o.prefix)
  3383  				ctxt.Andptr = ctxt.Andptr[1:]
  3384  
  3385  				ctxt.Andptr[0] = Pm
  3386  				ctxt.Andptr = ctxt.Andptr[1:]
  3387  
  3388  			case Pef3:
  3389  				ctxt.Andptr[0] = Pe
  3390  				ctxt.Andptr = ctxt.Andptr[1:]
  3391  				ctxt.Andptr[0] = Pf3
  3392  				ctxt.Andptr = ctxt.Andptr[1:]
  3393  				ctxt.Andptr[0] = Pm
  3394  				ctxt.Andptr = ctxt.Andptr[1:]
  3395  
  3396  			case Pfw: /* xmm opcode escape + REX.W */
  3397  				ctxt.Rexflag |= Pw
  3398  				ctxt.Andptr[0] = Pf3
  3399  				ctxt.Andptr = ctxt.Andptr[1:]
  3400  				ctxt.Andptr[0] = Pm
  3401  				ctxt.Andptr = ctxt.Andptr[1:]
  3402  
  3403  			case Pm: /* opcode escape */
  3404  				ctxt.Andptr[0] = Pm
  3405  				ctxt.Andptr = ctxt.Andptr[1:]
  3406  
  3407  			case Pe: /* 16 bit escape */
  3408  				ctxt.Andptr[0] = Pe
  3409  				ctxt.Andptr = ctxt.Andptr[1:]
  3410  
  3411  			case Pw: /* 64-bit escape */
  3412  				if p.Mode != 64 {
  3413  					ctxt.Diag("asmins: illegal 64: %v", p)
  3414  				}
  3415  				ctxt.Rexflag |= Pw
  3416  
  3417  			case Pw8: /* 64-bit escape if z >= 8 */
  3418  				if z >= 8 {
  3419  					if p.Mode != 64 {
  3420  						ctxt.Diag("asmins: illegal 64: %v", p)
  3421  					}
  3422  					ctxt.Rexflag |= Pw
  3423  				}
  3424  
  3425  			case Pb: /* botch */
  3426  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3427  					goto bad
  3428  				}
  3429  				// NOTE(rsc): This is probably safe to do always,
  3430  				// but when enabled it chooses different encodings
  3431  				// than the old cmd/internal/obj/i386 code did,
  3432  				// which breaks our "same bits out" checks.
  3433  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3434  				// in the original obj/i386, and it would encode
  3435  				// (using a valid, shorter form) as 3c 00 if we enabled
  3436  				// the call to bytereg here.
  3437  				if p.Mode == 64 {
  3438  					bytereg(&p.From, &p.Ft)
  3439  					bytereg(&p.To, &p.Tt)
  3440  				}
  3441  
  3442  			case P32: /* 32 bit but illegal if 64-bit mode */
  3443  				if p.Mode == 64 {
  3444  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3445  				}
  3446  
  3447  			case Py: /* 64-bit only, no prefix */
  3448  				if p.Mode != 64 {
  3449  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3450  				}
  3451  
  3452  			case Py1: /* 64-bit only if z < 1, no prefix */
  3453  				if z < 1 && p.Mode != 64 {
  3454  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3455  				}
  3456  
  3457  			case Py3: /* 64-bit only if z < 3, no prefix */
  3458  				if z < 3 && p.Mode != 64 {
  3459  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3460  				}
  3461  			}
  3462  
  3463  			if z >= len(o.op) {
  3464  				log.Fatalf("asmins bad table %v", p)
  3465  			}
  3466  			op = int(o.op[z])
  3467  			if op == 0x0f {
  3468  				ctxt.Andptr[0] = byte(op)
  3469  				ctxt.Andptr = ctxt.Andptr[1:]
  3470  				z++
  3471  				op = int(o.op[z])
  3472  			}
  3473  
  3474  			switch yt.zcase {
  3475  			default:
  3476  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3477  				return
  3478  
  3479  			case Zpseudo:
  3480  				break
  3481  
  3482  			case Zlit:
  3483  				for ; ; z++ {
  3484  					op = int(o.op[z])
  3485  					if op == 0 {
  3486  						break
  3487  					}
  3488  					ctxt.Andptr[0] = byte(op)
  3489  					ctxt.Andptr = ctxt.Andptr[1:]
  3490  				}
  3491  
  3492  			case Zlitm_r:
  3493  				for ; ; z++ {
  3494  					op = int(o.op[z])
  3495  					if op == 0 {
  3496  						break
  3497  					}
  3498  					ctxt.Andptr[0] = byte(op)
  3499  					ctxt.Andptr = ctxt.Andptr[1:]
  3500  				}
  3501  				asmand(ctxt, p, &p.From, &p.To)
  3502  
  3503  			case Zmb_r:
  3504  				bytereg(&p.From, &p.Ft)
  3505  				fallthrough
  3506  
  3507  				/* fall through */
  3508  			case Zm_r:
  3509  				ctxt.Andptr[0] = byte(op)
  3510  				ctxt.Andptr = ctxt.Andptr[1:]
  3511  
  3512  				asmand(ctxt, p, &p.From, &p.To)
  3513  
  3514  			case Zm2_r:
  3515  				ctxt.Andptr[0] = byte(op)
  3516  				ctxt.Andptr = ctxt.Andptr[1:]
  3517  				ctxt.Andptr[0] = byte(o.op[z+1])
  3518  				ctxt.Andptr = ctxt.Andptr[1:]
  3519  				asmand(ctxt, p, &p.From, &p.To)
  3520  
  3521  			case Zm_r_xm:
  3522  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3523  				asmand(ctxt, p, &p.From, &p.To)
  3524  
  3525  			case Zm_r_xm_nr:
  3526  				ctxt.Rexflag = 0
  3527  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3528  				asmand(ctxt, p, &p.From, &p.To)
  3529  
  3530  			case Zm_r_i_xm:
  3531  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3532  				asmand(ctxt, p, &p.From, p.From3)
  3533  				ctxt.Andptr[0] = byte(p.To.Offset)
  3534  				ctxt.Andptr = ctxt.Andptr[1:]
  3535  
  3536  			case Zm_r_3d:
  3537  				ctxt.Andptr[0] = 0x0f
  3538  				ctxt.Andptr = ctxt.Andptr[1:]
  3539  				ctxt.Andptr[0] = 0x0f
  3540  				ctxt.Andptr = ctxt.Andptr[1:]
  3541  				asmand(ctxt, p, &p.From, &p.To)
  3542  				ctxt.Andptr[0] = byte(op)
  3543  				ctxt.Andptr = ctxt.Andptr[1:]
  3544  
  3545  			case Zibm_r, Zibr_m:
  3546  				for {
  3547  					tmp1 := z
  3548  					z++
  3549  					op = int(o.op[tmp1])
  3550  					if op == 0 {
  3551  						break
  3552  					}
  3553  					ctxt.Andptr[0] = byte(op)
  3554  					ctxt.Andptr = ctxt.Andptr[1:]
  3555  				}
  3556  				if yt.zcase == Zibr_m {
  3557  					asmand(ctxt, p, &p.To, p.From3)
  3558  				} else {
  3559  					asmand(ctxt, p, p.From3, &p.To)
  3560  				}
  3561  				ctxt.Andptr[0] = byte(p.From.Offset)
  3562  				ctxt.Andptr = ctxt.Andptr[1:]
  3563  
  3564  			case Zaut_r:
  3565  				ctxt.Andptr[0] = 0x8d
  3566  				ctxt.Andptr = ctxt.Andptr[1:] /* leal */
  3567  				if p.From.Type != obj.TYPE_ADDR {
  3568  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3569  				}
  3570  				p.From.Type = obj.TYPE_MEM
  3571  				asmand(ctxt, p, &p.From, &p.To)
  3572  				p.From.Type = obj.TYPE_ADDR
  3573  
  3574  			case Zm_o:
  3575  				ctxt.Andptr[0] = byte(op)
  3576  				ctxt.Andptr = ctxt.Andptr[1:]
  3577  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3578  
  3579  			case Zr_m:
  3580  				ctxt.Andptr[0] = byte(op)
  3581  				ctxt.Andptr = ctxt.Andptr[1:]
  3582  				asmand(ctxt, p, &p.To, &p.From)
  3583  
  3584  			case Zvex_rm_v_r:
  3585  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3586  				asmand(ctxt, p, &p.From, &p.To)
  3587  
  3588  			case Zvex_v_rm_r:
  3589  				asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3590  				asmand(ctxt, p, p.From3, &p.To)
  3591  
  3592  			case Zvex_r_v_rm:
  3593  				asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3594  				asmand(ctxt, p, &p.To, &p.From)
  3595  
  3596  			case Zr_m_xm:
  3597  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3598  				asmand(ctxt, p, &p.To, &p.From)
  3599  
  3600  			case Zr_m_xm_nr:
  3601  				ctxt.Rexflag = 0
  3602  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3603  				asmand(ctxt, p, &p.To, &p.From)
  3604  
  3605  			case Zo_m:
  3606  				ctxt.Andptr[0] = byte(op)
  3607  				ctxt.Andptr = ctxt.Andptr[1:]
  3608  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3609  
  3610  			case Zcallindreg:
  3611  				r = obj.Addrel(ctxt.Cursym)
  3612  				r.Off = int32(p.Pc)
  3613  				r.Type = obj.R_CALLIND
  3614  				r.Siz = 0
  3615  				fallthrough
  3616  
  3617  			case Zo_m64:
  3618  				ctxt.Andptr[0] = byte(op)
  3619  				ctxt.Andptr = ctxt.Andptr[1:]
  3620  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3621  
  3622  			case Zm_ibo:
  3623  				ctxt.Andptr[0] = byte(op)
  3624  				ctxt.Andptr = ctxt.Andptr[1:]
  3625  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3626  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.To, nil))
  3627  				ctxt.Andptr = ctxt.Andptr[1:]
  3628  
  3629  			case Zibo_m:
  3630  				ctxt.Andptr[0] = byte(op)
  3631  				ctxt.Andptr = ctxt.Andptr[1:]
  3632  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3633  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3634  				ctxt.Andptr = ctxt.Andptr[1:]
  3635  
  3636  			case Zibo_m_xm:
  3637  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3638  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3639  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3640  				ctxt.Andptr = ctxt.Andptr[1:]
  3641  
  3642  			case Z_ib, Zib_:
  3643  				if yt.zcase == Zib_ {
  3644  					a = &p.From
  3645  				} else {
  3646  					a = &p.To
  3647  				}
  3648  				ctxt.Andptr[0] = byte(op)
  3649  				ctxt.Andptr = ctxt.Andptr[1:]
  3650  				if p.As == AXABORT {
  3651  					ctxt.Andptr[0] = byte(o.op[z+1])
  3652  					ctxt.Andptr = ctxt.Andptr[1:]
  3653  				}
  3654  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, a, nil))
  3655  				ctxt.Andptr = ctxt.Andptr[1:]
  3656  
  3657  			case Zib_rp:
  3658  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3659  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3660  				ctxt.Andptr = ctxt.Andptr[1:]
  3661  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3662  				ctxt.Andptr = ctxt.Andptr[1:]
  3663  
  3664  			case Zil_rp:
  3665  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3666  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3667  				ctxt.Andptr = ctxt.Andptr[1:]
  3668  				if o.prefix == Pe {
  3669  					v = vaddr(ctxt, p, &p.From, nil)
  3670  					ctxt.Andptr[0] = byte(v)
  3671  					ctxt.Andptr = ctxt.Andptr[1:]
  3672  					ctxt.Andptr[0] = byte(v >> 8)
  3673  					ctxt.Andptr = ctxt.Andptr[1:]
  3674  				} else {
  3675  					relput4(ctxt, p, &p.From)
  3676  				}
  3677  
  3678  			case Zo_iw:
  3679  				ctxt.Andptr[0] = byte(op)
  3680  				ctxt.Andptr = ctxt.Andptr[1:]
  3681  				if p.From.Type != obj.TYPE_NONE {
  3682  					v = vaddr(ctxt, p, &p.From, nil)
  3683  					ctxt.Andptr[0] = byte(v)
  3684  					ctxt.Andptr = ctxt.Andptr[1:]
  3685  					ctxt.Andptr[0] = byte(v >> 8)
  3686  					ctxt.Andptr = ctxt.Andptr[1:]
  3687  				}
  3688  
  3689  			case Ziq_rp:
  3690  				v = vaddr(ctxt, p, &p.From, &rel)
  3691  				l = int(v >> 32)
  3692  				if l == 0 && rel.Siz != 8 {
  3693  					//p->mark |= 0100;
  3694  					//print("zero: %llux %v\n", v, p);
  3695  					ctxt.Rexflag &^= (0x40 | Rxw)
  3696  
  3697  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3698  					ctxt.Andptr[0] = byte(0xb8 + reg[p.To.Reg])
  3699  					ctxt.Andptr = ctxt.Andptr[1:]
  3700  					if rel.Type != 0 {
  3701  						r = obj.Addrel(ctxt.Cursym)
  3702  						*r = rel
  3703  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3704  					}
  3705  
  3706  					put4(ctxt, int32(v))
  3707  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3708  
  3709  					//p->mark |= 0100;
  3710  					//print("sign: %llux %v\n", v, p);
  3711  					ctxt.Andptr[0] = 0xc7
  3712  					ctxt.Andptr = ctxt.Andptr[1:]
  3713  
  3714  					asmando(ctxt, p, &p.To, 0)
  3715  					put4(ctxt, int32(v)) /* need all 8 */
  3716  				} else {
  3717  					//print("all: %llux %v\n", v, p);
  3718  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3719  
  3720  					ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3721  					ctxt.Andptr = ctxt.Andptr[1:]
  3722  					if rel.Type != 0 {
  3723  						r = obj.Addrel(ctxt.Cursym)
  3724  						*r = rel
  3725  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3726  					}
  3727  
  3728  					put8(ctxt, v)
  3729  				}
  3730  
  3731  			case Zib_rr:
  3732  				ctxt.Andptr[0] = byte(op)
  3733  				ctxt.Andptr = ctxt.Andptr[1:]
  3734  				asmand(ctxt, p, &p.To, &p.To)
  3735  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3736  				ctxt.Andptr = ctxt.Andptr[1:]
  3737  
  3738  			case Z_il, Zil_:
  3739  				if yt.zcase == Zil_ {
  3740  					a = &p.From
  3741  				} else {
  3742  					a = &p.To
  3743  				}
  3744  				ctxt.Andptr[0] = byte(op)
  3745  				ctxt.Andptr = ctxt.Andptr[1:]
  3746  				if o.prefix == Pe {
  3747  					v = vaddr(ctxt, p, a, nil)
  3748  					ctxt.Andptr[0] = byte(v)
  3749  					ctxt.Andptr = ctxt.Andptr[1:]
  3750  					ctxt.Andptr[0] = byte(v >> 8)
  3751  					ctxt.Andptr = ctxt.Andptr[1:]
  3752  				} else {
  3753  					relput4(ctxt, p, a)
  3754  				}
  3755  
  3756  			case Zm_ilo, Zilo_m:
  3757  				ctxt.Andptr[0] = byte(op)
  3758  				ctxt.Andptr = ctxt.Andptr[1:]
  3759  				if yt.zcase == Zilo_m {
  3760  					a = &p.From
  3761  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3762  				} else {
  3763  					a = &p.To
  3764  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3765  				}
  3766  
  3767  				if o.prefix == Pe {
  3768  					v = vaddr(ctxt, p, a, nil)
  3769  					ctxt.Andptr[0] = byte(v)
  3770  					ctxt.Andptr = ctxt.Andptr[1:]
  3771  					ctxt.Andptr[0] = byte(v >> 8)
  3772  					ctxt.Andptr = ctxt.Andptr[1:]
  3773  				} else {
  3774  					relput4(ctxt, p, a)
  3775  				}
  3776  
  3777  			case Zil_rr:
  3778  				ctxt.Andptr[0] = byte(op)
  3779  				ctxt.Andptr = ctxt.Andptr[1:]
  3780  				asmand(ctxt, p, &p.To, &p.To)
  3781  				if o.prefix == Pe {
  3782  					v = vaddr(ctxt, p, &p.From, nil)
  3783  					ctxt.Andptr[0] = byte(v)
  3784  					ctxt.Andptr = ctxt.Andptr[1:]
  3785  					ctxt.Andptr[0] = byte(v >> 8)
  3786  					ctxt.Andptr = ctxt.Andptr[1:]
  3787  				} else {
  3788  					relput4(ctxt, p, &p.From)
  3789  				}
  3790  
  3791  			case Z_rp:
  3792  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3793  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3794  				ctxt.Andptr = ctxt.Andptr[1:]
  3795  
  3796  			case Zrp_:
  3797  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3798  				ctxt.Andptr[0] = byte(op + reg[p.From.Reg])
  3799  				ctxt.Andptr = ctxt.Andptr[1:]
  3800  
  3801  			case Zclr:
  3802  				ctxt.Rexflag &^= Pw
  3803  				ctxt.Andptr[0] = byte(op)
  3804  				ctxt.Andptr = ctxt.Andptr[1:]
  3805  				asmand(ctxt, p, &p.To, &p.To)
  3806  
  3807  			case Zcallcon, Zjmpcon:
  3808  				if yt.zcase == Zcallcon {
  3809  					ctxt.Andptr[0] = byte(op)
  3810  					ctxt.Andptr = ctxt.Andptr[1:]
  3811  				} else {
  3812  					ctxt.Andptr[0] = byte(o.op[z+1])
  3813  					ctxt.Andptr = ctxt.Andptr[1:]
  3814  				}
  3815  				r = obj.Addrel(ctxt.Cursym)
  3816  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3817  				r.Type = obj.R_PCREL
  3818  				r.Siz = 4
  3819  				r.Add = p.To.Offset
  3820  				put4(ctxt, 0)
  3821  
  3822  			case Zcallind:
  3823  				ctxt.Andptr[0] = byte(op)
  3824  				ctxt.Andptr = ctxt.Andptr[1:]
  3825  				ctxt.Andptr[0] = byte(o.op[z+1])
  3826  				ctxt.Andptr = ctxt.Andptr[1:]
  3827  				r = obj.Addrel(ctxt.Cursym)
  3828  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3829  				r.Type = obj.R_ADDR
  3830  				r.Siz = 4
  3831  				r.Add = p.To.Offset
  3832  				r.Sym = p.To.Sym
  3833  				put4(ctxt, 0)
  3834  
  3835  			case Zcall, Zcallduff:
  3836  				if p.To.Sym == nil {
  3837  					ctxt.Diag("call without target")
  3838  					log.Fatalf("bad code")
  3839  				}
  3840  
  3841  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3842  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3843  				}
  3844  
  3845  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3846  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3847  					// (the call jumps into the middle of the function).
  3848  					// This makes it possible to see call sites for duffcopy/duffzero in
  3849  					// BP-based profiling tools like Linux perf (which is the
  3850  					// whole point of obj.Framepointer_enabled).
  3851  					// MOVQ BP, -16(SP)
  3852  					// LEAQ -16(SP), BP
  3853  					copy(ctxt.Andptr, bpduff1)
  3854  					ctxt.Andptr = ctxt.Andptr[len(bpduff1):]
  3855  				}
  3856  				ctxt.Andptr[0] = byte(op)
  3857  				ctxt.Andptr = ctxt.Andptr[1:]
  3858  				r = obj.Addrel(ctxt.Cursym)
  3859  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3860  				r.Sym = p.To.Sym
  3861  				r.Add = p.To.Offset
  3862  				r.Type = obj.R_CALL
  3863  				r.Siz = 4
  3864  				put4(ctxt, 0)
  3865  
  3866  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3867  					// Pop BP pushed above.
  3868  					// MOVQ 0(BP), BP
  3869  					copy(ctxt.Andptr, bpduff2)
  3870  					ctxt.Andptr = ctxt.Andptr[len(bpduff2):]
  3871  				}
  3872  
  3873  			// TODO: jump across functions needs reloc
  3874  			case Zbr, Zjmp, Zloop:
  3875  				if p.As == AXBEGIN {
  3876  					ctxt.Andptr[0] = byte(op)
  3877  					ctxt.Andptr = ctxt.Andptr[1:]
  3878  				}
  3879  				if p.To.Sym != nil {
  3880  					if yt.zcase != Zjmp {
  3881  						ctxt.Diag("branch to ATEXT")
  3882  						log.Fatalf("bad code")
  3883  					}
  3884  
  3885  					ctxt.Andptr[0] = byte(o.op[z+1])
  3886  					ctxt.Andptr = ctxt.Andptr[1:]
  3887  					r = obj.Addrel(ctxt.Cursym)
  3888  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3889  					r.Sym = p.To.Sym
  3890  					r.Type = obj.R_PCREL
  3891  					r.Siz = 4
  3892  					put4(ctxt, 0)
  3893  					break
  3894  				}
  3895  
  3896  				// Assumes q is in this function.
  3897  				// TODO: Check in input, preserve in brchain.
  3898  
  3899  				// Fill in backward jump now.
  3900  				q = p.Pcond
  3901  
  3902  				if q == nil {
  3903  					ctxt.Diag("jmp/branch/loop without target")
  3904  					log.Fatalf("bad code")
  3905  				}
  3906  
  3907  				if p.Back&1 != 0 {
  3908  					v = q.Pc - (p.Pc + 2)
  3909  					if v >= -128 && p.As != AXBEGIN {
  3910  						if p.As == AJCXZL {
  3911  							ctxt.Andptr[0] = 0x67
  3912  							ctxt.Andptr = ctxt.Andptr[1:]
  3913  						}
  3914  						ctxt.Andptr[0] = byte(op)
  3915  						ctxt.Andptr = ctxt.Andptr[1:]
  3916  						ctxt.Andptr[0] = byte(v)
  3917  						ctxt.Andptr = ctxt.Andptr[1:]
  3918  					} else if yt.zcase == Zloop {
  3919  						ctxt.Diag("loop too far: %v", p)
  3920  					} else {
  3921  						v -= 5 - 2
  3922  						if p.As == AXBEGIN {
  3923  							v--
  3924  						}
  3925  						if yt.zcase == Zbr {
  3926  							ctxt.Andptr[0] = 0x0f
  3927  							ctxt.Andptr = ctxt.Andptr[1:]
  3928  							v--
  3929  						}
  3930  
  3931  						ctxt.Andptr[0] = byte(o.op[z+1])
  3932  						ctxt.Andptr = ctxt.Andptr[1:]
  3933  						ctxt.Andptr[0] = byte(v)
  3934  						ctxt.Andptr = ctxt.Andptr[1:]
  3935  						ctxt.Andptr[0] = byte(v >> 8)
  3936  						ctxt.Andptr = ctxt.Andptr[1:]
  3937  						ctxt.Andptr[0] = byte(v >> 16)
  3938  						ctxt.Andptr = ctxt.Andptr[1:]
  3939  						ctxt.Andptr[0] = byte(v >> 24)
  3940  						ctxt.Andptr = ctxt.Andptr[1:]
  3941  					}
  3942  
  3943  					break
  3944  				}
  3945  
  3946  				// Annotate target; will fill in later.
  3947  				p.Forwd = q.Rel
  3948  
  3949  				q.Rel = p
  3950  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3951  					if p.As == AJCXZL {
  3952  						ctxt.Andptr[0] = 0x67
  3953  						ctxt.Andptr = ctxt.Andptr[1:]
  3954  					}
  3955  					ctxt.Andptr[0] = byte(op)
  3956  					ctxt.Andptr = ctxt.Andptr[1:]
  3957  					ctxt.Andptr[0] = 0
  3958  					ctxt.Andptr = ctxt.Andptr[1:]
  3959  				} else if yt.zcase == Zloop {
  3960  					ctxt.Diag("loop too far: %v", p)
  3961  				} else {
  3962  					if yt.zcase == Zbr {
  3963  						ctxt.Andptr[0] = 0x0f
  3964  						ctxt.Andptr = ctxt.Andptr[1:]
  3965  					}
  3966  					ctxt.Andptr[0] = byte(o.op[z+1])
  3967  					ctxt.Andptr = ctxt.Andptr[1:]
  3968  					ctxt.Andptr[0] = 0
  3969  					ctxt.Andptr = ctxt.Andptr[1:]
  3970  					ctxt.Andptr[0] = 0
  3971  					ctxt.Andptr = ctxt.Andptr[1:]
  3972  					ctxt.Andptr[0] = 0
  3973  					ctxt.Andptr = ctxt.Andptr[1:]
  3974  					ctxt.Andptr[0] = 0
  3975  					ctxt.Andptr = ctxt.Andptr[1:]
  3976  				}
  3977  
  3978  				break
  3979  
  3980  			/*
  3981  				v = q->pc - p->pc - 2;
  3982  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3983  					*ctxt->andptr++ = op;
  3984  					*ctxt->andptr++ = v;
  3985  				} else {
  3986  					v -= 5-2;
  3987  					if(yt.zcase == Zbr) {
  3988  						*ctxt->andptr++ = 0x0f;
  3989  						v--;
  3990  					}
  3991  					*ctxt->andptr++ = o->op[z+1];
  3992  					*ctxt->andptr++ = v;
  3993  					*ctxt->andptr++ = v>>8;
  3994  					*ctxt->andptr++ = v>>16;
  3995  					*ctxt->andptr++ = v>>24;
  3996  				}
  3997  			*/
  3998  
  3999  			case Zbyte:
  4000  				v = vaddr(ctxt, p, &p.From, &rel)
  4001  				if rel.Siz != 0 {
  4002  					rel.Siz = uint8(op)
  4003  					r = obj.Addrel(ctxt.Cursym)
  4004  					*r = rel
  4005  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  4006  				}
  4007  
  4008  				ctxt.Andptr[0] = byte(v)
  4009  				ctxt.Andptr = ctxt.Andptr[1:]
  4010  				if op > 1 {
  4011  					ctxt.Andptr[0] = byte(v >> 8)
  4012  					ctxt.Andptr = ctxt.Andptr[1:]
  4013  					if op > 2 {
  4014  						ctxt.Andptr[0] = byte(v >> 16)
  4015  						ctxt.Andptr = ctxt.Andptr[1:]
  4016  						ctxt.Andptr[0] = byte(v >> 24)
  4017  						ctxt.Andptr = ctxt.Andptr[1:]
  4018  						if op > 4 {
  4019  							ctxt.Andptr[0] = byte(v >> 32)
  4020  							ctxt.Andptr = ctxt.Andptr[1:]
  4021  							ctxt.Andptr[0] = byte(v >> 40)
  4022  							ctxt.Andptr = ctxt.Andptr[1:]
  4023  							ctxt.Andptr[0] = byte(v >> 48)
  4024  							ctxt.Andptr = ctxt.Andptr[1:]
  4025  							ctxt.Andptr[0] = byte(v >> 56)
  4026  							ctxt.Andptr = ctxt.Andptr[1:]
  4027  						}
  4028  					}
  4029  				}
  4030  			}
  4031  
  4032  			return
  4033  		}
  4034  		z += int(yt.zoffset) + xo
  4035  	}
  4036  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4037  		var pp obj.Prog
  4038  		var t []byte
  4039  		if p.As == mo[0].as {
  4040  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4041  				t = mo[0].op[:]
  4042  				switch mo[0].code {
  4043  				default:
  4044  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4045  
  4046  				case 0: /* lit */
  4047  					for z = 0; t[z] != E; z++ {
  4048  						ctxt.Andptr[0] = t[z]
  4049  						ctxt.Andptr = ctxt.Andptr[1:]
  4050  					}
  4051  
  4052  				case 1: /* r,m */
  4053  					ctxt.Andptr[0] = t[0]
  4054  					ctxt.Andptr = ctxt.Andptr[1:]
  4055  
  4056  					asmando(ctxt, p, &p.To, int(t[1]))
  4057  
  4058  				case 2: /* m,r */
  4059  					ctxt.Andptr[0] = t[0]
  4060  					ctxt.Andptr = ctxt.Andptr[1:]
  4061  
  4062  					asmando(ctxt, p, &p.From, int(t[1]))
  4063  
  4064  				case 3: /* r,m - 2op */
  4065  					ctxt.Andptr[0] = t[0]
  4066  					ctxt.Andptr = ctxt.Andptr[1:]
  4067  
  4068  					ctxt.Andptr[0] = t[1]
  4069  					ctxt.Andptr = ctxt.Andptr[1:]
  4070  					asmando(ctxt, p, &p.To, int(t[2]))
  4071  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4072  
  4073  				case 4: /* m,r - 2op */
  4074  					ctxt.Andptr[0] = t[0]
  4075  					ctxt.Andptr = ctxt.Andptr[1:]
  4076  
  4077  					ctxt.Andptr[0] = t[1]
  4078  					ctxt.Andptr = ctxt.Andptr[1:]
  4079  					asmando(ctxt, p, &p.From, int(t[2]))
  4080  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  4081  
  4082  				case 5: /* load full pointer, trash heap */
  4083  					if t[0] != 0 {
  4084  						ctxt.Andptr[0] = t[0]
  4085  						ctxt.Andptr = ctxt.Andptr[1:]
  4086  					}
  4087  					switch p.To.Index {
  4088  					default:
  4089  						goto bad
  4090  
  4091  					case REG_DS:
  4092  						ctxt.Andptr[0] = 0xc5
  4093  						ctxt.Andptr = ctxt.Andptr[1:]
  4094  
  4095  					case REG_SS:
  4096  						ctxt.Andptr[0] = 0x0f
  4097  						ctxt.Andptr = ctxt.Andptr[1:]
  4098  						ctxt.Andptr[0] = 0xb2
  4099  						ctxt.Andptr = ctxt.Andptr[1:]
  4100  
  4101  					case REG_ES:
  4102  						ctxt.Andptr[0] = 0xc4
  4103  						ctxt.Andptr = ctxt.Andptr[1:]
  4104  
  4105  					case REG_FS:
  4106  						ctxt.Andptr[0] = 0x0f
  4107  						ctxt.Andptr = ctxt.Andptr[1:]
  4108  						ctxt.Andptr[0] = 0xb4
  4109  						ctxt.Andptr = ctxt.Andptr[1:]
  4110  
  4111  					case REG_GS:
  4112  						ctxt.Andptr[0] = 0x0f
  4113  						ctxt.Andptr = ctxt.Andptr[1:]
  4114  						ctxt.Andptr[0] = 0xb5
  4115  						ctxt.Andptr = ctxt.Andptr[1:]
  4116  					}
  4117  
  4118  					asmand(ctxt, p, &p.From, &p.To)
  4119  
  4120  				case 6: /* double shift */
  4121  					if t[0] == Pw {
  4122  						if p.Mode != 64 {
  4123  							ctxt.Diag("asmins: illegal 64: %v", p)
  4124  						}
  4125  						ctxt.Rexflag |= Pw
  4126  						t = t[1:]
  4127  					} else if t[0] == Pe {
  4128  						ctxt.Andptr[0] = Pe
  4129  						ctxt.Andptr = ctxt.Andptr[1:]
  4130  						t = t[1:]
  4131  					}
  4132  
  4133  					switch p.From.Type {
  4134  					default:
  4135  						goto bad
  4136  
  4137  					case obj.TYPE_CONST:
  4138  						ctxt.Andptr[0] = 0x0f
  4139  						ctxt.Andptr = ctxt.Andptr[1:]
  4140  						ctxt.Andptr[0] = t[0]
  4141  						ctxt.Andptr = ctxt.Andptr[1:]
  4142  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4143  						ctxt.Andptr[0] = byte(p.From.Offset)
  4144  						ctxt.Andptr = ctxt.Andptr[1:]
  4145  
  4146  					case obj.TYPE_REG:
  4147  						switch p.From.Reg {
  4148  						default:
  4149  							goto bad
  4150  
  4151  						case REG_CL, REG_CX:
  4152  							ctxt.Andptr[0] = 0x0f
  4153  							ctxt.Andptr = ctxt.Andptr[1:]
  4154  							ctxt.Andptr[0] = t[1]
  4155  							ctxt.Andptr = ctxt.Andptr[1:]
  4156  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  4157  						}
  4158  					}
  4159  
  4160  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4161  				// where you load the TLS base register into a register and then index off that
  4162  				// register to access the actual TLS variables. Systems that allow direct TLS access
  4163  				// are handled in prefixof above and should not be listed here.
  4164  				case 7: /* mov tls, r */
  4165  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  4166  						ctxt.Diag("invalid load of TLS: %v", p)
  4167  					}
  4168  
  4169  					if p.Mode == 32 {
  4170  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  4171  						// where you load the TLS base register into a register and then index off that
  4172  						// register to access the actual TLS variables. Systems that allow direct TLS access
  4173  						// are handled in prefixof above and should not be listed here.
  4174  						switch ctxt.Headtype {
  4175  						default:
  4176  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  4177  
  4178  						case obj.Hlinux,
  4179  							obj.Hnacl:
  4180  							if ctxt.Flag_shared != 0 {
  4181  								// Note that this is not generating the same insns as the other cases.
  4182  								//     MOV TLS, R_to
  4183  								// becomes
  4184  								//     call __x86.get_pc_thunk.cx
  4185  								//     movl (gotpc + g@gotntpoff)(%ecx),$R_To
  4186  								// which is encoded as
  4187  								//     call __x86.get_pc_thunk.cx
  4188  								//     movq 0(%ecx), R_to
  4189  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  4190  								// is g, which we can't check here, but will when we assemble the second
  4191  								// instruction.
  4192  								ctxt.Andptr[0] = 0xe8
  4193  								ctxt.Andptr = ctxt.Andptr[1:]
  4194  								r = obj.Addrel(ctxt.Cursym)
  4195  								r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  4196  								r.Type = obj.R_CALL
  4197  								r.Siz = 4
  4198  								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk.cx", 0)
  4199  								put4(ctxt, 0)
  4200  
  4201  								ctxt.Andptr[0] = 0x8B
  4202  								ctxt.Andptr = ctxt.Andptr[1:]
  4203  								ctxt.Andptr[0] = byte(2<<6 | reg[REG_CX] | (reg[p.To.Reg] << 3))
  4204  								ctxt.Andptr = ctxt.Andptr[1:]
  4205  								r = obj.Addrel(ctxt.Cursym)
  4206  								r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  4207  								r.Type = obj.R_TLS_IE
  4208  								r.Siz = 4
  4209  								r.Add = 2
  4210  								put4(ctxt, 0)
  4211  							} else {
  4212  								// ELF TLS base is 0(GS).
  4213  								pp.From = p.From
  4214  
  4215  								pp.From.Type = obj.TYPE_MEM
  4216  								pp.From.Reg = REG_GS
  4217  								pp.From.Offset = 0
  4218  								pp.From.Index = REG_NONE
  4219  								pp.From.Scale = 0
  4220  								ctxt.Andptr[0] = 0x65
  4221  								ctxt.Andptr = ctxt.Andptr[1:] // GS
  4222  								ctxt.Andptr[0] = 0x8B
  4223  								ctxt.Andptr = ctxt.Andptr[1:]
  4224  								asmand(ctxt, p, &pp.From, &p.To)
  4225  							}
  4226  						case obj.Hplan9:
  4227  							if ctxt.Plan9privates == nil {
  4228  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4229  							}
  4230  							pp.From = obj.Addr{}
  4231  							pp.From.Type = obj.TYPE_MEM
  4232  							pp.From.Name = obj.NAME_EXTERN
  4233  							pp.From.Sym = ctxt.Plan9privates
  4234  							pp.From.Offset = 0
  4235  							pp.From.Index = REG_NONE
  4236  							ctxt.Andptr[0] = 0x8B
  4237  							ctxt.Andptr = ctxt.Andptr[1:]
  4238  							asmand(ctxt, p, &pp.From, &p.To)
  4239  
  4240  						case obj.Hwindows:
  4241  							// Windows TLS base is always 0x14(FS).
  4242  							pp.From = p.From
  4243  
  4244  							pp.From.Type = obj.TYPE_MEM
  4245  							pp.From.Reg = REG_FS
  4246  							pp.From.Offset = 0x14
  4247  							pp.From.Index = REG_NONE
  4248  							pp.From.Scale = 0
  4249  							ctxt.Andptr[0] = 0x64
  4250  							ctxt.Andptr = ctxt.Andptr[1:] // FS
  4251  							ctxt.Andptr[0] = 0x8B
  4252  							ctxt.Andptr = ctxt.Andptr[1:]
  4253  							asmand(ctxt, p, &pp.From, &p.To)
  4254  						}
  4255  						break
  4256  					}
  4257  
  4258  					switch ctxt.Headtype {
  4259  					default:
  4260  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  4261  
  4262  					case obj.Hlinux:
  4263  						if ctxt.Flag_shared == 0 {
  4264  							log.Fatalf("unknown TLS base location for linux without -shared")
  4265  						}
  4266  						// Note that this is not generating the same insn as the other cases.
  4267  						//     MOV TLS, R_to
  4268  						// becomes
  4269  						//     movq g@gottpoff(%rip), R_to
  4270  						// which is encoded as
  4271  						//     movq 0(%rip), R_to
  4272  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4273  						// is g, which we can't check here, but will when we assemble the second
  4274  						// instruction.
  4275  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4276  
  4277  						ctxt.Andptr[0] = 0x8B
  4278  						ctxt.Andptr = ctxt.Andptr[1:]
  4279  						ctxt.Andptr[0] = byte(0x05 | (reg[p.To.Reg] << 3))
  4280  						ctxt.Andptr = ctxt.Andptr[1:]
  4281  						r = obj.Addrel(ctxt.Cursym)
  4282  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  4283  						r.Type = obj.R_TLS_IE
  4284  						r.Siz = 4
  4285  						r.Add = -4
  4286  						put4(ctxt, 0)
  4287  
  4288  					case obj.Hplan9:
  4289  						if ctxt.Plan9privates == nil {
  4290  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4291  						}
  4292  						pp.From = obj.Addr{}
  4293  						pp.From.Type = obj.TYPE_MEM
  4294  						pp.From.Name = obj.NAME_EXTERN
  4295  						pp.From.Sym = ctxt.Plan9privates
  4296  						pp.From.Offset = 0
  4297  						pp.From.Index = REG_NONE
  4298  						ctxt.Rexflag |= Pw
  4299  						ctxt.Andptr[0] = 0x8B
  4300  						ctxt.Andptr = ctxt.Andptr[1:]
  4301  						asmand(ctxt, p, &pp.From, &p.To)
  4302  
  4303  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4304  						// TLS base is 0(FS).
  4305  						pp.From = p.From
  4306  
  4307  						pp.From.Type = obj.TYPE_MEM
  4308  						pp.From.Name = obj.NAME_NONE
  4309  						pp.From.Reg = REG_NONE
  4310  						pp.From.Offset = 0
  4311  						pp.From.Index = REG_NONE
  4312  						pp.From.Scale = 0
  4313  						ctxt.Rexflag |= Pw
  4314  						ctxt.Andptr[0] = 0x64
  4315  						ctxt.Andptr = ctxt.Andptr[1:] // FS
  4316  						ctxt.Andptr[0] = 0x8B
  4317  						ctxt.Andptr = ctxt.Andptr[1:]
  4318  						asmand(ctxt, p, &pp.From, &p.To)
  4319  
  4320  					case obj.Hwindows:
  4321  						// Windows TLS base is always 0x28(GS).
  4322  						pp.From = p.From
  4323  
  4324  						pp.From.Type = obj.TYPE_MEM
  4325  						pp.From.Name = obj.NAME_NONE
  4326  						pp.From.Reg = REG_GS
  4327  						pp.From.Offset = 0x28
  4328  						pp.From.Index = REG_NONE
  4329  						pp.From.Scale = 0
  4330  						ctxt.Rexflag |= Pw
  4331  						ctxt.Andptr[0] = 0x65
  4332  						ctxt.Andptr = ctxt.Andptr[1:] // GS
  4333  						ctxt.Andptr[0] = 0x8B
  4334  						ctxt.Andptr = ctxt.Andptr[1:]
  4335  						asmand(ctxt, p, &pp.From, &p.To)
  4336  					}
  4337  				}
  4338  				return
  4339  			}
  4340  		}
  4341  	}
  4342  	goto bad
  4343  
  4344  bad:
  4345  	if p.Mode != 64 {
  4346  		/*
  4347  		 * here, the assembly has failed.
  4348  		 * if it's a byte instruction that has
  4349  		 * unaddressable registers, try to
  4350  		 * exchange registers and reissue the
  4351  		 * instruction with the operands renamed.
  4352  		 */
  4353  		pp := *p
  4354  
  4355  		unbytereg(&pp.From, &pp.Ft)
  4356  		unbytereg(&pp.To, &pp.Tt)
  4357  
  4358  		z := int(p.From.Reg)
  4359  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4360  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4361  			// For now, different to keep bit-for-bit compatibility.
  4362  			if p.Mode == 32 {
  4363  				breg := byteswapreg(ctxt, &p.To)
  4364  				if breg != REG_AX {
  4365  					ctxt.Andptr[0] = 0x87
  4366  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4367  					asmando(ctxt, p, &p.From, reg[breg])
  4368  					subreg(&pp, z, breg)
  4369  					doasm(ctxt, &pp)
  4370  					ctxt.Andptr[0] = 0x87
  4371  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4372  					asmando(ctxt, p, &p.From, reg[breg])
  4373  				} else {
  4374  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4375  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4376  					subreg(&pp, z, REG_AX)
  4377  					doasm(ctxt, &pp)
  4378  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4379  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4380  				}
  4381  				return
  4382  			}
  4383  
  4384  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4385  				// We certainly don't want to exchange
  4386  				// with AX if the op is MUL or DIV.
  4387  				ctxt.Andptr[0] = 0x87
  4388  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4389  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4390  				subreg(&pp, z, REG_BX)
  4391  				doasm(ctxt, &pp)
  4392  				ctxt.Andptr[0] = 0x87
  4393  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4394  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4395  			} else {
  4396  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4397  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4398  				subreg(&pp, z, REG_AX)
  4399  				doasm(ctxt, &pp)
  4400  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4401  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4402  			}
  4403  			return
  4404  		}
  4405  
  4406  		z = int(p.To.Reg)
  4407  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4408  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4409  			// For now, different to keep bit-for-bit compatibility.
  4410  			if p.Mode == 32 {
  4411  				breg := byteswapreg(ctxt, &p.From)
  4412  				if breg != REG_AX {
  4413  					ctxt.Andptr[0] = 0x87
  4414  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4415  					asmando(ctxt, p, &p.To, reg[breg])
  4416  					subreg(&pp, z, breg)
  4417  					doasm(ctxt, &pp)
  4418  					ctxt.Andptr[0] = 0x87
  4419  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4420  					asmando(ctxt, p, &p.To, reg[breg])
  4421  				} else {
  4422  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4423  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4424  					subreg(&pp, z, REG_AX)
  4425  					doasm(ctxt, &pp)
  4426  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4427  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4428  				}
  4429  				return
  4430  			}
  4431  
  4432  			if isax(&p.From) {
  4433  				ctxt.Andptr[0] = 0x87
  4434  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4435  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4436  				subreg(&pp, z, REG_BX)
  4437  				doasm(ctxt, &pp)
  4438  				ctxt.Andptr[0] = 0x87
  4439  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4440  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4441  			} else {
  4442  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4443  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4444  				subreg(&pp, z, REG_AX)
  4445  				doasm(ctxt, &pp)
  4446  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4447  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4448  			}
  4449  			return
  4450  		}
  4451  	}
  4452  
  4453  	ctxt.Diag("invalid instruction: %v", p)
  4454  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4455  	return
  4456  }
  4457  
  4458  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4459  // which is not referenced in a.
  4460  // If a is empty, it returns BX to account for MULB-like instructions
  4461  // that might use DX and AX.
  4462  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4463  	cand := 1
  4464  	canc := cand
  4465  	canb := canc
  4466  	cana := canb
  4467  
  4468  	if a.Type == obj.TYPE_NONE {
  4469  		cand = 0
  4470  		cana = cand
  4471  	}
  4472  
  4473  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4474  		switch a.Reg {
  4475  		case REG_NONE:
  4476  			cand = 0
  4477  			cana = cand
  4478  
  4479  		case REG_AX, REG_AL, REG_AH:
  4480  			cana = 0
  4481  
  4482  		case REG_BX, REG_BL, REG_BH:
  4483  			canb = 0
  4484  
  4485  		case REG_CX, REG_CL, REG_CH:
  4486  			canc = 0
  4487  
  4488  		case REG_DX, REG_DL, REG_DH:
  4489  			cand = 0
  4490  		}
  4491  	}
  4492  
  4493  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4494  		switch a.Index {
  4495  		case REG_AX:
  4496  			cana = 0
  4497  
  4498  		case REG_BX:
  4499  			canb = 0
  4500  
  4501  		case REG_CX:
  4502  			canc = 0
  4503  
  4504  		case REG_DX:
  4505  			cand = 0
  4506  		}
  4507  	}
  4508  
  4509  	if cana != 0 {
  4510  		return REG_AX
  4511  	}
  4512  	if canb != 0 {
  4513  		return REG_BX
  4514  	}
  4515  	if canc != 0 {
  4516  		return REG_CX
  4517  	}
  4518  	if cand != 0 {
  4519  		return REG_DX
  4520  	}
  4521  
  4522  	ctxt.Diag("impossible byte register")
  4523  	log.Fatalf("bad code")
  4524  	return 0
  4525  }
  4526  
  4527  func isbadbyte(a *obj.Addr) bool {
  4528  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4529  }
  4530  
  4531  var naclret = []uint8{
  4532  	0x5e, // POPL SI
  4533  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4534  	0x83,
  4535  	0xe6,
  4536  	0xe0, // ANDL $~31, SI
  4537  	0x4c,
  4538  	0x01,
  4539  	0xfe, // ADDQ R15, SI
  4540  	0xff,
  4541  	0xe6, // JMP SI
  4542  }
  4543  
  4544  var naclret8 = []uint8{
  4545  	0x5d, // POPL BP
  4546  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4547  	0x83,
  4548  	0xe5,
  4549  	0xe0, // ANDL $~31, BP
  4550  	0xff,
  4551  	0xe5, // JMP BP
  4552  }
  4553  
  4554  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4555  
  4556  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4557  
  4558  var naclmovs = []uint8{
  4559  	0x89,
  4560  	0xf6, // MOVL SI, SI
  4561  	0x49,
  4562  	0x8d,
  4563  	0x34,
  4564  	0x37, // LEAQ (R15)(SI*1), SI
  4565  	0x89,
  4566  	0xff, // MOVL DI, DI
  4567  	0x49,
  4568  	0x8d,
  4569  	0x3c,
  4570  	0x3f, // LEAQ (R15)(DI*1), DI
  4571  }
  4572  
  4573  var naclstos = []uint8{
  4574  	0x89,
  4575  	0xff, // MOVL DI, DI
  4576  	0x49,
  4577  	0x8d,
  4578  	0x3c,
  4579  	0x3f, // LEAQ (R15)(DI*1), DI
  4580  }
  4581  
  4582  func nacltrunc(ctxt *obj.Link, reg int) {
  4583  	if reg >= REG_R8 {
  4584  		ctxt.Andptr[0] = 0x45
  4585  		ctxt.Andptr = ctxt.Andptr[1:]
  4586  	}
  4587  	reg = (reg - REG_AX) & 7
  4588  	ctxt.Andptr[0] = 0x89
  4589  	ctxt.Andptr = ctxt.Andptr[1:]
  4590  	ctxt.Andptr[0] = byte(3<<6 | reg<<3 | reg)
  4591  	ctxt.Andptr = ctxt.Andptr[1:]
  4592  }
  4593  
  4594  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4595  	ctxt.Andptr = ctxt.And[:]
  4596  	ctxt.Asmode = int(p.Mode)
  4597  
  4598  	if p.As == obj.AUSEFIELD {
  4599  		r := obj.Addrel(ctxt.Cursym)
  4600  		r.Off = 0
  4601  		r.Siz = 0
  4602  		r.Sym = p.From.Sym
  4603  		r.Type = obj.R_USEFIELD
  4604  		return
  4605  	}
  4606  
  4607  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4608  		switch p.As {
  4609  		case obj.ARET:
  4610  			copy(ctxt.Andptr, naclret8)
  4611  			ctxt.Andptr = ctxt.Andptr[len(naclret8):]
  4612  			return
  4613  
  4614  		case obj.ACALL,
  4615  			obj.AJMP:
  4616  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4617  				ctxt.Andptr[0] = 0x83
  4618  				ctxt.Andptr = ctxt.Andptr[1:]
  4619  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4620  				ctxt.Andptr = ctxt.Andptr[1:]
  4621  				ctxt.Andptr[0] = 0xe0
  4622  				ctxt.Andptr = ctxt.Andptr[1:]
  4623  			}
  4624  
  4625  		case AINT:
  4626  			ctxt.Andptr[0] = 0xf4
  4627  			ctxt.Andptr = ctxt.Andptr[1:]
  4628  			return
  4629  		}
  4630  	}
  4631  
  4632  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4633  		if p.As == AREP {
  4634  			ctxt.Rep++
  4635  			return
  4636  		}
  4637  
  4638  		if p.As == AREPN {
  4639  			ctxt.Repn++
  4640  			return
  4641  		}
  4642  
  4643  		if p.As == ALOCK {
  4644  			ctxt.Lock++
  4645  			return
  4646  		}
  4647  
  4648  		if p.As != ALEAQ && p.As != ALEAL {
  4649  			if p.From.Index != obj.TYPE_NONE && p.From.Scale > 0 {
  4650  				nacltrunc(ctxt, int(p.From.Index))
  4651  			}
  4652  			if p.To.Index != obj.TYPE_NONE && p.To.Scale > 0 {
  4653  				nacltrunc(ctxt, int(p.To.Index))
  4654  			}
  4655  		}
  4656  
  4657  		switch p.As {
  4658  		case obj.ARET:
  4659  			copy(ctxt.Andptr, naclret)
  4660  			ctxt.Andptr = ctxt.Andptr[len(naclret):]
  4661  			return
  4662  
  4663  		case obj.ACALL,
  4664  			obj.AJMP:
  4665  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4666  				// ANDL $~31, reg
  4667  				ctxt.Andptr[0] = 0x83
  4668  				ctxt.Andptr = ctxt.Andptr[1:]
  4669  
  4670  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4671  				ctxt.Andptr = ctxt.Andptr[1:]
  4672  				ctxt.Andptr[0] = 0xe0
  4673  				ctxt.Andptr = ctxt.Andptr[1:]
  4674  
  4675  				// ADDQ R15, reg
  4676  				ctxt.Andptr[0] = 0x4c
  4677  				ctxt.Andptr = ctxt.Andptr[1:]
  4678  
  4679  				ctxt.Andptr[0] = 0x01
  4680  				ctxt.Andptr = ctxt.Andptr[1:]
  4681  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_AX))
  4682  				ctxt.Andptr = ctxt.Andptr[1:]
  4683  			}
  4684  
  4685  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4686  				// ANDL $~31, reg
  4687  				ctxt.Andptr[0] = 0x41
  4688  				ctxt.Andptr = ctxt.Andptr[1:]
  4689  
  4690  				ctxt.Andptr[0] = 0x83
  4691  				ctxt.Andptr = ctxt.Andptr[1:]
  4692  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_R8))
  4693  				ctxt.Andptr = ctxt.Andptr[1:]
  4694  				ctxt.Andptr[0] = 0xe0
  4695  				ctxt.Andptr = ctxt.Andptr[1:]
  4696  
  4697  				// ADDQ R15, reg
  4698  				ctxt.Andptr[0] = 0x4d
  4699  				ctxt.Andptr = ctxt.Andptr[1:]
  4700  
  4701  				ctxt.Andptr[0] = 0x01
  4702  				ctxt.Andptr = ctxt.Andptr[1:]
  4703  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_R8))
  4704  				ctxt.Andptr = ctxt.Andptr[1:]
  4705  			}
  4706  
  4707  		case AINT:
  4708  			ctxt.Andptr[0] = 0xf4
  4709  			ctxt.Andptr = ctxt.Andptr[1:]
  4710  			return
  4711  
  4712  		case ASCASB,
  4713  			ASCASW,
  4714  			ASCASL,
  4715  			ASCASQ,
  4716  			ASTOSB,
  4717  			ASTOSW,
  4718  			ASTOSL,
  4719  			ASTOSQ:
  4720  			copy(ctxt.Andptr, naclstos)
  4721  			ctxt.Andptr = ctxt.Andptr[len(naclstos):]
  4722  
  4723  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4724  			copy(ctxt.Andptr, naclmovs)
  4725  			ctxt.Andptr = ctxt.Andptr[len(naclmovs):]
  4726  		}
  4727  
  4728  		if ctxt.Rep != 0 {
  4729  			ctxt.Andptr[0] = 0xf3
  4730  			ctxt.Andptr = ctxt.Andptr[1:]
  4731  			ctxt.Rep = 0
  4732  		}
  4733  
  4734  		if ctxt.Repn != 0 {
  4735  			ctxt.Andptr[0] = 0xf2
  4736  			ctxt.Andptr = ctxt.Andptr[1:]
  4737  			ctxt.Repn = 0
  4738  		}
  4739  
  4740  		if ctxt.Lock != 0 {
  4741  			ctxt.Andptr[0] = 0xf0
  4742  			ctxt.Andptr = ctxt.Andptr[1:]
  4743  			ctxt.Lock = 0
  4744  		}
  4745  	}
  4746  
  4747  	ctxt.Rexflag = 0
  4748  	ctxt.Vexflag = 0
  4749  	and0 := ctxt.Andptr
  4750  	ctxt.Asmode = int(p.Mode)
  4751  	doasm(ctxt, p)
  4752  	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
  4753  		/*
  4754  		 * as befits the whole approach of the architecture,
  4755  		 * the rex prefix must appear before the first opcode byte
  4756  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4757  		 * before the 0f opcode escape!), or it might be ignored.
  4758  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4759  		 */
  4760  		if p.Mode != 64 {
  4761  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4762  		}
  4763  		n := -cap(ctxt.Andptr) + cap(and0)
  4764  		var c int
  4765  		var np int
  4766  		for np = 0; np < n; np++ {
  4767  			c = int(and0[np])
  4768  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4769  				break
  4770  			}
  4771  		}
  4772  
  4773  		copy(and0[np+1:], and0[np:n])
  4774  		and0[np] = byte(0x40 | ctxt.Rexflag)
  4775  		ctxt.Andptr = ctxt.Andptr[1:]
  4776  	}
  4777  
  4778  	n := -cap(ctxt.Andptr) + cap(ctxt.And[:])
  4779  	var r *obj.Reloc
  4780  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4781  		r = &ctxt.Cursym.R[i:][0]
  4782  		if int64(r.Off) < p.Pc {
  4783  			break
  4784  		}
  4785  		if ctxt.Rexflag != 0 {
  4786  			r.Off++
  4787  		}
  4788  		if r.Type == obj.R_PCREL {
  4789  			if p.Mode == 64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4790  				// PC-relative addressing is relative to the end of the instruction,
  4791  				// but the relocations applied by the linker are relative to the end
  4792  				// of the relocation. Because immediate instruction
  4793  				// arguments can follow the PC-relative memory reference in the
  4794  				// instruction encoding, the two may not coincide. In this case,
  4795  				// adjust addend so that linker can keep relocating relative to the
  4796  				// end of the relocation.
  4797  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4798  			} else if p.Mode == 32 {
  4799  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4800  				// assumes that the previous instruction loaded the PC of the end
  4801  				// of that instruction into CX, so the adjustment is relative to
  4802  				// that.
  4803  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4804  			}
  4805  		}
  4806  		if r.Type == obj.R_GOTPCREL && p.Mode == 32 {
  4807  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4808  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4809  		}
  4810  
  4811  	}
  4812  
  4813  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4814  		switch p.To.Reg {
  4815  		case REG_SP:
  4816  			copy(ctxt.Andptr, naclspfix)
  4817  			ctxt.Andptr = ctxt.Andptr[len(naclspfix):]
  4818  
  4819  		case REG_BP:
  4820  			copy(ctxt.Andptr, naclbpfix)
  4821  			ctxt.Andptr = ctxt.Andptr[len(naclbpfix):]
  4822  		}
  4823  	}
  4824  }