github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"encoding/binary"
    36  	"fmt"
    37  	"log"
    38  	"strings"
    39  )
    40  
    41  // Instruction layout.
    42  
    43  const (
    44  	// Loop alignment constants:
    45  	// want to align loop entry to LoopAlign-byte boundary,
    46  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    47  	// We define a loop entry as the target of a backward jump.
    48  	//
    49  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    50  	// and it aligns all jump targets, not just backward jump targets.
    51  	//
    52  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    53  	// is very slight but negative, so the alignment is disabled by
    54  	// setting MaxLoopPad = 0. The code is here for reference and
    55  	// for future experiments.
    56  	//
    57  	LoopAlign  = 16
    58  	MaxLoopPad = 0
    59  	FuncAlign  = 16
    60  )
    61  
    62  type Optab struct {
    63  	as     obj.As
    64  	ytab   []ytab
    65  	prefix uint8
    66  	op     [23]uint8
    67  }
    68  
    69  type ytab struct {
    70  	from    uint8
    71  	from3   uint8
    72  	to      uint8
    73  	zcase   uint8
    74  	zoffset uint8
    75  }
    76  
    77  type Movtab struct {
    78  	as   obj.As
    79  	ft   uint8
    80  	f3t  uint8
    81  	tt   uint8
    82  	code uint8
    83  	op   [4]uint8
    84  }
    85  
    86  const (
        	// Operand classes ("Ytypes"). They fill the from, from3 and to
        	// columns of the ytab rows in this file. Values come from iota, so
        	// the declaration order is significant and must not be changed.
    87  	Yxxx = iota
    88  	Ynone
    89  	Yi0 // $0
    90  	Yi1 // $1
    91  	Yi8 // $x, x fits in int8
    92  	Yu8 // $x, x fits in uint8
    93  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
    94  	Ys32 // "signed 32" per the optab comment; Yi0, Yi1 and Yi8 count as Ys32 via ycover
    95  	Yi32
    96  	Yi64
    97  	Yiauto
    98  	Yal
    99  	Ycl
   100  	Yax
   101  	Ycx
   102  	Yrb
   103  	Yrl
   104  	Yrl32 // Yrl on 32-bit system
   105  	Yrf
   106  	Yf0
   107  	Yrx
   108  	Ymb
   109  	Yml
   110  	Ym
   111  	Ybr
   112  	Ycs
   113  	Yss
   114  	Yds
   115  	Yes
   116  	Yfs
   117  	Ygs
   118  	Ygdtr
   119  	Yidtr
   120  	Yldtr
   121  	Ymsw
   122  	Ytask
   123  	Ycr0
   124  	Ycr1
   125  	Ycr2
   126  	Ycr3
   127  	Ycr4
   128  	Ycr5
   129  	Ycr6
   130  	Ycr7
   131  	Ycr8
   132  	Ydr0
   133  	Ydr1
   134  	Ydr2
   135  	Ydr3
   136  	Ydr4
   137  	Ydr5
   138  	Ydr6
   139  	Ydr7
   140  	Ytr0
   141  	Ytr1
   142  	Ytr2
   143  	Ytr3
   144  	Ytr4
   145  	Ytr5
   146  	Ytr6
   147  	Ytr7
   148  	Ymr
   149  	Ymm
   150  	Yxr
   151  	Yxm
   152  	Yyr
   153  	Yym
   154  	Ytls
   155  	Ytextsize
   156  	Yindir
   157  	Ymax // count of the classes above; ycover is sized Ymax*Ymax
   158  )
   159  
   160  const (
        	// Encoding cases ("Ztypes"). They fill the zcase column of the ytab
        	// rows in this file. Values come from iota, so the declaration
        	// order is significant and must not be changed.
   161  	Zxxx = iota
   162  	Zlit
   163  	Zlitm_r
   164  	Z_rp
   165  	Zbr
   166  	Zcall
   167  	Zcallcon
   168  	Zcallduff
   169  	Zcallind
   170  	Zcallindreg
   171  	Zib_
   172  	Zib_rp
   173  	Zibo_m
   174  	Zibo_m_xm
   175  	Zil_
   176  	Zil_rp
   177  	Ziq_rp
   178  	Zilo_m
   179  	Zjmp
   180  	Zjmpcon
   181  	Zloop
   182  	Zo_iw
   183  	Zm_o
   184  	Zm_r
   185  	Zm2_r
   186  	Zm_r_xm
   187  	Zm_r_i_xm
   188  	Zm_r_xm_nr
   189  	Zr_m_xm_nr
   190  	Zibm_r /* mmx1,mmx2/mem64,imm8 */
   191  	Zibr_m
   192  	Zmb_r
   193  	Zaut_r
   194  	Zo_m
   195  	Zo_m64
   196  	Zpseudo
   197  	Zr_m
   198  	Zr_m_xm
   199  	Zrp_
   200  	Z_ib
   201  	Z_il
   202  	Zm_ibo
   203  	Zm_ilo
   204  	Zib_rr
   205  	Zil_rr
   206  	Zclr
   207  	Zbyte
   208  	Zvex_rm_v_r
   209  	Zvex_r_v_rm
   210  	Zvex_v_rm_r
   211  	Zmax // last entry: one past the final encoding case
   212  )
   213  
   214  const (
   215  	Px   = 0
   216  	Px1  = 1    // symbolic; exact value doesn't matter
   217  	P32  = 0x32 /* 32-bit only */
   218  	Pe   = 0x66 /* operand escape */
   219  	Pm   = 0x0f /* 2byte opcode escape */
   220  	Pq   = 0xff /* both escapes: 66 0f */
   221  	Pb   = 0xfe /* byte operands */
   222  	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
   223  	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
   224  	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
   225  	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
   226  	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
   227  	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
   228  	Pw   = 0x48 /* Rex.w */
   229  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   230  	Py   = 0x80 /* defaults to 64-bit mode */
   231  	Py1  = 0x81 // symbolic; exact value doesn't matter
   232  	Py3  = 0x83 // symbolic; exact value doesn't matter
   233  	Pvex = 0x84 // symbolic: exact value doesn't matter
   234  
   235  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   236  	Rxr = 1 << 2 /* extend modrm reg */
   237  	Rxx = 1 << 1 /* extend sib index */
   238  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   239  )
   240  
   241  const (
   242  	// Encoding for VEX prefix in tables.
   243  	// The P, L, and W fields are chosen to match
   244  	// their eventual locations in the VEX prefix bytes.
   245  
   246  	// P field - 2 bits
   247  	vex66 = 1 << 0
   248  	vexF3 = 2 << 0
   249  	vexF2 = 3 << 0
   250  	// L field - 1 bit
   251  	vexLZ  = 0 << 2
   252  	vexLIG = 0 << 2
   253  	vex128 = 0 << 2
   254  	vex256 = 1 << 2
   255  	// W field - 1 bit
   256  	vexWIG = 0 << 7
   257  	vexW0  = 0 << 7
   258  	vexW1  = 1 << 7
   259  	// M field - 5 bits, but mostly reserved; we can store up to 4
   260  	vex0F   = 1 << 3
   261  	vex0F38 = 2 << 3
   262  	vex0F3A = 3 << 3
   263  
   264  	// Combinations used in the manual.
   265  	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
   266  	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
   267  	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
   268  	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
   269  	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
   270  	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
   271  	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
   272  	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
   273  	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
   274  	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
   275  	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
   276  	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
   277  	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
   278  	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
   279  	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
   280  	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
   281  	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
   282  	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
   283  	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
   284  	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
   285  	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
   286  	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
   287  	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
   288  	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
   289  	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
   290  	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
   291  	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
   292  	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
   293  	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
   294  	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
   295  	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
   296  	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
   297  	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
   298  	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
   299  	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
   300  	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
   301  	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
   302  	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
   303  	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
   304  	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
   305  	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
   306  	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
   307  	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
   308  )
   309  
   310  var ycover [Ymax * Ymax]uint8
   311  
   312  var reg [MAXREG]int
   313  
   314  var regrex [MAXREG + 1]int
   315  
   316  var ynone = []ytab{
   317  	{Ynone, Ynone, Ynone, Zlit, 1},
   318  }
   319  
   320  var ytext = []ytab{
   321  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   322  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   323  }
   324  
   325  var ynop = []ytab{
   326  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   327  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   328  	{Ynone, Ynone, Yml, Zpseudo, 0},
   329  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   330  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   331  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   332  	{Yml, Ynone, Ynone, Zpseudo, 0},
   333  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   334  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   335  }
   336  
   337  var yfuncdata = []ytab{
   338  	{Yi32, Ynone, Ym, Zpseudo, 0},
   339  }
   340  
   341  var ypcdata = []ytab{
   342  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   343  }
   344  
   345  var yxorb = []ytab{
   346  	{Yi32, Ynone, Yal, Zib_, 1},
   347  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   348  	{Yrb, Ynone, Ymb, Zr_m, 1},
   349  	{Ymb, Ynone, Yrb, Zm_r, 1},
   350  }
   351  
   352  var yxorl = []ytab{
   353  	{Yi8, Ynone, Yml, Zibo_m, 2},
   354  	{Yi32, Ynone, Yax, Zil_, 1},
   355  	{Yi32, Ynone, Yml, Zilo_m, 2},
   356  	{Yrl, Ynone, Yml, Zr_m, 1},
   357  	{Yml, Ynone, Yrl, Zm_r, 1},
   358  }
   359  
   360  var yaddl = []ytab{
   361  	{Yi8, Ynone, Yml, Zibo_m, 2},
   362  	{Yi32, Ynone, Yax, Zil_, 1},
   363  	{Yi32, Ynone, Yml, Zilo_m, 2},
   364  	{Yrl, Ynone, Yml, Zr_m, 1},
   365  	{Yml, Ynone, Yrl, Zm_r, 1},
   366  }
   367  
   368  var yincb = []ytab{
   369  	{Ynone, Ynone, Ymb, Zo_m, 2},
   370  }
   371  
   372  var yincw = []ytab{
   373  	{Ynone, Ynone, Yml, Zo_m, 2},
   374  }
   375  
   376  var yincl = []ytab{
   377  	{Ynone, Ynone, Yrl, Z_rp, 1},
   378  	{Ynone, Ynone, Yml, Zo_m, 2},
   379  }
   380  
   381  var yincq = []ytab{
   382  	{Ynone, Ynone, Yml, Zo_m, 2},
   383  }
   384  
   385  var ycmpb = []ytab{
   386  	{Yal, Ynone, Yi32, Z_ib, 1},
   387  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   388  	{Ymb, Ynone, Yrb, Zm_r, 1},
   389  	{Yrb, Ynone, Ymb, Zr_m, 1},
   390  }
   391  
   392  var ycmpl = []ytab{
   393  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   394  	{Yax, Ynone, Yi32, Z_il, 1},
   395  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   396  	{Yml, Ynone, Yrl, Zm_r, 1},
   397  	{Yrl, Ynone, Yml, Zr_m, 1},
   398  }
   399  
   400  var yshb = []ytab{
   401  	{Yi1, Ynone, Ymb, Zo_m, 2},
   402  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   403  	{Ycx, Ynone, Ymb, Zo_m, 2},
   404  }
   405  
   406  var yshl = []ytab{
   407  	{Yi1, Ynone, Yml, Zo_m, 2},
   408  	{Yi32, Ynone, Yml, Zibo_m, 2},
   409  	{Ycl, Ynone, Yml, Zo_m, 2},
   410  	{Ycx, Ynone, Yml, Zo_m, 2},
   411  }
   412  
   413  var ytestb = []ytab{
   414  	{Yi32, Ynone, Yal, Zib_, 1},
   415  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   416  	{Yrb, Ynone, Ymb, Zr_m, 1},
   417  	{Ymb, Ynone, Yrb, Zm_r, 1},
   418  }
   419  
   420  var ytestl = []ytab{
   421  	{Yi32, Ynone, Yax, Zil_, 1},
   422  	{Yi32, Ynone, Yml, Zilo_m, 2},
   423  	{Yrl, Ynone, Yml, Zr_m, 1},
   424  	{Yml, Ynone, Yrl, Zm_r, 1},
   425  }
   426  
   427  var ymovb = []ytab{
   428  	{Yrb, Ynone, Ymb, Zr_m, 1},
   429  	{Ymb, Ynone, Yrb, Zm_r, 1},
   430  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   431  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   432  }
   433  
   434  var ymbs = []ytab{
   435  	{Ymb, Ynone, Ynone, Zm_o, 2},
   436  }
   437  
   438  var ybtl = []ytab{
   439  	{Yi8, Ynone, Yml, Zibo_m, 2},
   440  	{Yrl, Ynone, Yml, Zr_m, 1},
   441  }
   442  
   443  var ymovw = []ytab{
   444  	{Yrl, Ynone, Yml, Zr_m, 1},
   445  	{Yml, Ynone, Yrl, Zm_r, 1},
   446  	{Yi0, Ynone, Yrl, Zclr, 1},
   447  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   448  	{Yi32, Ynone, Yml, Zilo_m, 2},
   449  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   450  }
   451  
   452  var ymovl = []ytab{
   453  	{Yrl, Ynone, Yml, Zr_m, 1},
   454  	{Yml, Ynone, Yrl, Zm_r, 1},
   455  	{Yi0, Ynone, Yrl, Zclr, 1},
   456  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   457  	{Yi32, Ynone, Yml, Zilo_m, 2},
   458  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   459  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   460  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   461  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   462  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   463  }
   464  
   465  var yret = []ytab{
   466  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   467  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   468  }
   469  
   470  var ymovq = []ytab{
   471  	// valid in 32-bit mode
   472  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   473  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   474  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   475  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   476  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   477  
   478  	// valid only in 64-bit mode, usually with 64-bit prefix
   479  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   480  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   481  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   482  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   483  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   484  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   485  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   486  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   487  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   488  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   489  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   490  }
   491  
   492  var ym_rl = []ytab{
   493  	{Ym, Ynone, Yrl, Zm_r, 1},
   494  }
   495  
   496  var yrl_m = []ytab{
   497  	{Yrl, Ynone, Ym, Zr_m, 1},
   498  }
   499  
   500  var ymb_rl = []ytab{
   501  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   502  }
   503  
   504  var yml_rl = []ytab{
   505  	{Yml, Ynone, Yrl, Zm_r, 1},
   506  }
   507  
   508  var yrl_ml = []ytab{
   509  	{Yrl, Ynone, Yml, Zr_m, 1},
   510  }
   511  
   512  var yml_mb = []ytab{
   513  	{Yrb, Ynone, Ymb, Zr_m, 1},
   514  	{Ymb, Ynone, Yrb, Zm_r, 1},
   515  }
   516  
   517  var yrb_mb = []ytab{
   518  	{Yrb, Ynone, Ymb, Zr_m, 1},
   519  }
   520  
   521  var yxchg = []ytab{
   522  	{Yax, Ynone, Yrl, Z_rp, 1},
   523  	{Yrl, Ynone, Yax, Zrp_, 1},
   524  	{Yrl, Ynone, Yml, Zr_m, 1},
   525  	{Yml, Ynone, Yrl, Zm_r, 1},
   526  }
   527  
   528  var ydivl = []ytab{
   529  	{Yml, Ynone, Ynone, Zm_o, 2},
   530  }
   531  
   532  var ydivb = []ytab{
   533  	{Ymb, Ynone, Ynone, Zm_o, 2},
   534  }
   535  
   536  var yimul = []ytab{
   537  	{Yml, Ynone, Ynone, Zm_o, 2},
   538  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   539  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   540  	{Yml, Ynone, Yrl, Zm_r, 2},
   541  }
   542  
   543  var yimul3 = []ytab{
   544  	{Yi8, Yml, Yrl, Zibm_r, 2},
   545  }
   546  
   547  var ybyte = []ytab{
   548  	{Yi64, Ynone, Ynone, Zbyte, 1},
   549  }
   550  
   551  var yin = []ytab{
   552  	{Yi32, Ynone, Ynone, Zib_, 1},
   553  	{Ynone, Ynone, Ynone, Zlit, 1},
   554  }
   555  
   556  var yint = []ytab{
   557  	{Yi32, Ynone, Ynone, Zib_, 1},
   558  }
   559  
   560  var ypushl = []ytab{
   561  	{Yrl, Ynone, Ynone, Zrp_, 1},
   562  	{Ym, Ynone, Ynone, Zm_o, 2},
   563  	{Yi8, Ynone, Ynone, Zib_, 1},
   564  	{Yi32, Ynone, Ynone, Zil_, 1},
   565  }
   566  
   567  var ypopl = []ytab{
   568  	{Ynone, Ynone, Yrl, Z_rp, 1},
   569  	{Ynone, Ynone, Ym, Zo_m, 2},
   570  }
   571  
   572  var ybswap = []ytab{
   573  	{Ynone, Ynone, Yrl, Z_rp, 2},
   574  }
   575  
   576  var yscond = []ytab{
   577  	{Ynone, Ynone, Ymb, Zo_m, 2},
   578  }
   579  
   580  var yjcond = []ytab{
   581  	{Ynone, Ynone, Ybr, Zbr, 0},
   582  	{Yi0, Ynone, Ybr, Zbr, 0},
   583  	{Yi1, Ynone, Ybr, Zbr, 1},
   584  }
   585  
   586  var yloop = []ytab{
   587  	{Ynone, Ynone, Ybr, Zloop, 1},
   588  }
   589  
   590  var ycall = []ytab{
   591  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   592  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   593  	{Ynone, Ynone, Yindir, Zcallind, 2},
   594  	{Ynone, Ynone, Ybr, Zcall, 0},
   595  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   596  }
   597  
   598  var yduff = []ytab{
   599  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   600  }
   601  
   602  var yjmp = []ytab{
   603  	{Ynone, Ynone, Yml, Zo_m64, 2},
   604  	{Ynone, Ynone, Ybr, Zjmp, 0},
   605  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   606  }
   607  
   608  var yfmvd = []ytab{
   609  	{Ym, Ynone, Yf0, Zm_o, 2},
   610  	{Yf0, Ynone, Ym, Zo_m, 2},
   611  	{Yrf, Ynone, Yf0, Zm_o, 2},
   612  	{Yf0, Ynone, Yrf, Zo_m, 2},
   613  }
   614  
   615  var yfmvdp = []ytab{
   616  	{Yf0, Ynone, Ym, Zo_m, 2},
   617  	{Yf0, Ynone, Yrf, Zo_m, 2},
   618  }
   619  
   620  var yfmvf = []ytab{
   621  	{Ym, Ynone, Yf0, Zm_o, 2},
   622  	{Yf0, Ynone, Ym, Zo_m, 2},
   623  }
   624  
   625  var yfmvx = []ytab{
   626  	{Ym, Ynone, Yf0, Zm_o, 2},
   627  }
   628  
   629  var yfmvp = []ytab{
   630  	{Yf0, Ynone, Ym, Zo_m, 2},
   631  }
   632  
   633  var yfcmv = []ytab{
   634  	{Yrf, Ynone, Yf0, Zm_o, 2},
   635  }
   636  
   637  var yfadd = []ytab{
   638  	{Ym, Ynone, Yf0, Zm_o, 2},
   639  	{Yrf, Ynone, Yf0, Zm_o, 2},
   640  	{Yf0, Ynone, Yrf, Zo_m, 2},
   641  }
   642  
   643  var yfaddp = []ytab{
   644  	{Yf0, Ynone, Yrf, Zo_m, 2},
   645  }
   646  
   647  var yfxch = []ytab{
   648  	{Yf0, Ynone, Yrf, Zo_m, 2},
   649  	{Yrf, Ynone, Yf0, Zm_o, 2},
   650  }
   651  
   652  var ycompp = []ytab{
   653  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   654  }
   655  
   656  var ystsw = []ytab{
   657  	{Ynone, Ynone, Ym, Zo_m, 2},
   658  	{Ynone, Ynone, Yax, Zlit, 1},
   659  }
   660  
   661  var ystcw = []ytab{
   662  	{Ynone, Ynone, Ym, Zo_m, 2},
   663  	{Ym, Ynone, Ynone, Zm_o, 2},
   664  }
   665  
   666  var ysvrs = []ytab{
   667  	{Ynone, Ynone, Ym, Zo_m, 2},
   668  	{Ym, Ynone, Ynone, Zm_o, 2},
   669  }
   670  
   671  var ymm = []ytab{
   672  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   673  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   674  }
   675  
   676  var yxm = []ytab{
   677  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   678  }
   679  
   680  var yxm_q4 = []ytab{
   681  	{Yxm, Ynone, Yxr, Zm_r, 1},
   682  }
   683  
   684  var yxcvm1 = []ytab{
   685  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   686  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   687  }
   688  
   689  var yxcvm2 = []ytab{
   690  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   691  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   692  }
   693  
   694  /*
   695  var yxmq = []ytab{
   696  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   697  }
   698  */
   699  
   700  var yxr = []ytab{
   701  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   702  }
   703  
   704  var yxr_ml = []ytab{
   705  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   706  }
   707  
   708  var ymr = []ytab{
   709  	{Ymr, Ynone, Ymr, Zm_r, 1},
   710  }
   711  
   712  var ymr_ml = []ytab{
   713  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   714  }
   715  
   716  var yxcmp = []ytab{
   717  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   718  }
   719  
   720  var yxcmpi = []ytab{
   721  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   722  }
   723  
   724  var yxmov = []ytab{
   725  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   726  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   727  }
   728  
   729  var yxcvfl = []ytab{
   730  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   731  }
   732  
   733  var yxcvlf = []ytab{
   734  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   735  }
   736  
   737  var yxcvfq = []ytab{
   738  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   739  }
   740  
   741  var yxcvqf = []ytab{
   742  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   743  }
   744  
   745  var yps = []ytab{
   746  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   747  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   748  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   749  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   750  }
   751  
   752  var yxrrl = []ytab{
   753  	{Yxr, Ynone, Yrl, Zm_r, 1},
   754  }
   755  
   756  var ymrxr = []ytab{
   757  	{Ymr, Ynone, Yxr, Zm_r, 1},
   758  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   759  }
   760  
   761  var ymshuf = []ytab{
   762  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   763  }
   764  
   765  var ymshufb = []ytab{
   766  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   767  }
   768  
   769  var yxshuf = []ytab{
   770  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   771  }
   772  
   773  var yextrw = []ytab{
   774  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   775  }
   776  
   777  var yextr = []ytab{
   778  	{Yu8, Yxr, Ymm, Zibr_m, 3},
   779  }
   780  
   781  var yinsrw = []ytab{
   782  	{Yu8, Yml, Yxr, Zibm_r, 2},
   783  }
   784  
   785  var yinsr = []ytab{
   786  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   787  }
   788  
   789  var ypsdq = []ytab{
   790  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   791  }
   792  
   793  var ymskb = []ytab{
   794  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   795  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   796  }
   797  
   798  var ycrc32l = []ytab{
   799  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   800  }
   801  
   802  var yprefetch = []ytab{
   803  	{Ym, Ynone, Ynone, Zm_o, 2},
   804  }
   805  
   806  var yaes = []ytab{
   807  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   808  }
   809  
   810  var yaes2 = []ytab{
   811  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   812  }
   813  
   814  var yxbegin = []ytab{
   815  	{Ynone, Ynone, Ybr, Zjmp, 1},
   816  }
   817  
   818  var yxabort = []ytab{
   819  	{Yu8, Ynone, Ynone, Zib_, 1},
   820  }
   821  
   822  var ylddqu = []ytab{
   823  	{Ym, Ynone, Yxr, Zm_r, 1},
   824  }
   825  
   826  // VEX instructions that come in two forms:
   827  //	VTHING xmm2/m128, xmmV, xmm1
   828  //	VTHING ymm2/m256, ymmV, ymm1
   829  // The opcode array in the corresponding Optab entry
   830  // should contain the (VEX prefixes, opcode byte) pair
   831  // for each of the two forms.
   832  // For example, the entries for VPXOR are:
   833  //
   834  //	VPXOR xmm2/m128, xmmV, xmm1
   835  //	VEX.NDS.128.66.0F.WIG EF /r
   836  //
   837  //	VPXOR ymm2/m256, ymmV, ymm1
   838  //	VEX.NDS.256.66.0F.WIG EF /r
   839  //
   840  // The NDS/NDD/DDS part can be dropped, producing this
   841  // Optab entry:
   842  //
   843  //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
   844  //
   845  var yvex_xy3 = []ytab{
   846  	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
   847  	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
   848  }
   849  
   850  var yvex_r3 = []ytab{
   851  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   852  	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
   853  }
   854  
   855  var yvex_vmr3 = []ytab{
   856  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   857  	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
   858  }
   859  
   860  var yvex_xy2 = []ytab{
   861  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   862  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   863  }
   864  
   865  var yvex_xyr2 = []ytab{
   866  	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
   867  	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
   868  }
   869  
   870  var yvex_vmovdqa = []ytab{
   871  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   872  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   873  	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
   874  	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
   875  }
   876  
   877  var yvex_vmovntdq = []ytab{
   878  	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
   879  	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
   880  }
   881  
   882  var yvex_vpbroadcast = []ytab{
   883  	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
   884  	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
   885  }
   886  
   887  var yvex_xxmyxm = []ytab{
   888  	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
   889  	{Yyr, Ynone, Yxm, Zvex_r_v_rm, 2},
   890  }
   891  
   892  var ymmxmm0f38 = []ytab{
   893  	{Ymm, Ynone, Ymr, Zlitm_r, 3},
   894  	{Yxm, Ynone, Yxr, Zlitm_r, 5},
   895  }
   896  
   897  /*
   898   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   899   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   900   * the entry with the given p->as and then looks through the ytable for that
   901   * instruction (the second field in the optab struct) for a line whose from,
   902   * from3 and to entries match the Ytypes of the instruction's operands.  The function
   903   * oclass in span.c computes the specific Ytype of an operand and then the set
   904   * of more general Ytypes that it satisfies is implied by the ycover table, set
   905   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   906   * from the more general 8-bit constants, but instinit says
   907   *
   908   *        ycover[Yi0*Ymax + Ys32] = 1;
   909   *        ycover[Yi1*Ymax + Ys32] = 1;
   910   *        ycover[Yi8*Ymax + Ys32] = 1;
   911   *
   912   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   913   * if that's what an instruction can handle.
   914   *
   915   * In parallel with the scan through the ytable for the appropriate line, there
   916   * is a z pointer that starts out pointing at the strange magic byte list in
   917   * the Optab struct.  With each step past a non-matching ytable line, z
   918   * advances by that line's zoffset (its last entry).  When a matching line is found, that
   919   * z pointer has the extra data to use in laying down the instruction bytes.
   920   * The actual bytes laid down are a function of the line's zcase entry (that
   921   * is, the Ztype) and the z bytes.
   922   *
   923   * For example, let's look at AADDL, shown here in the original C layout (no from3 column).  The optab line says:
   924   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   925   *
   926   * and yaddl says
   927   *        uchar   yaddl[] =
   928   *        {
   929   *                Yi8,    Yml,    Zibo_m, 2,
   930   *                Yi32,   Yax,    Zil_,   1,
   931   *                Yi32,   Yml,    Zilo_m, 2,
   932   *                Yrl,    Yml,    Zr_m,   1,
   933   *                Yml,    Yrl,    Zm_r,   1,
   934   *                0
   935   *        };
   936   *
   937   * so there are 5 possible types of ADDL instruction that can be laid down, and
   938   * possible states used to lay them down (Ztype and z pointer, assuming z
   939   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   940   *
   941   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   942   *        Yi32, Yax -> Zil_, z+2 (0x05)
   943   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   944   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   945   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   946   *
   947   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   948   * relatively straightforward as this program goes.
   949   *
   950   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   951   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   952   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   953   * Zilo_m is the same but a long (32-bit) immediate.
   954   */
   955  var optab =
   956  /*	as, ytab, andproto, opcode */
   957  []Optab{
   958  	{obj.AXXX, nil, 0, [23]uint8{}},
   959  	{AAAA, ynone, P32, [23]uint8{0x37}},
   960  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   961  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   962  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   963  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   964  	{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   965  	{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   966  	{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   967  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   968  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   969  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   970  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   971  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   972  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   973  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   974  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   975  	{AADJSP, nil, 0, [23]uint8{}},
   976  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   977  	{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   978  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   979  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   980  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   981  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
   982  	{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   983  	{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   984  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   985  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   986  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   987  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   988  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   989  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   990  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   991  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   992  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   993  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   994  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   995  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   996  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   997  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   998  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   999  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
  1000  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
  1001  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
  1002  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
  1003  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
  1004  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
  1005  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
  1006  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
  1007  	{ABYTE, ybyte, Px, [23]uint8{1}},
  1008  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
  1009  	{ACDQ, ynone, Px, [23]uint8{0x99}},
  1010  	{ACLC, ynone, Px, [23]uint8{0xf8}},
  1011  	{ACLD, ynone, Px, [23]uint8{0xfc}},
  1012  	{ACLI, ynone, Px, [23]uint8{0xfa}},
  1013  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
  1014  	{ACMC, ynone, Px, [23]uint8{0xf5}},
  1015  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
  1016  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
  1017  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
  1018  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
  1019  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
  1020  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
  1021  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
  1022  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
  1023  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
  1024  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
  1025  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
  1026  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
  1027  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
  1028  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
  1029  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
  1030  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
  1031  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
  1032  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
  1033  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
  1034  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
  1035  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
  1036  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
  1037  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
  1038  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
  1039  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
  1040  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
  1041  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
  1042  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
  1043  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
  1044  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
  1045  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
  1046  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
  1047  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
  1048  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
  1049  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
  1050  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
  1051  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
  1052  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
  1053  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
  1054  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
  1055  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
  1056  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
  1057  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
  1058  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
  1059  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
  1060  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
  1061  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
  1062  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
  1063  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
  1064  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1065  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
  1066  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
  1067  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1068  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
  1069  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
  1070  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
  1071  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
  1072  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
  1073  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
  1074  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1075  	{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
  1076  	{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
  1077  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
  1078  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
  1079  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
  1080  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
  1081  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
  1082  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
  1083  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
  1084  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
  1085  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
  1086  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
  1087  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
  1088  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
  1089  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
  1090  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
  1091  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
  1092  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
  1093  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
  1094  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
  1095  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
  1096  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
  1097  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
  1098  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
  1099  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
  1100  	{ACWD, ynone, Pe, [23]uint8{0x99}},
  1101  	{ACQO, ynone, Pw, [23]uint8{0x99}},
  1102  	{ADAA, ynone, P32, [23]uint8{0x27}},
  1103  	{ADAS, ynone, P32, [23]uint8{0x2f}},
  1104  	{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
  1105  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
  1106  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
  1107  	{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
  1108  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
  1109  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
  1110  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
  1111  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
  1112  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
  1113  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
  1114  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
  1115  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
  1116  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
  1117  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
  1118  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
  1119  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
  1120  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1121  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1122  	{obj.AGLOBL, nil, 0, [23]uint8{}},
  1123  	{AHLT, ynone, Px, [23]uint8{0xf4}},
  1124  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
  1125  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
  1126  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
  1127  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
  1128  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
  1129  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1130  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1131  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1132  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
  1133  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
  1134  	{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
  1135  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
  1136  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
  1137  	{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
  1138  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
  1139  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
  1140  	{AINSL, ynone, Px, [23]uint8{0x6d}},
  1141  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
  1142  	{AINT, yint, Px, [23]uint8{0xcd}},
  1143  	{AINTO, ynone, P32, [23]uint8{0xce}},
  1144  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
  1145  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
  1146  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
  1147  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
  1148  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
  1149  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
  1150  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1151  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1152  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1153  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1154  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1155  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1156  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1157  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1158  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1159  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1160  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1161  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1162  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1163  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1164  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1165  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1166  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1167  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1168  	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
  1169  	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
  1170  	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
  1171  	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
  1172  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1173  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1174  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1175  	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
  1176  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1177  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1178  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1179  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1180  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1181  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1182  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1183  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1184  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1185  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1186  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1187  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1188  	{ALONG, ybyte, Px, [23]uint8{4}},
  1189  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1190  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1191  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1192  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1193  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1194  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1195  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1196  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1197  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1198  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1199  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1200  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1201  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1202  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1203  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1204  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1205  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1206  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1207  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1208  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1209  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1210  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1211  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1212  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1213  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1214  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1215  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1216  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1217  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1218  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1219  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1220  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1221  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1222  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1223  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1224  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1225  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1226  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1227  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1228  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1229  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1230  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1231  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1232  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1233  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1234  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1235  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1236  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1237  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1238  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1239  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1240  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1241  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1242  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1243  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1244  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1245  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1246  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1247  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1248  	{AMULPS, yxm, Pm, [23]uint8{0x59}}, // prefix column takes Pm, matching ADDPS/DIVPS/SUBPS
  1249  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1250  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1251  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1252  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1253  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1254  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1255  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1256  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1257  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1258  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1259  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1260  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1261  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1262  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1263  	{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1264  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1265  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1266  	{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1267  	{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1268  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1269  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1270  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1271  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1272  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1273  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1274  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1275  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1276  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1277  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1278  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1279  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1280  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1281  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1282  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1283  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1284  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1285  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1286  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1287  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1288  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1289  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1290  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1291  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1292  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1293  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1294  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1295  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1296  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1297  	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
  1298  	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
  1299  	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
  1300  	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1301  	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
  1302  	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
  1303  	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
  1304  	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
  1305  	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
  1306  	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
  1307  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1308  	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
  1309  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1310  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1311  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1312  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1313  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1314  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1315  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1316  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1317  	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
  1318  	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
  1319  	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
  1320  	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
  1321  	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
  1322  	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
  1323  	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
  1324  	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
  1325  	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
  1326  	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
  1327  	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
  1328  	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
  1329  	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
  1330  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1331  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1332  	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
  1333  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1334  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1335  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1336  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1337  	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
  1338  	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
  1339  	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
  1340  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1341  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1342  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1343  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1344  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1345  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1346  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1347  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1348  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1349  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1350  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1351  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1352  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1353  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1354  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1355  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1356  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1357  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1358  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1359  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1360  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1361  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1362  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1363  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1364  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1365  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1366  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1367  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1368  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1369  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1370  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1371  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1372  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1373  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1374  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1375  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1376  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1377  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1378  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1379  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1380  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1381  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1382  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1383  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1384  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1385  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1386  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1387  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1388  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1389  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1390  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1391  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1392  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1393  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1394  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1395  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1396  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1397  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1398  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1399  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1400  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1401  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1402  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1403  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1404  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1405  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1406  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1407  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1408  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1409  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1410  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1411  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1412  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1413  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1414  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1415  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1416  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1417  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1418  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1419  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1420  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1421  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1422  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1423  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1424  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1425  	{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1426  	{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1427  	{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1428  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1429  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1430  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1431  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1432  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1433  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1434  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1435  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1436  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1437  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1438  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1439  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1440  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1441  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1442  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1443  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1444  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1445  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1446  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1447  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1448  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1449  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1450  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1451  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1452  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1453  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1454  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1455  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1456  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1457  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1458  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1459  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1460  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1461  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1462  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1463  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1464  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1465  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1466  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1467  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1468  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1469  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1470  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1471  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1472  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1473  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1474  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1475  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1476  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1477  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1478  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1479  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1480  	{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1481  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1482  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1483  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1484  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1485  	{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1486  	{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1487  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1488  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1489  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1490  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1491  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1492  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1493  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1494  	{AWORD, ybyte, Px, [23]uint8{2}},
  1495  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1496  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1497  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1498  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1499  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1500  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1501  	{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1502  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1503  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1504  	{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1505  	{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1506  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1507  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1508  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1509  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1510  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1511  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1512  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1513  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1514  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1515  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1516  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1517  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1518  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1519  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1520  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1521  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1522  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1523  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1524  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1525  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1526  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1527  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1528  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1529  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1530  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1531  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1532  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1533  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1534  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1535  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1536  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1537  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1538  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1539  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1540  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1541  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1542  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1543  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1544  	{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1545  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1546  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1547  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1548  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1549  	{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1550  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1551  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1552  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1553  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1554  	{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1555  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1556  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1557  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1558  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1559  	{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1560  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1561  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1562  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1563  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1564  	{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1565  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1566  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1567  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1568  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1569  	{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1570  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1571  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1572  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1573  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1574  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1575  	{AFFREE, nil, 0, [23]uint8{}},
  1576  	{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1577  	{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1578  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1579  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1580  	{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1581  	{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1582  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1583  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1584  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1585  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1586  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1587  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1588  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1589  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1590  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1591  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1592  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1593  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1594  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1595  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1596  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1597  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1598  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1599  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1600  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1601  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1602  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1603  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1604  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1605  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1606  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1607  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1608  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1609  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1610  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1611  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1612  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1613  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1614  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1615  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1616  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1617  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1618  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1619  	{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1620  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1621  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1622  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1623  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1624  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1625  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1626  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1627  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1628  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1629  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1630  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1631  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1632  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1633  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1634  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1635  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1636  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1637  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1638  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1639  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1640  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1641  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1642  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1643  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1644  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1645  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1646  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1647  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1648  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1649  	{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1650  	{AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
  1651  	{AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
  1652  	{AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1653  	{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1654  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1655  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1656  
  1657  	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
  1658  	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
  1659  	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
  1660  	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
  1661  	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
  1662  	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
  1663  	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
  1664  	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
  1665  	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
  1666  	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
  1667  	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
  1668  	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
  1669  	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
  1670  	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
  1671  	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
  1672  	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
  1673  	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
  1674  	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
  1675  
  1676  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1677  	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
  1678  	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
  1679  	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
  1680  	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
  1681  	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
  1682  	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
  1683  	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
  1684  	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
  1685  	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
  1686  
  1687  	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
  1688  	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
  1689  	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
  1690  	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
  1691  	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
  1692  	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
  1693  	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
  1694  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1695  	{obj.ATYPE, nil, 0, [23]uint8{}},
  1696  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1697  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1698  	{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1699  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1700  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1701  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1702  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1703  	{obj.AEND, nil, 0, [23]uint8{}},
  1704  	{0, nil, 0, [23]uint8{}},
  1705  }
  1706  
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// It is filled in by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1708  
  1709  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1710  // This happens on systems like Solaris that call .so functions instead of system calls.
  1711  // It does not seem to be necessary for any other systems. This is probably working
  1712  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1713  // what that bug is. And this does fix it.
  1714  func isextern(s *obj.LSym) bool {
  1715  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1716  	return strings.HasPrefix(s.Name, "libc_")
  1717  }
  1718  
  1719  // single-instruction no-ops of various lengths.
  1720  // constructed by hand and disassembled with gdb to verify.
  1721  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1722  var nop = [][16]uint8{
  1723  	{0x90},
  1724  	{0x66, 0x90},
  1725  	{0x0F, 0x1F, 0x00},
  1726  	{0x0F, 0x1F, 0x40, 0x00},
  1727  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1728  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1729  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1730  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1731  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1732  }
  1733  
  1734  // Native Client rejects the repeated 0x66 prefix.
  1735  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1736  func fillnop(p []byte, n int) {
  1737  	var m int
  1738  
  1739  	for n > 0 {
  1740  		m = n
  1741  		if m > len(nop) {
  1742  			m = len(nop)
  1743  		}
  1744  		copy(p[:m], nop[m-1][:m])
  1745  		p = p[m:]
  1746  		n -= m
  1747  	}
  1748  }
  1749  
  1750  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1751  	s.Grow(int64(c) + int64(pad))
  1752  	fillnop(s.P[c:], int(pad))
  1753  	return c + pad
  1754  }
  1755  
  1756  func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
  1757  	if p.Mode != 64 || ctxt.Arch.Ptrsize == 4 {
  1758  		return l
  1759  	}
  1760  	return q
  1761  }
  1762  
  1763  func span6(ctxt *obj.Link, s *obj.LSym) {
  1764  	ctxt.Cursym = s
  1765  
  1766  	if s.P != nil {
  1767  		return
  1768  	}
  1769  
  1770  	if ycover[0] == 0 {
  1771  		instinit()
  1772  	}
  1773  
  1774  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1775  		if p.To.Type == obj.TYPE_BRANCH {
  1776  			if p.Pcond == nil {
  1777  				p.Pcond = p
  1778  			}
  1779  		}
  1780  		if p.As == AADJSP {
  1781  			p.To.Type = obj.TYPE_REG
  1782  			p.To.Reg = REG_SP
  1783  			v := int32(-p.From.Offset)
  1784  			p.From.Offset = int64(v)
  1785  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1786  			if v < 0 {
  1787  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1788  				v = -v
  1789  				p.From.Offset = int64(v)
  1790  			}
  1791  
  1792  			if v == 0 {
  1793  				p.As = obj.ANOP
  1794  			}
  1795  		}
  1796  	}
  1797  
  1798  	var q *obj.Prog
  1799  	var count int64 // rough count of number of instructions
  1800  	for p := s.Text; p != nil; p = p.Link {
  1801  		count++
  1802  		p.Back = 2 // use short branches first time through
  1803  		q = p.Pcond
  1804  		if q != nil && (q.Back&2 != 0) {
  1805  			p.Back |= 1 // backward jump
  1806  			q.Back |= 4 // loop head
  1807  		}
  1808  
  1809  		if p.As == AADJSP {
  1810  			p.To.Type = obj.TYPE_REG
  1811  			p.To.Reg = REG_SP
  1812  			v := int32(-p.From.Offset)
  1813  			p.From.Offset = int64(v)
  1814  			p.As = spadjop(ctxt, p, AADDL, AADDQ)
  1815  			if v < 0 {
  1816  				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
  1817  				v = -v
  1818  				p.From.Offset = int64(v)
  1819  			}
  1820  
  1821  			if v == 0 {
  1822  				p.As = obj.ANOP
  1823  			}
  1824  		}
  1825  	}
  1826  	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
  1827  
  1828  	n := 0
  1829  	var c int32
  1830  	errors := ctxt.Errors
  1831  	var deferreturn *obj.LSym
  1832  	if ctxt.Headtype == obj.Hnacl {
  1833  		deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1834  	}
  1835  	for {
  1836  		loop := int32(0)
  1837  		for i := range s.R {
  1838  			s.R[i] = obj.Reloc{}
  1839  		}
  1840  		s.R = s.R[:0]
  1841  		s.P = s.P[:0]
  1842  		c = 0
  1843  		for p := s.Text; p != nil; p = p.Link {
  1844  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1845  
  1846  				// pad everything to avoid crossing 32-byte boundary
  1847  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1848  					c = naclpad(ctxt, s, c, -c&31)
  1849  				}
  1850  
  1851  				// pad call deferreturn to start at 32-byte boundary
  1852  				// so that subtracting 5 in jmpdefer will jump back
  1853  				// to that boundary and rerun the call.
  1854  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1855  					c = naclpad(ctxt, s, c, -c&31)
  1856  				}
  1857  
  1858  				// pad call to end at 32-byte boundary
  1859  				if p.As == obj.ACALL {
  1860  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1861  				}
  1862  
  1863  				// the linker treats REP and STOSQ as different instructions
  1864  				// but in fact the REP is a prefix on the STOSQ.
  1865  				// make sure REP has room for 2 more bytes, so that
  1866  				// padding will not be inserted before the next instruction.
  1867  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1868  					c = naclpad(ctxt, s, c, -c&31)
  1869  				}
  1870  
  1871  				// same for LOCK.
  1872  				// various instructions follow; the longest is 4 bytes.
  1873  				// give ourselves 8 bytes so as to avoid surprises.
  1874  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1875  					c = naclpad(ctxt, s, c, -c&31)
  1876  				}
  1877  			}
  1878  
  1879  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1880  				// pad with NOPs
  1881  				v := -c & (LoopAlign - 1)
  1882  
  1883  				if v <= MaxLoopPad {
  1884  					s.Grow(int64(c) + int64(v))
  1885  					fillnop(s.P[c:], int(v))
  1886  					c += v
  1887  				}
  1888  			}
  1889  
  1890  			p.Pc = int64(c)
  1891  
  1892  			// process forward jumps to p
  1893  			for q = p.Rel; q != nil; q = q.Forwd {
  1894  				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
  1895  				if q.Back&2 != 0 { // short
  1896  					if v > 127 {
  1897  						loop++
  1898  						q.Back ^= 2
  1899  					}
  1900  
  1901  					if q.As == AJCXZL || q.As == AXBEGIN {
  1902  						s.P[q.Pc+2] = byte(v)
  1903  					} else {
  1904  						s.P[q.Pc+1] = byte(v)
  1905  					}
  1906  				} else {
  1907  					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
  1908  				}
  1909  			}
  1910  
  1911  			p.Rel = nil
  1912  
  1913  			p.Pc = int64(c)
  1914  			asmins(ctxt, p)
  1915  			m := ctxt.AsmBuf.Len()
  1916  			if int(p.Isize) != m {
  1917  				p.Isize = uint8(m)
  1918  				loop++
  1919  			}
  1920  
  1921  			s.Grow(p.Pc + int64(m))
  1922  			copy(s.P[p.Pc:], ctxt.AsmBuf.Bytes())
  1923  			c += int32(m)
  1924  		}
  1925  
  1926  		n++
  1927  		if n > 20 {
  1928  			ctxt.Diag("span must be looping")
  1929  			log.Fatalf("loop")
  1930  		}
  1931  		if loop == 0 {
  1932  			break
  1933  		}
  1934  		if ctxt.Errors > errors {
  1935  			return
  1936  		}
  1937  	}
  1938  
  1939  	if ctxt.Headtype == obj.Hnacl {
  1940  		c = naclpad(ctxt, s, c, -c&31)
  1941  	}
  1942  
  1943  	// Pad functions with trap instruction, to catch invalid jumps
  1944  	if c&(FuncAlign-1) != 0 {
  1945  		v := -c & (FuncAlign - 1)
  1946  		s.Grow(int64(c) + int64(v))
  1947  		for i := c; i < c+v; i++ {
  1948  			// 0xCC is INT $3 - breakpoint instruction
  1949  			s.P[i] = uint8(0xCC)
  1950  		}
  1951  		c += v
  1952  	}
  1953  	s.Size = int64(c)
  1954  
  1955  	if false { /* debug['a'] > 1 */
  1956  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1957  		var i int
  1958  		for i = 0; i < len(s.P); i++ {
  1959  			fmt.Printf(" %.2x", s.P[i])
  1960  			if i%16 == 15 {
  1961  				fmt.Printf("\n  %.6x", uint(i+1))
  1962  			}
  1963  		}
  1964  
  1965  		if i%16 != 0 {
  1966  			fmt.Printf("\n")
  1967  		}
  1968  
  1969  		for i := 0; i < len(s.R); i++ {
  1970  			r := &s.R[i]
  1971  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1972  		}
  1973  	}
  1974  }
  1975  
  1976  func instinit() {
  1977  	for i := 1; optab[i].as != 0; i++ {
  1978  		c := optab[i].as
  1979  		if opindex[c&obj.AMask] != nil {
  1980  			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
  1981  		}
  1982  		opindex[c&obj.AMask] = &optab[i]
  1983  	}
  1984  
  1985  	for i := 0; i < Ymax; i++ {
  1986  		ycover[i*Ymax+i] = 1
  1987  	}
  1988  
  1989  	ycover[Yi0*Ymax+Yi8] = 1
  1990  	ycover[Yi1*Ymax+Yi8] = 1
  1991  	ycover[Yu7*Ymax+Yi8] = 1
  1992  
  1993  	ycover[Yi0*Ymax+Yu7] = 1
  1994  	ycover[Yi1*Ymax+Yu7] = 1
  1995  
  1996  	ycover[Yi0*Ymax+Yu8] = 1
  1997  	ycover[Yi1*Ymax+Yu8] = 1
  1998  	ycover[Yu7*Ymax+Yu8] = 1
  1999  
  2000  	ycover[Yi0*Ymax+Ys32] = 1
  2001  	ycover[Yi1*Ymax+Ys32] = 1
  2002  	ycover[Yu7*Ymax+Ys32] = 1
  2003  	ycover[Yu8*Ymax+Ys32] = 1
  2004  	ycover[Yi8*Ymax+Ys32] = 1
  2005  
  2006  	ycover[Yi0*Ymax+Yi32] = 1
  2007  	ycover[Yi1*Ymax+Yi32] = 1
  2008  	ycover[Yu7*Ymax+Yi32] = 1
  2009  	ycover[Yu8*Ymax+Yi32] = 1
  2010  	ycover[Yi8*Ymax+Yi32] = 1
  2011  	ycover[Ys32*Ymax+Yi32] = 1
  2012  
  2013  	ycover[Yi0*Ymax+Yi64] = 1
  2014  	ycover[Yi1*Ymax+Yi64] = 1
  2015  	ycover[Yu7*Ymax+Yi64] = 1
  2016  	ycover[Yu8*Ymax+Yi64] = 1
  2017  	ycover[Yi8*Ymax+Yi64] = 1
  2018  	ycover[Ys32*Ymax+Yi64] = 1
  2019  	ycover[Yi32*Ymax+Yi64] = 1
  2020  
  2021  	ycover[Yal*Ymax+Yrb] = 1
  2022  	ycover[Ycl*Ymax+Yrb] = 1
  2023  	ycover[Yax*Ymax+Yrb] = 1
  2024  	ycover[Ycx*Ymax+Yrb] = 1
  2025  	ycover[Yrx*Ymax+Yrb] = 1
  2026  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2027  
  2028  	ycover[Ycl*Ymax+Ycx] = 1
  2029  
  2030  	ycover[Yax*Ymax+Yrx] = 1
  2031  	ycover[Ycx*Ymax+Yrx] = 1
  2032  
  2033  	ycover[Yax*Ymax+Yrl] = 1
  2034  	ycover[Ycx*Ymax+Yrl] = 1
  2035  	ycover[Yrx*Ymax+Yrl] = 1
  2036  	ycover[Yrl32*Ymax+Yrl] = 1
  2037  
  2038  	ycover[Yf0*Ymax+Yrf] = 1
  2039  
  2040  	ycover[Yal*Ymax+Ymb] = 1
  2041  	ycover[Ycl*Ymax+Ymb] = 1
  2042  	ycover[Yax*Ymax+Ymb] = 1
  2043  	ycover[Ycx*Ymax+Ymb] = 1
  2044  	ycover[Yrx*Ymax+Ymb] = 1
  2045  	ycover[Yrb*Ymax+Ymb] = 1
  2046  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2047  	ycover[Ym*Ymax+Ymb] = 1
  2048  
  2049  	ycover[Yax*Ymax+Yml] = 1
  2050  	ycover[Ycx*Ymax+Yml] = 1
  2051  	ycover[Yrx*Ymax+Yml] = 1
  2052  	ycover[Yrl*Ymax+Yml] = 1
  2053  	ycover[Yrl32*Ymax+Yml] = 1
  2054  	ycover[Ym*Ymax+Yml] = 1
  2055  
  2056  	ycover[Yax*Ymax+Ymm] = 1
  2057  	ycover[Ycx*Ymax+Ymm] = 1
  2058  	ycover[Yrx*Ymax+Ymm] = 1
  2059  	ycover[Yrl*Ymax+Ymm] = 1
  2060  	ycover[Yrl32*Ymax+Ymm] = 1
  2061  	ycover[Ym*Ymax+Ymm] = 1
  2062  	ycover[Ymr*Ymax+Ymm] = 1
  2063  
  2064  	ycover[Ym*Ymax+Yxm] = 1
  2065  	ycover[Yxr*Ymax+Yxm] = 1
  2066  
  2067  	ycover[Ym*Ymax+Yym] = 1
  2068  	ycover[Yyr*Ymax+Yym] = 1
  2069  
  2070  	for i := 0; i < MAXREG; i++ {
  2071  		reg[i] = -1
  2072  		if i >= REG_AL && i <= REG_R15B {
  2073  			reg[i] = (i - REG_AL) & 7
  2074  			if i >= REG_SPB && i <= REG_DIB {
  2075  				regrex[i] = 0x40
  2076  			}
  2077  			if i >= REG_R8B && i <= REG_R15B {
  2078  				regrex[i] = Rxr | Rxx | Rxb
  2079  			}
  2080  		}
  2081  
  2082  		if i >= REG_AH && i <= REG_BH {
  2083  			reg[i] = 4 + ((i - REG_AH) & 7)
  2084  		}
  2085  		if i >= REG_AX && i <= REG_R15 {
  2086  			reg[i] = (i - REG_AX) & 7
  2087  			if i >= REG_R8 {
  2088  				regrex[i] = Rxr | Rxx | Rxb
  2089  			}
  2090  		}
  2091  
  2092  		if i >= REG_F0 && i <= REG_F0+7 {
  2093  			reg[i] = (i - REG_F0) & 7
  2094  		}
  2095  		if i >= REG_M0 && i <= REG_M0+7 {
  2096  			reg[i] = (i - REG_M0) & 7
  2097  		}
  2098  		if i >= REG_X0 && i <= REG_X0+15 {
  2099  			reg[i] = (i - REG_X0) & 7
  2100  			if i >= REG_X0+8 {
  2101  				regrex[i] = Rxr | Rxx | Rxb
  2102  			}
  2103  		}
  2104  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2105  			reg[i] = (i - REG_Y0) & 7
  2106  			if i >= REG_Y0+8 {
  2107  				regrex[i] = Rxr | Rxx | Rxb
  2108  			}
  2109  		}
  2110  
  2111  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2112  			regrex[i] = Rxr
  2113  		}
  2114  	}
  2115  }
  2116  
// isAndroid records whether obj.Getgoos() reports "android".
// prefixof consults it when choosing the segment prefix for TLS references.
var isAndroid = (obj.Getgoos() == "android")
  2118  
  2119  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2120  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2121  		return 0
  2122  	}
  2123  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2124  		switch a.Reg {
  2125  		case REG_CS:
  2126  			return 0x2e
  2127  
  2128  		case REG_DS:
  2129  			return 0x3e
  2130  
  2131  		case REG_ES:
  2132  			return 0x26
  2133  
  2134  		case REG_FS:
  2135  			return 0x64
  2136  
  2137  		case REG_GS:
  2138  			return 0x65
  2139  
  2140  		case REG_TLS:
  2141  			// NOTE: Systems listed here should be only systems that
  2142  			// support direct TLS references like 8(TLS) implemented as
  2143  			// direct references from FS or GS. Systems that require
  2144  			// the initial-exec model, where you load the TLS base into
  2145  			// a register and then index from that register, do not reach
  2146  			// this code and should not be listed.
  2147  			if p.Mode == 32 {
  2148  				switch ctxt.Headtype {
  2149  				default:
  2150  					if isAndroid {
  2151  						return 0x65 // GS
  2152  					}
  2153  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2154  
  2155  				case obj.Hdarwin,
  2156  					obj.Hdragonfly,
  2157  					obj.Hfreebsd,
  2158  					obj.Hnetbsd,
  2159  					obj.Hopenbsd:
  2160  					return 0x65 // GS
  2161  				}
  2162  			}
  2163  
  2164  			switch ctxt.Headtype {
  2165  			default:
  2166  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  2167  
  2168  			case obj.Hlinux:
  2169  				if isAndroid {
  2170  					return 0x64 // FS
  2171  				}
  2172  
  2173  				if ctxt.Flag_shared != 0 {
  2174  					log.Fatalf("unknown TLS base register for linux with -shared")
  2175  				} else {
  2176  					return 0x64 // FS
  2177  				}
  2178  
  2179  			case obj.Hdragonfly,
  2180  				obj.Hfreebsd,
  2181  				obj.Hnetbsd,
  2182  				obj.Hopenbsd,
  2183  				obj.Hsolaris:
  2184  				return 0x64 // FS
  2185  
  2186  			case obj.Hdarwin:
  2187  				return 0x65 // GS
  2188  			}
  2189  		}
  2190  	}
  2191  
  2192  	if p.Mode == 32 {
  2193  		if a.Index == REG_TLS && ctxt.Flag_shared != 0 {
  2194  			// When building for inclusion into a shared library, an instruction of the form
  2195  			//     MOVL 0(CX)(TLS*1), AX
  2196  			// becomes
  2197  			//     mov %gs:(%ecx), %eax
  2198  			// which assumes that the correct TLS offset has been loaded into %ecx (today
  2199  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2200  			// a shared library the instruction it becomes
  2201  			//     mov 0x0(%ecx), $eax
  2202  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2203  			if a.Offset != 0 {
  2204  				ctxt.Diag("cannot handle non-0 offsets to TLS")
  2205  			}
  2206  			return 0x65 // GS
  2207  		}
  2208  		return 0
  2209  	}
  2210  
  2211  	switch a.Index {
  2212  	case REG_CS:
  2213  		return 0x2e
  2214  
  2215  	case REG_DS:
  2216  		return 0x3e
  2217  
  2218  	case REG_ES:
  2219  		return 0x26
  2220  
  2221  	case REG_TLS:
  2222  		if ctxt.Flag_shared != 0 {
  2223  			// When building for inclusion into a shared library, an instruction of the form
  2224  			//     MOV 0(CX)(TLS*1), AX
  2225  			// becomes
  2226  			//     mov %fs:(%rcx), %rax
  2227  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2228  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2229  			// a shared library the instruction does not require a prefix.
  2230  			if a.Offset != 0 {
  2231  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2232  			}
  2233  			return 0x64
  2234  		}
  2235  
  2236  	case REG_FS:
  2237  		return 0x64
  2238  
  2239  	case REG_GS:
  2240  		return 0x65
  2241  	}
  2242  
  2243  	return 0
  2244  }
  2245  
  2246  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2247  	switch a.Type {
  2248  	case obj.TYPE_NONE:
  2249  		return Ynone
  2250  
  2251  	case obj.TYPE_BRANCH:
  2252  		return Ybr
  2253  
  2254  	case obj.TYPE_INDIR:
  2255  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2256  			return Yindir
  2257  		}
  2258  		return Yxxx
  2259  
  2260  	case obj.TYPE_MEM:
  2261  		if a.Index == REG_SP {
  2262  			// Can't use SP as the index register
  2263  			return Yxxx
  2264  		}
  2265  		if ctxt.Asmode == 64 {
  2266  			switch a.Name {
  2267  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2268  				// Global variables can't use index registers and their
  2269  				// base register is %rip (%rip is encoded as REG_NONE).
  2270  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2271  					return Yxxx
  2272  				}
  2273  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2274  				// These names must have a base of SP.  The old compiler
  2275  				// uses 0 for the base register. SSA uses REG_SP.
  2276  				if a.Reg != REG_SP && a.Reg != 0 {
  2277  					return Yxxx
  2278  				}
  2279  			case obj.NAME_NONE:
  2280  				// everything is ok
  2281  			default:
  2282  				// unknown name
  2283  				return Yxxx
  2284  			}
  2285  		}
  2286  		return Ym
  2287  
  2288  	case obj.TYPE_ADDR:
  2289  		switch a.Name {
  2290  		case obj.NAME_GOTREF:
  2291  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2292  			return Yxxx
  2293  
  2294  		case obj.NAME_EXTERN,
  2295  			obj.NAME_STATIC:
  2296  			if a.Sym != nil && isextern(a.Sym) || (p.Mode == 32 && ctxt.Flag_shared == 0) {
  2297  				return Yi32
  2298  			}
  2299  			return Yiauto // use pc-relative addressing
  2300  
  2301  		case obj.NAME_AUTO,
  2302  			obj.NAME_PARAM:
  2303  			return Yiauto
  2304  		}
  2305  
  2306  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2307  		// and got Yi32 in an earlier version of this code.
  2308  		// Keep doing that until we fix yduff etc.
  2309  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2310  			return Yi32
  2311  		}
  2312  
  2313  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2314  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2315  		}
  2316  		fallthrough
  2317  
  2318  		// fall through
  2319  
  2320  	case obj.TYPE_CONST:
  2321  		if a.Sym != nil {
  2322  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2323  		}
  2324  
  2325  		v := a.Offset
  2326  		if p.Mode == 32 {
  2327  			v = int64(int32(v))
  2328  		}
  2329  		if v == 0 {
  2330  			if p.Mark&PRESERVEFLAGS != 0 {
  2331  				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
  2332  				return Yu7
  2333  			}
  2334  			return Yi0
  2335  		}
  2336  		if v == 1 {
  2337  			return Yi1
  2338  		}
  2339  		if v >= 0 && v <= 127 {
  2340  			return Yu7
  2341  		}
  2342  		if v >= 0 && v <= 255 {
  2343  			return Yu8
  2344  		}
  2345  		if v >= -128 && v <= 127 {
  2346  			return Yi8
  2347  		}
  2348  		if p.Mode == 32 {
  2349  			return Yi32
  2350  		}
  2351  		l := int32(v)
  2352  		if int64(l) == v {
  2353  			return Ys32 /* can sign extend */
  2354  		}
  2355  		if v>>32 == 0 {
  2356  			return Yi32 /* unsigned */
  2357  		}
  2358  		return Yi64
  2359  
  2360  	case obj.TYPE_TEXTSIZE:
  2361  		return Ytextsize
  2362  	}
  2363  
  2364  	if a.Type != obj.TYPE_REG {
  2365  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2366  		return Yxxx
  2367  	}
  2368  
  2369  	switch a.Reg {
  2370  	case REG_AL:
  2371  		return Yal
  2372  
  2373  	case REG_AX:
  2374  		return Yax
  2375  
  2376  		/*
  2377  			case REG_SPB:
  2378  		*/
  2379  	case REG_BPB,
  2380  		REG_SIB,
  2381  		REG_DIB,
  2382  		REG_R8B,
  2383  		REG_R9B,
  2384  		REG_R10B,
  2385  		REG_R11B,
  2386  		REG_R12B,
  2387  		REG_R13B,
  2388  		REG_R14B,
  2389  		REG_R15B:
  2390  		if ctxt.Asmode != 64 {
  2391  			return Yxxx
  2392  		}
  2393  		fallthrough
  2394  
  2395  	case REG_DL,
  2396  		REG_BL,
  2397  		REG_AH,
  2398  		REG_CH,
  2399  		REG_DH,
  2400  		REG_BH:
  2401  		return Yrb
  2402  
  2403  	case REG_CL:
  2404  		return Ycl
  2405  
  2406  	case REG_CX:
  2407  		return Ycx
  2408  
  2409  	case REG_DX, REG_BX:
  2410  		return Yrx
  2411  
  2412  	case REG_R8, /* not really Yrl */
  2413  		REG_R9,
  2414  		REG_R10,
  2415  		REG_R11,
  2416  		REG_R12,
  2417  		REG_R13,
  2418  		REG_R14,
  2419  		REG_R15:
  2420  		if ctxt.Asmode != 64 {
  2421  			return Yxxx
  2422  		}
  2423  		fallthrough
  2424  
  2425  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2426  		if p.Mode == 32 {
  2427  			return Yrl32
  2428  		}
  2429  		return Yrl
  2430  
  2431  	case REG_F0 + 0:
  2432  		return Yf0
  2433  
  2434  	case REG_F0 + 1,
  2435  		REG_F0 + 2,
  2436  		REG_F0 + 3,
  2437  		REG_F0 + 4,
  2438  		REG_F0 + 5,
  2439  		REG_F0 + 6,
  2440  		REG_F0 + 7:
  2441  		return Yrf
  2442  
  2443  	case REG_M0 + 0,
  2444  		REG_M0 + 1,
  2445  		REG_M0 + 2,
  2446  		REG_M0 + 3,
  2447  		REG_M0 + 4,
  2448  		REG_M0 + 5,
  2449  		REG_M0 + 6,
  2450  		REG_M0 + 7:
  2451  		return Ymr
  2452  
  2453  	case REG_X0 + 0,
  2454  		REG_X0 + 1,
  2455  		REG_X0 + 2,
  2456  		REG_X0 + 3,
  2457  		REG_X0 + 4,
  2458  		REG_X0 + 5,
  2459  		REG_X0 + 6,
  2460  		REG_X0 + 7,
  2461  		REG_X0 + 8,
  2462  		REG_X0 + 9,
  2463  		REG_X0 + 10,
  2464  		REG_X0 + 11,
  2465  		REG_X0 + 12,
  2466  		REG_X0 + 13,
  2467  		REG_X0 + 14,
  2468  		REG_X0 + 15:
  2469  		return Yxr
  2470  
  2471  	case REG_Y0 + 0,
  2472  		REG_Y0 + 1,
  2473  		REG_Y0 + 2,
  2474  		REG_Y0 + 3,
  2475  		REG_Y0 + 4,
  2476  		REG_Y0 + 5,
  2477  		REG_Y0 + 6,
  2478  		REG_Y0 + 7,
  2479  		REG_Y0 + 8,
  2480  		REG_Y0 + 9,
  2481  		REG_Y0 + 10,
  2482  		REG_Y0 + 11,
  2483  		REG_Y0 + 12,
  2484  		REG_Y0 + 13,
  2485  		REG_Y0 + 14,
  2486  		REG_Y0 + 15:
  2487  		return Yyr
  2488  
  2489  	case REG_CS:
  2490  		return Ycs
  2491  	case REG_SS:
  2492  		return Yss
  2493  	case REG_DS:
  2494  		return Yds
  2495  	case REG_ES:
  2496  		return Yes
  2497  	case REG_FS:
  2498  		return Yfs
  2499  	case REG_GS:
  2500  		return Ygs
  2501  	case REG_TLS:
  2502  		return Ytls
  2503  
  2504  	case REG_GDTR:
  2505  		return Ygdtr
  2506  	case REG_IDTR:
  2507  		return Yidtr
  2508  	case REG_LDTR:
  2509  		return Yldtr
  2510  	case REG_MSW:
  2511  		return Ymsw
  2512  	case REG_TASK:
  2513  		return Ytask
  2514  
  2515  	case REG_CR + 0:
  2516  		return Ycr0
  2517  	case REG_CR + 1:
  2518  		return Ycr1
  2519  	case REG_CR + 2:
  2520  		return Ycr2
  2521  	case REG_CR + 3:
  2522  		return Ycr3
  2523  	case REG_CR + 4:
  2524  		return Ycr4
  2525  	case REG_CR + 5:
  2526  		return Ycr5
  2527  	case REG_CR + 6:
  2528  		return Ycr6
  2529  	case REG_CR + 7:
  2530  		return Ycr7
  2531  	case REG_CR + 8:
  2532  		return Ycr8
  2533  
  2534  	case REG_DR + 0:
  2535  		return Ydr0
  2536  	case REG_DR + 1:
  2537  		return Ydr1
  2538  	case REG_DR + 2:
  2539  		return Ydr2
  2540  	case REG_DR + 3:
  2541  		return Ydr3
  2542  	case REG_DR + 4:
  2543  		return Ydr4
  2544  	case REG_DR + 5:
  2545  		return Ydr5
  2546  	case REG_DR + 6:
  2547  		return Ydr6
  2548  	case REG_DR + 7:
  2549  		return Ydr7
  2550  
  2551  	case REG_TR + 0:
  2552  		return Ytr0
  2553  	case REG_TR + 1:
  2554  		return Ytr1
  2555  	case REG_TR + 2:
  2556  		return Ytr2
  2557  	case REG_TR + 3:
  2558  		return Ytr3
  2559  	case REG_TR + 4:
  2560  		return Ytr4
  2561  	case REG_TR + 5:
  2562  		return Ytr5
  2563  	case REG_TR + 6:
  2564  		return Ytr6
  2565  	case REG_TR + 7:
  2566  		return Ytr7
  2567  	}
  2568  
  2569  	return Yxxx
  2570  }
  2571  
  2572  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2573  	var i int
  2574  
  2575  	switch index {
  2576  	default:
  2577  		goto bad
  2578  
  2579  	case REG_NONE:
  2580  		i = 4 << 3
  2581  		goto bas
  2582  
  2583  	case REG_R8,
  2584  		REG_R9,
  2585  		REG_R10,
  2586  		REG_R11,
  2587  		REG_R12,
  2588  		REG_R13,
  2589  		REG_R14,
  2590  		REG_R15:
  2591  		if ctxt.Asmode != 64 {
  2592  			goto bad
  2593  		}
  2594  		fallthrough
  2595  
  2596  	case REG_AX,
  2597  		REG_CX,
  2598  		REG_DX,
  2599  		REG_BX,
  2600  		REG_BP,
  2601  		REG_SI,
  2602  		REG_DI:
  2603  		i = reg[index] << 3
  2604  	}
  2605  
  2606  	switch scale {
  2607  	default:
  2608  		goto bad
  2609  
  2610  	case 1:
  2611  		break
  2612  
  2613  	case 2:
  2614  		i |= 1 << 6
  2615  
  2616  	case 4:
  2617  		i |= 2 << 6
  2618  
  2619  	case 8:
  2620  		i |= 3 << 6
  2621  	}
  2622  
  2623  bas:
  2624  	switch base {
  2625  	default:
  2626  		goto bad
  2627  
  2628  	case REG_NONE: /* must be mod=00 */
  2629  		i |= 5
  2630  
  2631  	case REG_R8,
  2632  		REG_R9,
  2633  		REG_R10,
  2634  		REG_R11,
  2635  		REG_R12,
  2636  		REG_R13,
  2637  		REG_R14,
  2638  		REG_R15:
  2639  		if ctxt.Asmode != 64 {
  2640  			goto bad
  2641  		}
  2642  		fallthrough
  2643  
  2644  	case REG_AX,
  2645  		REG_CX,
  2646  		REG_DX,
  2647  		REG_BX,
  2648  		REG_SP,
  2649  		REG_BP,
  2650  		REG_SI,
  2651  		REG_DI:
  2652  		i |= reg[base]
  2653  	}
  2654  
  2655  	ctxt.AsmBuf.Put1(byte(i))
  2656  	return
  2657  
  2658  bad:
  2659  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2660  	ctxt.AsmBuf.Put1(0)
  2661  	return
  2662  }
  2663  
  2664  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2665  	var rel obj.Reloc
  2666  
  2667  	v := vaddr(ctxt, p, a, &rel)
  2668  	if rel.Siz != 0 {
  2669  		if rel.Siz != 4 {
  2670  			ctxt.Diag("bad reloc")
  2671  		}
  2672  		r := obj.Addrel(ctxt.Cursym)
  2673  		*r = rel
  2674  		r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  2675  	}
  2676  
  2677  	ctxt.AsmBuf.PutInt32(int32(v))
  2678  }
  2679  
  2680  /*
  2681  static void
  2682  relput8(Prog *p, Addr *a)
  2683  {
  2684  	vlong v;
  2685  	Reloc rel, *r;
  2686  
  2687  	v = vaddr(ctxt, p, a, &rel);
  2688  	if(rel.siz != 0) {
  2689  		r = addrel(ctxt->cursym);
  2690  		*r = rel;
  2691  		r->siz = 8;
  2692  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2693  	}
  2694  	put8(ctxt, v);
  2695  }
  2696  */
  2697  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2698  	if r != nil {
  2699  		*r = obj.Reloc{}
  2700  	}
  2701  
  2702  	switch a.Name {
  2703  	case obj.NAME_STATIC,
  2704  		obj.NAME_GOTREF,
  2705  		obj.NAME_EXTERN:
  2706  		s := a.Sym
  2707  		if r == nil {
  2708  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2709  			log.Fatalf("reloc")
  2710  		}
  2711  
  2712  		if a.Name == obj.NAME_GOTREF {
  2713  			r.Siz = 4
  2714  			r.Type = obj.R_GOTPCREL
  2715  		} else if isextern(s) || (p.Mode != 64 && ctxt.Flag_shared == 0) {
  2716  			r.Siz = 4
  2717  			r.Type = obj.R_ADDR
  2718  		} else {
  2719  			r.Siz = 4
  2720  			r.Type = obj.R_PCREL
  2721  		}
  2722  
  2723  		r.Off = -1 // caller must fill in
  2724  		r.Sym = s
  2725  		r.Add = a.Offset
  2726  
  2727  		return 0
  2728  	}
  2729  
  2730  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2731  		if r == nil {
  2732  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2733  			log.Fatalf("reloc")
  2734  		}
  2735  
  2736  		if ctxt.Flag_shared == 0 || isAndroid {
  2737  			r.Type = obj.R_TLS_LE
  2738  			r.Siz = 4
  2739  			r.Off = -1 // caller must fill in
  2740  			r.Add = a.Offset
  2741  		}
  2742  		return 0
  2743  	}
  2744  
  2745  	return a.Offset
  2746  }
  2747  
// asmandsz appends the addressing bytes for operand a to ctxt.AsmBuf: a
// ModRM-style byte carrying r in bits 3-5, followed where required by a SIB
// byte (written by asmidx or inline), an 8-bit or 32-bit displacement, and a
// relocation record added to ctxt.Cursym.
//
// rex is masked down to its 0x40 and Rxr bits and merged into ctxt.Rexflag
// together with the extension bits that the regrex table supplies for the
// base and index registers actually used. m64 is not referenced in the body.
//
// Operands that cannot be encoded are reported through ctxt.Diag with the
// message "asmand: bad address ...".
func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	// Only the 0x40 and Rxr bits of the caller-supplied rex value are kept.
	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && ctxt.Rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement candidate; encoding continues with the truncated
	// value even after the diagnostic above.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here. The two checks only add a more
		// specific diagnostic before the generic failure at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: top two bits 3, register code in the low
		// bits, r in the middle. A non-zero offset is rejected.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ctxt.AsmBuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register (REG_TLS as index is handled
	// further down). Every form here uses low field 4 in the first byte and
	// lets asmidx write the SIB byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// This base shadows the function-level variable within the block.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			// Indexed symbol references are rejected in 64-bit mode unless
			// isextern reports the symbol as external.
			if !isextern(a.Sym) && p.Mode == 64 {
				goto bad
			}
			if p.Mode == 32 && ctxt.Flag_shared != 0 {
				base = REG_CX
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: top bits 0 and a 32-bit value always follows.
			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Shortest form: no displacement bytes at all. BP and R13 bases are
		// excluded from it and fall through to the 8-bit form below.
		// NOTE(review): presumably because the zero-displacement encoding of
		// those bases means "no base" in x86 — confirm against the manual.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// 8-bit displacement, only when no relocation is pending.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.AsmBuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ctxt.AsmBuf.Put1(byte(v))
			return
		}

		// 32-bit displacement (and relocation, if any).
		ctxt.AsmBuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// From here on there is no real index register.
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if p.Mode == 32 && ctxt.Flag_shared != 0 {
			base = REG_CX
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	// A TLS-based operand gets its value and relocation from vaddr.
	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ctxt.Rexflag |= regrex[base]&Rxb | rex
	// No general-purpose base register: absolute, segment-register or TLS
	// based operand. Both encodings below are followed by a 32-bit value.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Note the precedence: the "|| p.Mode != 64" applies to the whole
		// preceding && chain, so outside 64-bit mode this branch is always
		// taken.
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			// Top bits 0, low field 5: a bare 32-bit value follows.
			ctxt.AsmBuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ctxt.AsmBuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 bases always go through asmidx with REG_NONE as the index;
	// the first byte carries reg[base] in its low field.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			ctxt.AsmBuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ctxt.AsmBuf.Put1(byte(v))
			return
		}

		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Remaining general-purpose bases: no SIB byte is written.
	if REG_AX <= base && base <= REG_R15 {
		// With REG_TLS as the index in non-shared mode, the displacement is
		// moved into the addend of an R_TLS_LE relocation and v is zeroed.
		if a.Index == REG_TLS && ctxt.Flag_shared == 0 {
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// No displacement; BP and R13 are excluded as in the indexed case.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		// 8-bit displacement.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.AsmBuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		// 32-bit displacement and/or relocation.
		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit value v. If a relocation is pending it is recorded
	// first, with Off pointing at the position where v is about to be
	// written (ctxt.Curp.Pc plus the bytes buffered so far).
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// This r is a *obj.Reloc that shadows the integer parameter r.
		r := obj.Addrel(ctxt.Cursym)
		*r = rel
		r.Off = int32(ctxt.Curp.Pc + int64(ctxt.AsmBuf.Len()))
	}

	ctxt.AsmBuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
  2944  
  2945  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2946  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2947  }
  2948  
  2949  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2950  	asmandsz(ctxt, p, a, o, 0, 0)
  2951  }
  2952  
  2953  func bytereg(a *obj.Addr, t *uint8) {
  2954  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2955  		a.Reg += REG_AL - REG_AX
  2956  		*t = 0
  2957  	}
  2958  }
  2959  
  2960  func unbytereg(a *obj.Addr, t *uint8) {
  2961  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  2962  		a.Reg += REG_AX - REG_AL
  2963  		*t = 0
  2964  	}
  2965  }
  2966  
  2967  const (
  2968  	E = 0xff
  2969  )
  2970  
// ymovtab lists special-case encodings (segment-register push/pop, moves
// to and from segment, control, debug and test registers, descriptor-table
// and task-register loads/stores, double shifts, and TLS base loads).
//
// NOTE(review): the Movtab type is declared elsewhere. Judging from the
// literals here, each entry reads as {instruction, from-operand class,
// from3-operand class, to-operand class, encoding-case code, opcode bytes};
// confirm against the type definition and the code that consumes the table.
// The final all-zero entry presumably terminates the table for that
// consumer.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	// All-zero sentinel entry.
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3110  
  3111  func isax(a *obj.Addr) bool {
  3112  	switch a.Reg {
  3113  	case REG_AX, REG_AL, REG_AH:
  3114  		return true
  3115  	}
  3116  
  3117  	if a.Index == REG_AX {
  3118  		return true
  3119  	}
  3120  	return false
  3121  }
  3122  
  3123  func subreg(p *obj.Prog, from int, to int) {
  3124  	if false { /* debug['Q'] */
  3125  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  3126  	}
  3127  
  3128  	if int(p.From.Reg) == from {
  3129  		p.From.Reg = int16(to)
  3130  		p.Ft = 0
  3131  	}
  3132  
  3133  	if int(p.To.Reg) == from {
  3134  		p.To.Reg = int16(to)
  3135  		p.Tt = 0
  3136  	}
  3137  
  3138  	if int(p.From.Index) == from {
  3139  		p.From.Index = int16(to)
  3140  		p.Ft = 0
  3141  	}
  3142  
  3143  	if int(p.To.Index) == from {
  3144  		p.To.Index = int16(to)
  3145  		p.Tt = 0
  3146  	}
  3147  
  3148  	if false { /* debug['Q'] */
  3149  		fmt.Printf("%v\n", p)
  3150  	}
  3151  }
  3152  
  3153  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3154  	switch op {
  3155  	case Pm, Pe, Pf2, Pf3:
  3156  		if osize != 1 {
  3157  			if op != Pm {
  3158  				ctxt.AsmBuf.Put1(byte(op))
  3159  			}
  3160  			ctxt.AsmBuf.Put1(Pm)
  3161  			z++
  3162  			op = int(o.op[z])
  3163  			break
  3164  		}
  3165  		fallthrough
  3166  
  3167  	default:
  3168  		if ctxt.AsmBuf.Len() == 0 || ctxt.AsmBuf.Last() != Pm {
  3169  			ctxt.AsmBuf.Put1(Pm)
  3170  		}
  3171  	}
  3172  
  3173  	ctxt.AsmBuf.Put1(byte(op))
  3174  	return z
  3175  }
  3176  
  3177  var bpduff1 = []byte{
  3178  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  3179  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  3180  }
  3181  
  3182  var bpduff2 = []byte{
  3183  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  3184  }
  3185  
  3186  // Emit VEX prefix and opcode byte.
  3187  // The three addresses are the r/m, vvvv, and reg fields.
  3188  // The reg and rm arguments appear in the same order as the
  3189  // arguments to asmand, which typically follows the call to asmvex.
  3190  // The final two arguments are the VEX prefix (see encoding above)
  3191  // and the opcode byte.
  3192  // For details about vex prefix see:
  3193  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3194  func asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3195  	ctxt.Vexflag = 1
  3196  	rexR := regrex[r.Reg] & Rxr
  3197  	rexB := regrex[rm.Reg] & Rxb
  3198  	rexX := regrex[rm.Index] & Rxx
  3199  	vexM := (vex >> 3) & 0xF
  3200  	vexWLP := vex & 0x87
  3201  	vexV := byte(0)
  3202  	if v != nil {
  3203  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3204  	}
  3205  	vexV ^= 0xF
  3206  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3207  		// Can use 2-byte encoding.
  3208  		ctxt.AsmBuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3209  	} else {
  3210  		// Must use 3-byte encoding.
  3211  		ctxt.AsmBuf.Put3(0xc4,
  3212  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3213  			vexV<<3|vexWLP,
  3214  		)
  3215  	}
  3216  	ctxt.AsmBuf.Put1(opcode)
  3217  }
  3218  
  3219  func doasm(ctxt *obj.Link, p *obj.Prog) {
  3220  	ctxt.Curp = p // TODO
  3221  
  3222  	o := opindex[p.As&obj.AMask]
  3223  
  3224  	if o == nil {
  3225  		ctxt.Diag("asmins: missing op %v", p)
  3226  		return
  3227  	}
  3228  
  3229  	pre := prefixof(ctxt, p, &p.From)
  3230  	if pre != 0 {
  3231  		ctxt.AsmBuf.Put1(byte(pre))
  3232  	}
  3233  	pre = prefixof(ctxt, p, &p.To)
  3234  	if pre != 0 {
  3235  		ctxt.AsmBuf.Put1(byte(pre))
  3236  	}
  3237  
  3238  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3239  	// which encodes as SHRQ $32(DX*0), AX.
  3240  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3241  	// Change encoding generated by assemblers and compilers and remove.
  3242  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3243  		p.From3 = new(obj.Addr)
  3244  		p.From3.Type = obj.TYPE_REG
  3245  		p.From3.Reg = p.From.Index
  3246  		p.From.Index = 0
  3247  	}
  3248  
  3249  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3250  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3251  	switch p.As {
  3252  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3253  		if p.From3Type() == obj.TYPE_NONE {
  3254  			p.From3 = new(obj.Addr)
  3255  			*p.From3 = p.From
  3256  			p.From = obj.Addr{}
  3257  			p.From.Type = obj.TYPE_CONST
  3258  			p.From.Offset = p.To.Offset
  3259  			p.To.Offset = 0
  3260  		}
  3261  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3262  		if p.From3Type() == obj.TYPE_NONE {
  3263  			p.From3 = new(obj.Addr)
  3264  			*p.From3 = p.To
  3265  			p.To = obj.Addr{}
  3266  			p.To.Type = obj.TYPE_CONST
  3267  			p.To.Offset = p.From3.Offset
  3268  			p.From3.Offset = 0
  3269  		}
  3270  	}
  3271  
  3272  	if p.Ft == 0 {
  3273  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3274  	}
  3275  	if p.Tt == 0 {
  3276  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3277  	}
  3278  
  3279  	ft := int(p.Ft) * Ymax
  3280  	f3t := Ynone * Ymax
  3281  	if p.From3 != nil {
  3282  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3283  	}
  3284  	tt := int(p.Tt) * Ymax
  3285  
  3286  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3287  	z := 0
  3288  	var a *obj.Addr
  3289  	var l int
  3290  	var op int
  3291  	var q *obj.Prog
  3292  	var r *obj.Reloc
  3293  	var rel obj.Reloc
  3294  	var v int64
  3295  	for i := range o.ytab {
  3296  		yt := &o.ytab[i]
  3297  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3298  			switch o.prefix {
  3299  			case Px1: /* first option valid only in 32-bit mode */
  3300  				if ctxt.Mode == 64 && z == 0 {
  3301  					z += int(yt.zoffset) + xo
  3302  					continue
  3303  				}
  3304  			case Pq: /* 16 bit escape and opcode escape */
  3305  				ctxt.AsmBuf.Put2(Pe, Pm)
  3306  
  3307  			case Pq3: /* 16 bit escape and opcode escape + REX.W */
  3308  				ctxt.Rexflag |= Pw
  3309  				ctxt.AsmBuf.Put2(Pe, Pm)
  3310  
  3311  			case Pq4: /*  66 0F 38 */
  3312  				ctxt.AsmBuf.Put3(0x66, 0x0F, 0x38)
  3313  
  3314  			case Pf2, /* xmm opcode escape */
  3315  				Pf3:
  3316  				ctxt.AsmBuf.Put2(byte(o.prefix), Pm)
  3317  
  3318  			case Pef3:
  3319  				ctxt.AsmBuf.Put3(Pe, Pf3, Pm)
  3320  
  3321  			case Pfw: /* xmm opcode escape + REX.W */
  3322  				ctxt.Rexflag |= Pw
  3323  				ctxt.AsmBuf.Put2(Pf3, Pm)
  3324  
  3325  			case Pm: /* opcode escape */
  3326  				ctxt.AsmBuf.Put1(Pm)
  3327  
  3328  			case Pe: /* 16 bit escape */
  3329  				ctxt.AsmBuf.Put1(Pe)
  3330  
  3331  			case Pw: /* 64-bit escape */
  3332  				if p.Mode != 64 {
  3333  					ctxt.Diag("asmins: illegal 64: %v", p)
  3334  				}
  3335  				ctxt.Rexflag |= Pw
  3336  
  3337  			case Pw8: /* 64-bit escape if z >= 8 */
  3338  				if z >= 8 {
  3339  					if p.Mode != 64 {
  3340  						ctxt.Diag("asmins: illegal 64: %v", p)
  3341  					}
  3342  					ctxt.Rexflag |= Pw
  3343  				}
  3344  
  3345  			case Pb: /* botch */
  3346  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3347  					goto bad
  3348  				}
  3349  				// NOTE(rsc): This is probably safe to do always,
  3350  				// but when enabled it chooses different encodings
  3351  				// than the old cmd/internal/obj/i386 code did,
  3352  				// which breaks our "same bits out" checks.
  3353  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3354  				// in the original obj/i386, and it would encode
  3355  				// (using a valid, shorter form) as 3c 00 if we enabled
  3356  				// the call to bytereg here.
  3357  				if p.Mode == 64 {
  3358  					bytereg(&p.From, &p.Ft)
  3359  					bytereg(&p.To, &p.Tt)
  3360  				}
  3361  
  3362  			case P32: /* 32 bit but illegal if 64-bit mode */
  3363  				if p.Mode == 64 {
  3364  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3365  				}
  3366  
  3367  			case Py: /* 64-bit only, no prefix */
  3368  				if p.Mode != 64 {
  3369  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3370  				}
  3371  
  3372  			case Py1: /* 64-bit only if z < 1, no prefix */
  3373  				if z < 1 && p.Mode != 64 {
  3374  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3375  				}
  3376  
  3377  			case Py3: /* 64-bit only if z < 3, no prefix */
  3378  				if z < 3 && p.Mode != 64 {
  3379  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3380  				}
  3381  			}
  3382  
  3383  			if z >= len(o.op) {
  3384  				log.Fatalf("asmins bad table %v", p)
  3385  			}
  3386  			op = int(o.op[z])
  3387  			if op == 0x0f {
  3388  				ctxt.AsmBuf.Put1(byte(op))
  3389  				z++
  3390  				op = int(o.op[z])
  3391  			}
  3392  
  3393  			switch yt.zcase {
  3394  			default:
  3395  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3396  				return
  3397  
  3398  			case Zpseudo:
  3399  				break
  3400  
  3401  			case Zlit:
  3402  				for ; ; z++ {
  3403  					op = int(o.op[z])
  3404  					if op == 0 {
  3405  						break
  3406  					}
  3407  					ctxt.AsmBuf.Put1(byte(op))
  3408  				}
  3409  
  3410  			case Zlitm_r:
  3411  				for ; ; z++ {
  3412  					op = int(o.op[z])
  3413  					if op == 0 {
  3414  						break
  3415  					}
  3416  					ctxt.AsmBuf.Put1(byte(op))
  3417  				}
  3418  				asmand(ctxt, p, &p.From, &p.To)
  3419  
  3420  			case Zmb_r:
  3421  				bytereg(&p.From, &p.Ft)
  3422  				fallthrough
  3423  
  3424  			case Zm_r:
  3425  				ctxt.AsmBuf.Put1(byte(op))
  3426  				asmand(ctxt, p, &p.From, &p.To)
  3427  
  3428  			case Zm2_r:
  3429  				ctxt.AsmBuf.Put2(byte(op), byte(o.op[z+1]))
  3430  				asmand(ctxt, p, &p.From, &p.To)
  3431  
  3432  			case Zm_r_xm:
  3433  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3434  				asmand(ctxt, p, &p.From, &p.To)
  3435  
  3436  			case Zm_r_xm_nr:
  3437  				ctxt.Rexflag = 0
  3438  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3439  				asmand(ctxt, p, &p.From, &p.To)
  3440  
  3441  			case Zm_r_i_xm:
  3442  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3443  				asmand(ctxt, p, &p.From, p.From3)
  3444  				ctxt.AsmBuf.Put1(byte(p.To.Offset))
  3445  
  3446  			case Zibm_r, Zibr_m:
  3447  				for {
  3448  					tmp1 := z
  3449  					z++
  3450  					op = int(o.op[tmp1])
  3451  					if op == 0 {
  3452  						break
  3453  					}
  3454  					ctxt.AsmBuf.Put1(byte(op))
  3455  				}
  3456  				if yt.zcase == Zibr_m {
  3457  					asmand(ctxt, p, &p.To, p.From3)
  3458  				} else {
  3459  					asmand(ctxt, p, p.From3, &p.To)
  3460  				}
  3461  				ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3462  
  3463  			case Zaut_r:
  3464  				ctxt.AsmBuf.Put1(0x8d) // leal
  3465  				if p.From.Type != obj.TYPE_ADDR {
  3466  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3467  				}
  3468  				p.From.Type = obj.TYPE_MEM
  3469  				asmand(ctxt, p, &p.From, &p.To)
  3470  				p.From.Type = obj.TYPE_ADDR
  3471  
  3472  			case Zm_o:
  3473  				ctxt.AsmBuf.Put1(byte(op))
  3474  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3475  
  3476  			case Zr_m:
  3477  				ctxt.AsmBuf.Put1(byte(op))
  3478  				asmand(ctxt, p, &p.To, &p.From)
  3479  
  3480  			case Zvex_rm_v_r:
  3481  				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
  3482  				asmand(ctxt, p, &p.From, &p.To)
  3483  
  3484  			case Zvex_v_rm_r:
  3485  				asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
  3486  				asmand(ctxt, p, p.From3, &p.To)
  3487  
  3488  			case Zvex_r_v_rm:
  3489  				asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
  3490  				asmand(ctxt, p, &p.To, &p.From)
  3491  
  3492  			case Zr_m_xm:
  3493  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3494  				asmand(ctxt, p, &p.To, &p.From)
  3495  
  3496  			case Zr_m_xm_nr:
  3497  				ctxt.Rexflag = 0
  3498  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3499  				asmand(ctxt, p, &p.To, &p.From)
  3500  
  3501  			case Zo_m:
  3502  				ctxt.AsmBuf.Put1(byte(op))
  3503  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3504  
  3505  			case Zcallindreg:
  3506  				r = obj.Addrel(ctxt.Cursym)
  3507  				r.Off = int32(p.Pc)
  3508  				r.Type = obj.R_CALLIND
  3509  				r.Siz = 0
  3510  				fallthrough
  3511  
  3512  			case Zo_m64:
  3513  				ctxt.AsmBuf.Put1(byte(op))
  3514  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3515  
  3516  			case Zm_ibo:
  3517  				ctxt.AsmBuf.Put1(byte(op))
  3518  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3519  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  3520  
  3521  			case Zibo_m:
  3522  				ctxt.AsmBuf.Put1(byte(op))
  3523  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3524  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3525  
  3526  			case Zibo_m_xm:
  3527  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3528  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3529  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3530  
  3531  			case Z_ib, Zib_:
  3532  				if yt.zcase == Zib_ {
  3533  					a = &p.From
  3534  				} else {
  3535  					a = &p.To
  3536  				}
  3537  				ctxt.AsmBuf.Put1(byte(op))
  3538  				if p.As == AXABORT {
  3539  					ctxt.AsmBuf.Put1(byte(o.op[z+1]))
  3540  				}
  3541  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, a, nil)))
  3542  
  3543  			case Zib_rp:
  3544  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3545  				ctxt.AsmBuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  3546  
  3547  			case Zil_rp:
  3548  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3549  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3550  				if o.prefix == Pe {
  3551  					v = vaddr(ctxt, p, &p.From, nil)
  3552  					ctxt.AsmBuf.PutInt16(int16(v))
  3553  				} else {
  3554  					relput4(ctxt, p, &p.From)
  3555  				}
  3556  
  3557  			case Zo_iw:
  3558  				ctxt.AsmBuf.Put1(byte(op))
  3559  				if p.From.Type != obj.TYPE_NONE {
  3560  					v = vaddr(ctxt, p, &p.From, nil)
  3561  					ctxt.AsmBuf.PutInt16(int16(v))
  3562  				}
  3563  
  3564  			case Ziq_rp:
  3565  				v = vaddr(ctxt, p, &p.From, &rel)
  3566  				l = int(v >> 32)
  3567  				if l == 0 && rel.Siz != 8 {
  3568  					//p->mark |= 0100;
  3569  					//print("zero: %llux %v\n", v, p);
  3570  					ctxt.Rexflag &^= (0x40 | Rxw)
  3571  
  3572  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3573  					ctxt.AsmBuf.Put1(byte(0xb8 + reg[p.To.Reg]))
  3574  					if rel.Type != 0 {
  3575  						r = obj.Addrel(ctxt.Cursym)
  3576  						*r = rel
  3577  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3578  					}
  3579  
  3580  					ctxt.AsmBuf.PutInt32(int32(v))
  3581  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3582  
  3583  					//p->mark |= 0100;
  3584  					//print("sign: %llux %v\n", v, p);
  3585  					ctxt.AsmBuf.Put1(0xc7)
  3586  					asmando(ctxt, p, &p.To, 0)
  3587  
  3588  					ctxt.AsmBuf.PutInt32(int32(v)) // need all 8
  3589  				} else {
  3590  					//print("all: %llux %v\n", v, p);
  3591  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3592  					ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3593  					if rel.Type != 0 {
  3594  						r = obj.Addrel(ctxt.Cursym)
  3595  						*r = rel
  3596  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3597  					}
  3598  
  3599  					ctxt.AsmBuf.PutInt64(v)
  3600  				}
  3601  
  3602  			case Zib_rr:
  3603  				ctxt.AsmBuf.Put1(byte(op))
  3604  				asmand(ctxt, p, &p.To, &p.To)
  3605  				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  3606  
  3607  			case Z_il, Zil_:
  3608  				if yt.zcase == Zil_ {
  3609  					a = &p.From
  3610  				} else {
  3611  					a = &p.To
  3612  				}
  3613  				ctxt.AsmBuf.Put1(byte(op))
  3614  				if o.prefix == Pe {
  3615  					v = vaddr(ctxt, p, a, nil)
  3616  					ctxt.AsmBuf.PutInt16(int16(v))
  3617  				} else {
  3618  					relput4(ctxt, p, a)
  3619  				}
  3620  
  3621  			case Zm_ilo, Zilo_m:
  3622  				ctxt.AsmBuf.Put1(byte(op))
  3623  				if yt.zcase == Zilo_m {
  3624  					a = &p.From
  3625  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3626  				} else {
  3627  					a = &p.To
  3628  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3629  				}
  3630  
  3631  				if o.prefix == Pe {
  3632  					v = vaddr(ctxt, p, a, nil)
  3633  					ctxt.AsmBuf.PutInt16(int16(v))
  3634  				} else {
  3635  					relput4(ctxt, p, a)
  3636  				}
  3637  
  3638  			case Zil_rr:
  3639  				ctxt.AsmBuf.Put1(byte(op))
  3640  				asmand(ctxt, p, &p.To, &p.To)
  3641  				if o.prefix == Pe {
  3642  					v = vaddr(ctxt, p, &p.From, nil)
  3643  					ctxt.AsmBuf.PutInt16(int16(v))
  3644  				} else {
  3645  					relput4(ctxt, p, &p.From)
  3646  				}
  3647  
  3648  			case Z_rp:
  3649  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3650  				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
  3651  
  3652  			case Zrp_:
  3653  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3654  				ctxt.AsmBuf.Put1(byte(op + reg[p.From.Reg]))
  3655  
  3656  			case Zclr:
  3657  				ctxt.Rexflag &^= Pw
  3658  				ctxt.AsmBuf.Put1(byte(op))
  3659  				asmand(ctxt, p, &p.To, &p.To)
  3660  
  3661  			case Zcallcon, Zjmpcon:
  3662  				if yt.zcase == Zcallcon {
  3663  					ctxt.AsmBuf.Put1(byte(op))
  3664  				} else {
  3665  					ctxt.AsmBuf.Put1(byte(o.op[z+1]))
  3666  				}
  3667  				r = obj.Addrel(ctxt.Cursym)
  3668  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3669  				r.Type = obj.R_PCREL
  3670  				r.Siz = 4
  3671  				r.Add = p.To.Offset
  3672  				ctxt.AsmBuf.PutInt32(0)
  3673  
  3674  			case Zcallind:
  3675  				ctxt.AsmBuf.Put2(byte(op), byte(o.op[z+1]))
  3676  				r = obj.Addrel(ctxt.Cursym)
  3677  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3678  				r.Type = obj.R_ADDR
  3679  				r.Siz = 4
  3680  				r.Add = p.To.Offset
  3681  				r.Sym = p.To.Sym
  3682  				ctxt.AsmBuf.PutInt32(0)
  3683  
  3684  			case Zcall, Zcallduff:
  3685  				if p.To.Sym == nil {
  3686  					ctxt.Diag("call without target")
  3687  					log.Fatalf("bad code")
  3688  				}
  3689  
  3690  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3691  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3692  				}
  3693  
  3694  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3695  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3696  					// (the call jumps into the middle of the function).
  3697  					// This makes it possible to see call sites for duffcopy/duffzero in
  3698  					// BP-based profiling tools like Linux perf (which is the
  3699  					// whole point of obj.Framepointer_enabled).
  3700  					// MOVQ BP, -16(SP)
  3701  					// LEAQ -16(SP), BP
  3702  					ctxt.AsmBuf.Put(bpduff1)
  3703  				}
  3704  				ctxt.AsmBuf.Put1(byte(op))
  3705  				r = obj.Addrel(ctxt.Cursym)
  3706  				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3707  				r.Sym = p.To.Sym
  3708  				r.Add = p.To.Offset
  3709  				r.Type = obj.R_CALL
  3710  				r.Siz = 4
  3711  				ctxt.AsmBuf.PutInt32(0)
  3712  
  3713  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3714  					// Pop BP pushed above.
  3715  					// MOVQ 0(BP), BP
  3716  					ctxt.AsmBuf.Put(bpduff2)
  3717  				}
  3718  
  3719  			// TODO: jump across functions needs reloc
  3720  			case Zbr, Zjmp, Zloop:
  3721  				if p.As == AXBEGIN {
  3722  					ctxt.AsmBuf.Put1(byte(op))
  3723  				}
  3724  				if p.To.Sym != nil {
  3725  					if yt.zcase != Zjmp {
  3726  						ctxt.Diag("branch to ATEXT")
  3727  						log.Fatalf("bad code")
  3728  					}
  3729  
  3730  					ctxt.AsmBuf.Put1(byte(o.op[z+1]))
  3731  					r = obj.Addrel(ctxt.Cursym)
  3732  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3733  					r.Sym = p.To.Sym
  3734  					r.Type = obj.R_PCREL
  3735  					r.Siz = 4
  3736  					ctxt.AsmBuf.PutInt32(0)
  3737  					break
  3738  				}
  3739  
  3740  				// Assumes q is in this function.
  3741  				// TODO: Check in input, preserve in brchain.
  3742  
  3743  				// Fill in backward jump now.
  3744  				q = p.Pcond
  3745  
  3746  				if q == nil {
  3747  					ctxt.Diag("jmp/branch/loop without target")
  3748  					log.Fatalf("bad code")
  3749  				}
  3750  
  3751  				if p.Back&1 != 0 {
  3752  					v = q.Pc - (p.Pc + 2)
  3753  					if v >= -128 && p.As != AXBEGIN {
  3754  						if p.As == AJCXZL {
  3755  							ctxt.AsmBuf.Put1(0x67)
  3756  						}
  3757  						ctxt.AsmBuf.Put2(byte(op), byte(v))
  3758  					} else if yt.zcase == Zloop {
  3759  						ctxt.Diag("loop too far: %v", p)
  3760  					} else {
  3761  						v -= 5 - 2
  3762  						if p.As == AXBEGIN {
  3763  							v--
  3764  						}
  3765  						if yt.zcase == Zbr {
  3766  							ctxt.AsmBuf.Put1(0x0f)
  3767  							v--
  3768  						}
  3769  
  3770  						ctxt.AsmBuf.Put1(byte(o.op[z+1]))
  3771  						ctxt.AsmBuf.PutInt32(int32(v))
  3772  					}
  3773  
  3774  					break
  3775  				}
  3776  
  3777  				// Annotate target; will fill in later.
  3778  				p.Forwd = q.Rel
  3779  
  3780  				q.Rel = p
  3781  				if p.Back&2 != 0 && p.As != AXBEGIN { // short
  3782  					if p.As == AJCXZL {
  3783  						ctxt.AsmBuf.Put1(0x67)
  3784  					}
  3785  					ctxt.AsmBuf.Put2(byte(op), 0)
  3786  				} else if yt.zcase == Zloop {
  3787  					ctxt.Diag("loop too far: %v", p)
  3788  				} else {
  3789  					if yt.zcase == Zbr {
  3790  						ctxt.AsmBuf.Put1(0x0f)
  3791  					}
  3792  					ctxt.AsmBuf.Put1(byte(o.op[z+1]))
  3793  					ctxt.AsmBuf.PutInt32(0)
  3794  				}
  3795  
  3796  				break
  3797  
  3798  			/*
  3799  				v = q->pc - p->pc - 2;
  3800  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3801  					*ctxt->andptr++ = op;
  3802  					*ctxt->andptr++ = v;
  3803  				} else {
  3804  					v -= 5-2;
  3805  					if(yt.zcase == Zbr) {
  3806  						*ctxt->andptr++ = 0x0f;
  3807  						v--;
  3808  					}
  3809  					*ctxt->andptr++ = o->op[z+1];
  3810  					*ctxt->andptr++ = v;
  3811  					*ctxt->andptr++ = v>>8;
  3812  					*ctxt->andptr++ = v>>16;
  3813  					*ctxt->andptr++ = v>>24;
  3814  				}
  3815  			*/
  3816  
  3817  			case Zbyte:
  3818  				v = vaddr(ctxt, p, &p.From, &rel)
  3819  				if rel.Siz != 0 {
  3820  					rel.Siz = uint8(op)
  3821  					r = obj.Addrel(ctxt.Cursym)
  3822  					*r = rel
  3823  					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3824  				}
  3825  
  3826  				ctxt.AsmBuf.Put1(byte(v))
  3827  				if op > 1 {
  3828  					ctxt.AsmBuf.Put1(byte(v >> 8))
  3829  					if op > 2 {
  3830  						ctxt.AsmBuf.PutInt16(int16(v >> 16))
  3831  						if op > 4 {
  3832  							ctxt.AsmBuf.PutInt32(int32(v >> 32))
  3833  						}
  3834  					}
  3835  				}
  3836  			}
  3837  
  3838  			return
  3839  		}
  3840  		z += int(yt.zoffset) + xo
  3841  	}
  3842  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3843  		var pp obj.Prog
  3844  		var t []byte
  3845  		if p.As == mo[0].as {
  3846  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3847  				t = mo[0].op[:]
  3848  				switch mo[0].code {
  3849  				default:
  3850  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3851  
  3852  				case 0: /* lit */
  3853  					for z = 0; t[z] != E; z++ {
  3854  						ctxt.AsmBuf.Put1(t[z])
  3855  					}
  3856  
  3857  				case 1: /* r,m */
  3858  					ctxt.AsmBuf.Put1(t[0])
  3859  					asmando(ctxt, p, &p.To, int(t[1]))
  3860  
  3861  				case 2: /* m,r */
  3862  					ctxt.AsmBuf.Put1(t[0])
  3863  					asmando(ctxt, p, &p.From, int(t[1]))
  3864  
  3865  				case 3: /* r,m - 2op */
  3866  					ctxt.AsmBuf.Put2(t[0], t[1])
  3867  					asmando(ctxt, p, &p.To, int(t[2]))
  3868  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3869  
  3870  				case 4: /* m,r - 2op */
  3871  					ctxt.AsmBuf.Put2(t[0], t[1])
  3872  					asmando(ctxt, p, &p.From, int(t[2]))
  3873  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3874  
  3875  				case 5: /* load full pointer, trash heap */
  3876  					if t[0] != 0 {
  3877  						ctxt.AsmBuf.Put1(t[0])
  3878  					}
  3879  					switch p.To.Index {
  3880  					default:
  3881  						goto bad
  3882  
  3883  					case REG_DS:
  3884  						ctxt.AsmBuf.Put1(0xc5)
  3885  
  3886  					case REG_SS:
  3887  						ctxt.AsmBuf.Put2(0x0f, 0xb2)
  3888  
  3889  					case REG_ES:
  3890  						ctxt.AsmBuf.Put1(0xc4)
  3891  
  3892  					case REG_FS:
  3893  						ctxt.AsmBuf.Put2(0x0f, 0xb4)
  3894  
  3895  					case REG_GS:
  3896  						ctxt.AsmBuf.Put2(0x0f, 0xb5)
  3897  					}
  3898  
  3899  					asmand(ctxt, p, &p.From, &p.To)
  3900  
  3901  				case 6: /* double shift */
  3902  					if t[0] == Pw {
  3903  						if p.Mode != 64 {
  3904  							ctxt.Diag("asmins: illegal 64: %v", p)
  3905  						}
  3906  						ctxt.Rexflag |= Pw
  3907  						t = t[1:]
  3908  					} else if t[0] == Pe {
  3909  						ctxt.AsmBuf.Put1(Pe)
  3910  						t = t[1:]
  3911  					}
  3912  
  3913  					switch p.From.Type {
  3914  					default:
  3915  						goto bad
  3916  
  3917  					case obj.TYPE_CONST:
  3918  						ctxt.AsmBuf.Put2(0x0f, t[0])
  3919  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3920  						ctxt.AsmBuf.Put1(byte(p.From.Offset))
  3921  
  3922  					case obj.TYPE_REG:
  3923  						switch p.From.Reg {
  3924  						default:
  3925  							goto bad
  3926  
  3927  						case REG_CL, REG_CX:
  3928  							ctxt.AsmBuf.Put2(0x0f, t[1])
  3929  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3930  						}
  3931  					}
  3932  
  3933  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3934  				// where you load the TLS base register into a register and then index off that
  3935  				// register to access the actual TLS variables. Systems that allow direct TLS access
  3936  				// are handled in prefixof above and should not be listed here.
  3937  				case 7: /* mov tls, r */
  3938  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  3939  						ctxt.Diag("invalid load of TLS: %v", p)
  3940  					}
  3941  
  3942  					if p.Mode == 32 {
  3943  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3944  						// where you load the TLS base register into a register and then index off that
  3945  						// register to access the actual TLS variables. Systems that allow direct TLS access
  3946  						// are handled in prefixof above and should not be listed here.
  3947  						switch ctxt.Headtype {
  3948  						default:
  3949  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3950  
  3951  						case obj.Hlinux,
  3952  							obj.Hnacl:
  3953  							if ctxt.Flag_shared != 0 {
  3954  								// Note that this is not generating the same insns as the other cases.
  3955  								//     MOV TLS, R_to
  3956  								// becomes
  3957  								//     call __x86.get_pc_thunk.cx
  3958  								//     movl (gotpc + g@gotntpoff)(%ecx),$R_To
  3959  								// which is encoded as
  3960  								//     call __x86.get_pc_thunk.cx
  3961  								//     movq 0(%ecx), R_to
  3962  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  3963  								// is g, which we can't check here, but will when we assemble the second
  3964  								// instruction.
  3965  								ctxt.AsmBuf.Put1(0xe8)
  3966  								r = obj.Addrel(ctxt.Cursym)
  3967  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3968  								r.Type = obj.R_CALL
  3969  								r.Siz = 4
  3970  								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk.cx", 0)
  3971  								ctxt.AsmBuf.PutInt32(0)
  3972  
  3973  								ctxt.AsmBuf.Put2(0x8B, byte(2<<6|reg[REG_CX]|(reg[p.To.Reg]<<3)))
  3974  								r = obj.Addrel(ctxt.Cursym)
  3975  								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  3976  								r.Type = obj.R_TLS_IE
  3977  								r.Siz = 4
  3978  								r.Add = 2
  3979  								ctxt.AsmBuf.PutInt32(0)
  3980  							} else {
  3981  								// ELF TLS base is 0(GS).
  3982  								pp.From = p.From
  3983  
  3984  								pp.From.Type = obj.TYPE_MEM
  3985  								pp.From.Reg = REG_GS
  3986  								pp.From.Offset = 0
  3987  								pp.From.Index = REG_NONE
  3988  								pp.From.Scale = 0
  3989  								ctxt.AsmBuf.Put2(0x65, // GS
  3990  									0x8B)
  3991  								asmand(ctxt, p, &pp.From, &p.To)
  3992  							}
  3993  						case obj.Hplan9:
  3994  							if ctxt.Plan9privates == nil {
  3995  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3996  							}
  3997  							pp.From = obj.Addr{}
  3998  							pp.From.Type = obj.TYPE_MEM
  3999  							pp.From.Name = obj.NAME_EXTERN
  4000  							pp.From.Sym = ctxt.Plan9privates
  4001  							pp.From.Offset = 0
  4002  							pp.From.Index = REG_NONE
  4003  							ctxt.AsmBuf.Put1(0x8B)
  4004  							asmand(ctxt, p, &pp.From, &p.To)
  4005  
  4006  						case obj.Hwindows:
  4007  							// Windows TLS base is always 0x14(FS).
  4008  							pp.From = p.From
  4009  
  4010  							pp.From.Type = obj.TYPE_MEM
  4011  							pp.From.Reg = REG_FS
  4012  							pp.From.Offset = 0x14
  4013  							pp.From.Index = REG_NONE
  4014  							pp.From.Scale = 0
  4015  							ctxt.AsmBuf.Put2(0x64, // FS
  4016  								0x8B)
  4017  							asmand(ctxt, p, &pp.From, &p.To)
  4018  						}
  4019  						break
  4020  					}
  4021  
  4022  					switch ctxt.Headtype {
  4023  					default:
  4024  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  4025  
  4026  					case obj.Hlinux:
  4027  						if ctxt.Flag_shared == 0 {
  4028  							log.Fatalf("unknown TLS base location for linux without -shared")
  4029  						}
  4030  						// Note that this is not generating the same insn as the other cases.
  4031  						//     MOV TLS, R_to
  4032  						// becomes
  4033  						//     movq g@gottpoff(%rip), R_to
  4034  						// which is encoded as
  4035  						//     movq 0(%rip), R_to
  4036  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  4037  						// is g, which we can't check here, but will when we assemble the second
  4038  						// instruction.
  4039  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  4040  
  4041  						ctxt.AsmBuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  4042  						r = obj.Addrel(ctxt.Cursym)
  4043  						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
  4044  						r.Type = obj.R_TLS_IE
  4045  						r.Siz = 4
  4046  						r.Add = -4
  4047  						ctxt.AsmBuf.PutInt32(0)
  4048  
  4049  					case obj.Hplan9:
  4050  						if ctxt.Plan9privates == nil {
  4051  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  4052  						}
  4053  						pp.From = obj.Addr{}
  4054  						pp.From.Type = obj.TYPE_MEM
  4055  						pp.From.Name = obj.NAME_EXTERN
  4056  						pp.From.Sym = ctxt.Plan9privates
  4057  						pp.From.Offset = 0
  4058  						pp.From.Index = REG_NONE
  4059  						ctxt.Rexflag |= Pw
  4060  						ctxt.AsmBuf.Put1(0x8B)
  4061  						asmand(ctxt, p, &pp.From, &p.To)
  4062  
  4063  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  4064  						// TLS base is 0(FS).
  4065  						pp.From = p.From
  4066  
  4067  						pp.From.Type = obj.TYPE_MEM
  4068  						pp.From.Name = obj.NAME_NONE
  4069  						pp.From.Reg = REG_NONE
  4070  						pp.From.Offset = 0
  4071  						pp.From.Index = REG_NONE
  4072  						pp.From.Scale = 0
  4073  						ctxt.Rexflag |= Pw
  4074  						ctxt.AsmBuf.Put2(0x64, // FS
  4075  							0x8B)
  4076  						asmand(ctxt, p, &pp.From, &p.To)
  4077  
  4078  					case obj.Hwindows:
  4079  						// Windows TLS base is always 0x28(GS).
  4080  						pp.From = p.From
  4081  
  4082  						pp.From.Type = obj.TYPE_MEM
  4083  						pp.From.Name = obj.NAME_NONE
  4084  						pp.From.Reg = REG_GS
  4085  						pp.From.Offset = 0x28
  4086  						pp.From.Index = REG_NONE
  4087  						pp.From.Scale = 0
  4088  						ctxt.Rexflag |= Pw
  4089  						ctxt.AsmBuf.Put2(0x65, // GS
  4090  							0x8B)
  4091  						asmand(ctxt, p, &pp.From, &p.To)
  4092  					}
  4093  				}
  4094  				return
  4095  			}
  4096  		}
  4097  	}
  4098  	goto bad
  4099  
  4100  bad:
  4101  	if p.Mode != 64 {
  4102  		/*
  4103  		 * here, the assembly has failed.
  4104  		 * if its a byte instruction that has
  4105  		 * unaddressable registers, try to
  4106  		 * exchange registers and reissue the
  4107  		 * instruction with the operands renamed.
  4108  		 */
  4109  		pp := *p
  4110  
  4111  		unbytereg(&pp.From, &pp.Ft)
  4112  		unbytereg(&pp.To, &pp.Tt)
  4113  
  4114  		z := int(p.From.Reg)
  4115  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4116  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4117  			// For now, different to keep bit-for-bit compatibility.
  4118  			if p.Mode == 32 {
  4119  				breg := byteswapreg(ctxt, &p.To)
  4120  				if breg != REG_AX {
  4121  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4122  					asmando(ctxt, p, &p.From, reg[breg])
  4123  					subreg(&pp, z, breg)
  4124  					doasm(ctxt, &pp)
  4125  					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4126  					asmando(ctxt, p, &p.From, reg[breg])
  4127  				} else {
  4128  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4129  					subreg(&pp, z, REG_AX)
  4130  					doasm(ctxt, &pp)
  4131  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4132  				}
  4133  				return
  4134  			}
  4135  
  4136  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4137  				// We certainly don't want to exchange
  4138  				// with AX if the op is MUL or DIV.
  4139  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4140  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4141  				subreg(&pp, z, REG_BX)
  4142  				doasm(ctxt, &pp)
  4143  				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
  4144  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4145  			} else {
  4146  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4147  				subreg(&pp, z, REG_AX)
  4148  				doasm(ctxt, &pp)
  4149  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  4150  			}
  4151  			return
  4152  		}
  4153  
  4154  		z = int(p.To.Reg)
  4155  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4156  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4157  			// For now, different to keep bit-for-bit compatibility.
  4158  			if p.Mode == 32 {
  4159  				breg := byteswapreg(ctxt, &p.From)
  4160  				if breg != REG_AX {
  4161  					ctxt.AsmBuf.Put1(0x87) //xchg rhs,bx
  4162  					asmando(ctxt, p, &p.To, reg[breg])
  4163  					subreg(&pp, z, breg)
  4164  					doasm(ctxt, &pp)
  4165  					ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4166  					asmando(ctxt, p, &p.To, reg[breg])
  4167  				} else {
  4168  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4169  					subreg(&pp, z, REG_AX)
  4170  					doasm(ctxt, &pp)
  4171  					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4172  				}
  4173  				return
  4174  			}
  4175  
  4176  			if isax(&p.From) {
  4177  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4178  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4179  				subreg(&pp, z, REG_BX)
  4180  				doasm(ctxt, &pp)
  4181  				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
  4182  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4183  			} else {
  4184  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4185  				subreg(&pp, z, REG_AX)
  4186  				doasm(ctxt, &pp)
  4187  				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  4188  			}
  4189  			return
  4190  		}
  4191  	}
  4192  
  4193  	ctxt.Diag("invalid instruction: %v", p)
  4194  	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4195  	return
  4196  }
  4197  
  4198  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4199  // which is not referenced in a.
  4200  // If a is empty, it returns BX to account for MULB-like instructions
  4201  // that might use DX and AX.
  4202  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4203  	cand := 1
  4204  	canc := cand
  4205  	canb := canc
  4206  	cana := canb
  4207  
  4208  	if a.Type == obj.TYPE_NONE {
  4209  		cand = 0
  4210  		cana = cand
  4211  	}
  4212  
  4213  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4214  		switch a.Reg {
  4215  		case REG_NONE:
  4216  			cand = 0
  4217  			cana = cand
  4218  
  4219  		case REG_AX, REG_AL, REG_AH:
  4220  			cana = 0
  4221  
  4222  		case REG_BX, REG_BL, REG_BH:
  4223  			canb = 0
  4224  
  4225  		case REG_CX, REG_CL, REG_CH:
  4226  			canc = 0
  4227  
  4228  		case REG_DX, REG_DL, REG_DH:
  4229  			cand = 0
  4230  		}
  4231  	}
  4232  
  4233  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4234  		switch a.Index {
  4235  		case REG_AX:
  4236  			cana = 0
  4237  
  4238  		case REG_BX:
  4239  			canb = 0
  4240  
  4241  		case REG_CX:
  4242  			canc = 0
  4243  
  4244  		case REG_DX:
  4245  			cand = 0
  4246  		}
  4247  	}
  4248  
  4249  	if cana != 0 {
  4250  		return REG_AX
  4251  	}
  4252  	if canb != 0 {
  4253  		return REG_BX
  4254  	}
  4255  	if canc != 0 {
  4256  		return REG_CX
  4257  	}
  4258  	if cand != 0 {
  4259  		return REG_DX
  4260  	}
  4261  
  4262  	ctxt.Diag("impossible byte register")
  4263  	log.Fatalf("bad code")
  4264  	return 0
  4265  }
  4266  
  4267  func isbadbyte(a *obj.Addr) bool {
  4268  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4269  }
  4270  
  4271  var naclret = []uint8{
  4272  	0x5e, // POPL SI
  4273  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4274  	0x83,
  4275  	0xe6,
  4276  	0xe0, // ANDL $~31, SI
  4277  	0x4c,
  4278  	0x01,
  4279  	0xfe, // ADDQ R15, SI
  4280  	0xff,
  4281  	0xe6, // JMP SI
  4282  }
  4283  
  4284  var naclret8 = []uint8{
  4285  	0x5d, // POPL BP
  4286  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4287  	0x83,
  4288  	0xe5,
  4289  	0xe0, // ANDL $~31, BP
  4290  	0xff,
  4291  	0xe5, // JMP BP
  4292  }
  4293  
  4294  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4295  
  4296  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4297  
  4298  var naclmovs = []uint8{
  4299  	0x89,
  4300  	0xf6, // MOVL SI, SI
  4301  	0x49,
  4302  	0x8d,
  4303  	0x34,
  4304  	0x37, // LEAQ (R15)(SI*1), SI
  4305  	0x89,
  4306  	0xff, // MOVL DI, DI
  4307  	0x49,
  4308  	0x8d,
  4309  	0x3c,
  4310  	0x3f, // LEAQ (R15)(DI*1), DI
  4311  }
  4312  
  4313  var naclstos = []uint8{
  4314  	0x89,
  4315  	0xff, // MOVL DI, DI
  4316  	0x49,
  4317  	0x8d,
  4318  	0x3c,
  4319  	0x3f, // LEAQ (R15)(DI*1), DI
  4320  }
  4321  
  4322  func nacltrunc(ctxt *obj.Link, reg int) {
  4323  	if reg >= REG_R8 {
  4324  		ctxt.AsmBuf.Put1(0x45)
  4325  	}
  4326  	reg = (reg - REG_AX) & 7
  4327  	ctxt.AsmBuf.Put2(0x89, byte(3<<6|reg<<3|reg))
  4328  }
  4329  
  4330  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4331  	ctxt.AsmBuf.Reset()
  4332  	ctxt.Asmode = int(p.Mode)
  4333  
  4334  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4335  		switch p.As {
  4336  		case obj.ARET:
  4337  			ctxt.AsmBuf.Put(naclret8)
  4338  			return
  4339  
  4340  		case obj.ACALL,
  4341  			obj.AJMP:
  4342  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4343  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4344  			}
  4345  
  4346  		case AINT:
  4347  			ctxt.AsmBuf.Put1(0xf4)
  4348  			return
  4349  		}
  4350  	}
  4351  
  4352  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4353  		if p.As == AREP {
  4354  			ctxt.Rep++
  4355  			return
  4356  		}
  4357  
  4358  		if p.As == AREPN {
  4359  			ctxt.Repn++
  4360  			return
  4361  		}
  4362  
  4363  		if p.As == ALOCK {
  4364  			ctxt.Lock++
  4365  			return
  4366  		}
  4367  
  4368  		if p.As != ALEAQ && p.As != ALEAL {
  4369  			if p.From.Index != REG_NONE && p.From.Scale > 0 {
  4370  				nacltrunc(ctxt, int(p.From.Index))
  4371  			}
  4372  			if p.To.Index != REG_NONE && p.To.Scale > 0 {
  4373  				nacltrunc(ctxt, int(p.To.Index))
  4374  			}
  4375  		}
  4376  
  4377  		switch p.As {
  4378  		case obj.ARET:
  4379  			ctxt.AsmBuf.Put(naclret)
  4380  			return
  4381  
  4382  		case obj.ACALL,
  4383  			obj.AJMP:
  4384  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4385  				// ANDL $~31, reg
  4386  				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
  4387  				// ADDQ R15, reg
  4388  				ctxt.AsmBuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
  4389  			}
  4390  
  4391  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4392  				// ANDL $~31, reg
  4393  				ctxt.AsmBuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
  4394  				// ADDQ R15, reg
  4395  				ctxt.AsmBuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
  4396  			}
  4397  
  4398  		case AINT:
  4399  			ctxt.AsmBuf.Put1(0xf4)
  4400  			return
  4401  
  4402  		case ASCASB,
  4403  			ASCASW,
  4404  			ASCASL,
  4405  			ASCASQ,
  4406  			ASTOSB,
  4407  			ASTOSW,
  4408  			ASTOSL,
  4409  			ASTOSQ:
  4410  			ctxt.AsmBuf.Put(naclstos)
  4411  
  4412  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4413  			ctxt.AsmBuf.Put(naclmovs)
  4414  		}
  4415  
  4416  		if ctxt.Rep != 0 {
  4417  			ctxt.AsmBuf.Put1(0xf3)
  4418  			ctxt.Rep = 0
  4419  		}
  4420  
  4421  		if ctxt.Repn != 0 {
  4422  			ctxt.AsmBuf.Put1(0xf2)
  4423  			ctxt.Repn = 0
  4424  		}
  4425  
  4426  		if ctxt.Lock != 0 {
  4427  			ctxt.AsmBuf.Put1(0xf0)
  4428  			ctxt.Lock = 0
  4429  		}
  4430  	}
  4431  
  4432  	ctxt.Rexflag = 0
  4433  	ctxt.Vexflag = 0
  4434  	mark := ctxt.AsmBuf.Len()
  4435  	ctxt.Asmode = int(p.Mode)
  4436  	doasm(ctxt, p)
  4437  	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
  4438  		/*
  4439  		 * as befits the whole approach of the architecture,
  4440  		 * the rex prefix must appear before the first opcode byte
  4441  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4442  		 * before the 0f opcode escape!), or it might be ignored.
  4443  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4444  		 */
  4445  		if p.Mode != 64 {
  4446  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4447  		}
  4448  		n := ctxt.AsmBuf.Len()
  4449  		var np int
  4450  		for np = mark; np < n; np++ {
  4451  			c := ctxt.AsmBuf.Peek(np)
  4452  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4453  				break
  4454  			}
  4455  		}
  4456  		ctxt.AsmBuf.Insert(np, byte(0x40|ctxt.Rexflag))
  4457  	}
  4458  
  4459  	n := ctxt.AsmBuf.Len()
  4460  	var r *obj.Reloc
  4461  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4462  		r = &ctxt.Cursym.R[i:][0]
  4463  		if int64(r.Off) < p.Pc {
  4464  			break
  4465  		}
  4466  		if ctxt.Rexflag != 0 {
  4467  			r.Off++
  4468  		}
  4469  		if r.Type == obj.R_PCREL {
  4470  			if p.Mode == 64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4471  				// PC-relative addressing is relative to the end of the instruction,
  4472  				// but the relocations applied by the linker are relative to the end
  4473  				// of the relocation. Because immediate instruction
  4474  				// arguments can follow the PC-relative memory reference in the
  4475  				// instruction encoding, the two may not coincide. In this case,
  4476  				// adjust addend so that linker can keep relocating relative to the
  4477  				// end of the relocation.
  4478  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4479  			} else if p.Mode == 32 {
  4480  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  4481  				// assumes that the previous instruction loaded the PC of the end
  4482  				// of that instruction into CX, so the adjustment is relative to
  4483  				// that.
  4484  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4485  			}
  4486  		}
  4487  		if r.Type == obj.R_GOTPCREL && p.Mode == 32 {
  4488  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4489  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4490  		}
  4491  
  4492  	}
  4493  
  4494  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4495  		switch p.To.Reg {
  4496  		case REG_SP:
  4497  			ctxt.AsmBuf.Put(naclspfix)
  4498  		case REG_BP:
  4499  			ctxt.AsmBuf.Put(naclbpfix)
  4500  		}
  4501  	}
  4502  }