github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/cmd/internal/obj/x86/asm6.go (about)

     1  // Inferno utils/6l/span.c
     2  // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors.  All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"fmt"
    36  	"log"
    37  	"strings"
    38  )
    39  
    40  // Instruction layout.
    41  
    42  const (
    43  	// Loop alignment constants:
    44  	// want to align loop entry to LoopAlign-byte boundary,
    45  	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
    46  	// We define a loop entry as the target of a backward jump.
    47  	//
    48  	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
    49  	// and it aligns all jump targets, not just backward jump targets.
    50  	//
    51  	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
    52  	// is very slight but negative, so the alignment is disabled by
    53  	// setting MaxLoopPad = 0. The code is here for reference and
    54  	// for future experiments.
    55  	//
    56  	LoopAlign  = 16
    57  	MaxLoopPad = 0
    58  	FuncAlign  = 16
    59  )
    60  
    61  type Optab struct {
    62  	as     int16
    63  	ytab   []ytab
    64  	prefix uint8
    65  	op     [23]uint8
    66  }
    67  
    68  type ytab struct {
    69  	from    uint8
    70  	from3   uint8
    71  	to      uint8
    72  	zcase   uint8
    73  	zoffset uint8
    74  }
    75  
    76  type Movtab struct {
    77  	as   int16
    78  	ft   uint8
    79  	f3t  uint8
    80  	tt   uint8
    81  	code uint8
    82  	op   [4]uint8
    83  }
    84  
// Operand classes ("Ytypes"). The from, from3 and to fields of every ytab row
// hold these values, and ycover is indexed by pairs of them.
//
// The values are assigned by iota, so the order of the list is significant.
// Ymax must remain the last entry because it sizes ycover (Ymax * Ymax).
//
// NOTE(review): only a few of the names are explained in this section; the
// class of a concrete operand is computed by oclass, which is not shown here.
const (
	Yxxx = iota
	// Ynone is what the tables use for an operand slot that must be empty
	// (for example every slot of the ynone table).
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	// Ys32 is "signed 32"; Yi0, Yi1 and Yi8 all count as Ys32 via ycover.
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr
	Yxm
	Ytls
	Ytextsize
	Yindir
	// Ymax is the number of operand classes; it is the dimension of ycover.
	Ymax
)
   156  
// Encoding cases ("Ztypes"). The zcase field of a ytab row holds one of these
// values; together with the row's opcode bytes it decides which bytes are laid
// down for the instruction.
//
// The values are assigned by iota, so the order of the list is significant.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	// Zibo_m is an opcode byte, then the encoded addressing mode of the
	// memory/register operand, then a single immediate byte.
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	// Zilo_m is like Zibo_m but with a long (32-bit) immediate.
	Zilo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_xm_vex
	Zm_r_i_xm
	Zm_r_3d
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zr_m_xm_vex
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	// Zmax is the number of encoding cases.
	Zmax
)
   209  
   210  const (
   211  	Px    = 0
   212  	Px1   = 1    // symbolic; exact value doesn't matter
   213  	P32   = 0x32 /* 32-bit only */
   214  	Pe    = 0x66 /* operand escape */
   215  	Pm    = 0x0f /* 2byte opcode escape */
   216  	Pq    = 0xff /* both escapes: 66 0f */
   217  	Pb    = 0xfe /* byte operands */
   218  	Pf2   = 0xf2 /* xmm escape 1: f2 0f */
   219  	Pf3   = 0xf3 /* xmm escape 2: f3 0f */
   220  	Pq3   = 0x67 /* xmm escape 3: 66 48 0f */
   221  	Pvex1 = 0xc5 /* 66 escape, vex encoding */
   222  	Pvex2 = 0xc6 /* f3 escape, vex encoding */
   223  	Pw    = 0x48 /* Rex.w */
   224  	Pw8   = 0x90 // symbolic; exact value doesn't matter
   225  	Py    = 0x80 /* defaults to 64-bit mode */
   226  	Py1   = 0x81 // symbolic; exact value doesn't matter
   227  	Py3   = 0x83 // symbolic; exact value doesn't matter
   228  
   229  	Rxw = 1 << 3 /* =1, 64-bit operand size */
   230  	Rxr = 1 << 2 /* extend modrm reg */
   231  	Rxx = 1 << 1 /* extend sib index */
   232  	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
   233  )
   234  
   235  var ycover [Ymax * Ymax]uint8
   236  
   237  var reg [MAXREG]int
   238  
   239  var regrex [MAXREG + 1]int
   240  
   241  var ynone = []ytab{
   242  	{Ynone, Ynone, Ynone, Zlit, 1},
   243  }
   244  
   245  var ytext = []ytab{
   246  	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
   247  	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
   248  }
   249  
   250  var ynop = []ytab{
   251  	{Ynone, Ynone, Ynone, Zpseudo, 0},
   252  	{Ynone, Ynone, Yiauto, Zpseudo, 0},
   253  	{Ynone, Ynone, Yml, Zpseudo, 0},
   254  	{Ynone, Ynone, Yrf, Zpseudo, 0},
   255  	{Ynone, Ynone, Yxr, Zpseudo, 0},
   256  	{Yiauto, Ynone, Ynone, Zpseudo, 0},
   257  	{Yml, Ynone, Ynone, Zpseudo, 0},
   258  	{Yrf, Ynone, Ynone, Zpseudo, 0},
   259  	{Yxr, Ynone, Ynone, Zpseudo, 1},
   260  }
   261  
   262  var yfuncdata = []ytab{
   263  	{Yi32, Ynone, Ym, Zpseudo, 0},
   264  }
   265  
   266  var ypcdata = []ytab{
   267  	{Yi32, Ynone, Yi32, Zpseudo, 0},
   268  }
   269  
   270  var yxorb = []ytab{
   271  	{Yi32, Ynone, Yal, Zib_, 1},
   272  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   273  	{Yrb, Ynone, Ymb, Zr_m, 1},
   274  	{Ymb, Ynone, Yrb, Zm_r, 1},
   275  }
   276  
   277  var yxorl = []ytab{
   278  	{Yi8, Ynone, Yml, Zibo_m, 2},
   279  	{Yi32, Ynone, Yax, Zil_, 1},
   280  	{Yi32, Ynone, Yml, Zilo_m, 2},
   281  	{Yrl, Ynone, Yml, Zr_m, 1},
   282  	{Yml, Ynone, Yrl, Zm_r, 1},
   283  }
   284  
   285  var yaddl = []ytab{
   286  	{Yi8, Ynone, Yml, Zibo_m, 2},
   287  	{Yi32, Ynone, Yax, Zil_, 1},
   288  	{Yi32, Ynone, Yml, Zilo_m, 2},
   289  	{Yrl, Ynone, Yml, Zr_m, 1},
   290  	{Yml, Ynone, Yrl, Zm_r, 1},
   291  }
   292  
   293  var yincb = []ytab{
   294  	{Ynone, Ynone, Ymb, Zo_m, 2},
   295  }
   296  
   297  var yincw = []ytab{
   298  	{Ynone, Ynone, Yml, Zo_m, 2},
   299  }
   300  
   301  var yincl = []ytab{
   302  	{Ynone, Ynone, Yrl, Z_rp, 1},
   303  	{Ynone, Ynone, Yml, Zo_m, 2},
   304  }
   305  
   306  var yincq = []ytab{
   307  	{Ynone, Ynone, Yml, Zo_m, 2},
   308  }
   309  
   310  var ycmpb = []ytab{
   311  	{Yal, Ynone, Yi32, Z_ib, 1},
   312  	{Ymb, Ynone, Yi32, Zm_ibo, 2},
   313  	{Ymb, Ynone, Yrb, Zm_r, 1},
   314  	{Yrb, Ynone, Ymb, Zr_m, 1},
   315  }
   316  
   317  var ycmpl = []ytab{
   318  	{Yml, Ynone, Yi8, Zm_ibo, 2},
   319  	{Yax, Ynone, Yi32, Z_il, 1},
   320  	{Yml, Ynone, Yi32, Zm_ilo, 2},
   321  	{Yml, Ynone, Yrl, Zm_r, 1},
   322  	{Yrl, Ynone, Yml, Zr_m, 1},
   323  }
   324  
   325  var yshb = []ytab{
   326  	{Yi1, Ynone, Ymb, Zo_m, 2},
   327  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   328  	{Ycx, Ynone, Ymb, Zo_m, 2},
   329  }
   330  
   331  var yshl = []ytab{
   332  	{Yi1, Ynone, Yml, Zo_m, 2},
   333  	{Yi32, Ynone, Yml, Zibo_m, 2},
   334  	{Ycl, Ynone, Yml, Zo_m, 2},
   335  	{Ycx, Ynone, Yml, Zo_m, 2},
   336  }
   337  
   338  var ytestb = []ytab{
   339  	{Yi32, Ynone, Yal, Zib_, 1},
   340  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   341  	{Yrb, Ynone, Ymb, Zr_m, 1},
   342  	{Ymb, Ynone, Yrb, Zm_r, 1},
   343  }
   344  
   345  var ytestl = []ytab{
   346  	{Yi32, Ynone, Yax, Zil_, 1},
   347  	{Yi32, Ynone, Yml, Zilo_m, 2},
   348  	{Yrl, Ynone, Yml, Zr_m, 1},
   349  	{Yml, Ynone, Yrl, Zm_r, 1},
   350  }
   351  
   352  var ymovb = []ytab{
   353  	{Yrb, Ynone, Ymb, Zr_m, 1},
   354  	{Ymb, Ynone, Yrb, Zm_r, 1},
   355  	{Yi32, Ynone, Yrb, Zib_rp, 1},
   356  	{Yi32, Ynone, Ymb, Zibo_m, 2},
   357  }
   358  
   359  var ymbs = []ytab{
   360  	{Ymb, Ynone, Ynone, Zm_o, 2},
   361  }
   362  
   363  var ybtl = []ytab{
   364  	{Yi8, Ynone, Yml, Zibo_m, 2},
   365  	{Yrl, Ynone, Yml, Zr_m, 1},
   366  }
   367  
   368  var ymovw = []ytab{
   369  	{Yrl, Ynone, Yml, Zr_m, 1},
   370  	{Yml, Ynone, Yrl, Zm_r, 1},
   371  	{Yi0, Ynone, Yrl, Zclr, 1},
   372  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   373  	{Yi32, Ynone, Yml, Zilo_m, 2},
   374  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   375  }
   376  
   377  var ymovl = []ytab{
   378  	{Yrl, Ynone, Yml, Zr_m, 1},
   379  	{Yml, Ynone, Yrl, Zm_r, 1},
   380  	{Yi0, Ynone, Yrl, Zclr, 1},
   381  	{Yi32, Ynone, Yrl, Zil_rp, 1},
   382  	{Yi32, Ynone, Yml, Zilo_m, 2},
   383  	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
   384  	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
   385  	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
   386  	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
   387  	{Yiauto, Ynone, Yrl, Zaut_r, 2},
   388  }
   389  
   390  var yret = []ytab{
   391  	{Ynone, Ynone, Ynone, Zo_iw, 1},
   392  	{Yi32, Ynone, Ynone, Zo_iw, 1},
   393  }
   394  
   395  var ymovq = []ytab{
   396  	// valid in 32-bit mode
   397  	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
   398  	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
   399  	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
   400  	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   401  	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   402  
   403  	// valid only in 64-bit mode, usually with 64-bit prefix
   404  	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
   405  	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
   406  	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
   407  	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
   408  	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
   409  	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
   410  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
   411  	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
   412  	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
   413  	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
   414  	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
   415  }
   416  
   417  var ym_rl = []ytab{
   418  	{Ym, Ynone, Yrl, Zm_r, 1},
   419  }
   420  
   421  var yrl_m = []ytab{
   422  	{Yrl, Ynone, Ym, Zr_m, 1},
   423  }
   424  
   425  var ymb_rl = []ytab{
   426  	{Ymb, Ynone, Yrl, Zmb_r, 1},
   427  }
   428  
   429  var yml_rl = []ytab{
   430  	{Yml, Ynone, Yrl, Zm_r, 1},
   431  }
   432  
   433  var yrl_ml = []ytab{
   434  	{Yrl, Ynone, Yml, Zr_m, 1},
   435  }
   436  
   437  var yml_mb = []ytab{
   438  	{Yrb, Ynone, Ymb, Zr_m, 1},
   439  	{Ymb, Ynone, Yrb, Zm_r, 1},
   440  }
   441  
   442  var yrb_mb = []ytab{
   443  	{Yrb, Ynone, Ymb, Zr_m, 1},
   444  }
   445  
   446  var yxchg = []ytab{
   447  	{Yax, Ynone, Yrl, Z_rp, 1},
   448  	{Yrl, Ynone, Yax, Zrp_, 1},
   449  	{Yrl, Ynone, Yml, Zr_m, 1},
   450  	{Yml, Ynone, Yrl, Zm_r, 1},
   451  }
   452  
   453  var ydivl = []ytab{
   454  	{Yml, Ynone, Ynone, Zm_o, 2},
   455  }
   456  
   457  var ydivb = []ytab{
   458  	{Ymb, Ynone, Ynone, Zm_o, 2},
   459  }
   460  
   461  var yimul = []ytab{
   462  	{Yml, Ynone, Ynone, Zm_o, 2},
   463  	{Yi8, Ynone, Yrl, Zib_rr, 1},
   464  	{Yi32, Ynone, Yrl, Zil_rr, 1},
   465  	{Yml, Ynone, Yrl, Zm_r, 2},
   466  }
   467  
   468  var yimul3 = []ytab{
   469  	{Yi8, Yml, Yrl, Zibm_r, 2},
   470  }
   471  
   472  var ybyte = []ytab{
   473  	{Yi64, Ynone, Ynone, Zbyte, 1},
   474  }
   475  
   476  var yin = []ytab{
   477  	{Yi32, Ynone, Ynone, Zib_, 1},
   478  	{Ynone, Ynone, Ynone, Zlit, 1},
   479  }
   480  
   481  var yint = []ytab{
   482  	{Yi32, Ynone, Ynone, Zib_, 1},
   483  }
   484  
   485  var ypushl = []ytab{
   486  	{Yrl, Ynone, Ynone, Zrp_, 1},
   487  	{Ym, Ynone, Ynone, Zm_o, 2},
   488  	{Yi8, Ynone, Ynone, Zib_, 1},
   489  	{Yi32, Ynone, Ynone, Zil_, 1},
   490  }
   491  
   492  var ypopl = []ytab{
   493  	{Ynone, Ynone, Yrl, Z_rp, 1},
   494  	{Ynone, Ynone, Ym, Zo_m, 2},
   495  }
   496  
   497  var ybswap = []ytab{
   498  	{Ynone, Ynone, Yrl, Z_rp, 2},
   499  }
   500  
   501  var yscond = []ytab{
   502  	{Ynone, Ynone, Ymb, Zo_m, 2},
   503  }
   504  
   505  var yjcond = []ytab{
   506  	{Ynone, Ynone, Ybr, Zbr, 0},
   507  	{Yi0, Ynone, Ybr, Zbr, 0},
   508  	{Yi1, Ynone, Ybr, Zbr, 1},
   509  }
   510  
   511  var yloop = []ytab{
   512  	{Ynone, Ynone, Ybr, Zloop, 1},
   513  }
   514  
   515  var ycall = []ytab{
   516  	{Ynone, Ynone, Yml, Zcallindreg, 0},
   517  	{Yrx, Ynone, Yrx, Zcallindreg, 2},
   518  	{Ynone, Ynone, Yindir, Zcallind, 2},
   519  	{Ynone, Ynone, Ybr, Zcall, 0},
   520  	{Ynone, Ynone, Yi32, Zcallcon, 1},
   521  }
   522  
   523  var yduff = []ytab{
   524  	{Ynone, Ynone, Yi32, Zcallduff, 1},
   525  }
   526  
   527  var yjmp = []ytab{
   528  	{Ynone, Ynone, Yml, Zo_m64, 2},
   529  	{Ynone, Ynone, Ybr, Zjmp, 0},
   530  	{Ynone, Ynone, Yi32, Zjmpcon, 1},
   531  }
   532  
   533  var yfmvd = []ytab{
   534  	{Ym, Ynone, Yf0, Zm_o, 2},
   535  	{Yf0, Ynone, Ym, Zo_m, 2},
   536  	{Yrf, Ynone, Yf0, Zm_o, 2},
   537  	{Yf0, Ynone, Yrf, Zo_m, 2},
   538  }
   539  
   540  var yfmvdp = []ytab{
   541  	{Yf0, Ynone, Ym, Zo_m, 2},
   542  	{Yf0, Ynone, Yrf, Zo_m, 2},
   543  }
   544  
   545  var yfmvf = []ytab{
   546  	{Ym, Ynone, Yf0, Zm_o, 2},
   547  	{Yf0, Ynone, Ym, Zo_m, 2},
   548  }
   549  
   550  var yfmvx = []ytab{
   551  	{Ym, Ynone, Yf0, Zm_o, 2},
   552  }
   553  
   554  var yfmvp = []ytab{
   555  	{Yf0, Ynone, Ym, Zo_m, 2},
   556  }
   557  
   558  var yfcmv = []ytab{
   559  	{Yrf, Ynone, Yf0, Zm_o, 2},
   560  }
   561  
   562  var yfadd = []ytab{
   563  	{Ym, Ynone, Yf0, Zm_o, 2},
   564  	{Yrf, Ynone, Yf0, Zm_o, 2},
   565  	{Yf0, Ynone, Yrf, Zo_m, 2},
   566  }
   567  
   568  var yfaddp = []ytab{
   569  	{Yf0, Ynone, Yrf, Zo_m, 2},
   570  }
   571  
   572  var yfxch = []ytab{
   573  	{Yf0, Ynone, Yrf, Zo_m, 2},
   574  	{Yrf, Ynone, Yf0, Zm_o, 2},
   575  }
   576  
   577  var ycompp = []ytab{
   578  	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
   579  }
   580  
   581  var ystsw = []ytab{
   582  	{Ynone, Ynone, Ym, Zo_m, 2},
   583  	{Ynone, Ynone, Yax, Zlit, 1},
   584  }
   585  
   586  var ystcw = []ytab{
   587  	{Ynone, Ynone, Ym, Zo_m, 2},
   588  	{Ym, Ynone, Ynone, Zm_o, 2},
   589  }
   590  
   591  var ysvrs = []ytab{
   592  	{Ynone, Ynone, Ym, Zo_m, 2},
   593  	{Ym, Ynone, Ynone, Zm_o, 2},
   594  }
   595  
   596  var ymm = []ytab{
   597  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   598  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   599  }
   600  
   601  var yxm = []ytab{
   602  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   603  }
   604  
   605  var yxcvm1 = []ytab{
   606  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   607  	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
   608  }
   609  
   610  var yxcvm2 = []ytab{
   611  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   612  	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
   613  }
   614  
   615  /*
   616  var yxmq = []ytab{
   617  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   618  }
   619  */
   620  
   621  var yxr = []ytab{
   622  	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
   623  }
   624  
   625  var yxr_ml = []ytab{
   626  	{Yxr, Ynone, Yml, Zr_m_xm, 1},
   627  }
   628  
   629  var yxr_ml_vex = []ytab{
   630  	{Yxr, Ynone, Yml, Zr_m_xm_vex, 1},
   631  }
   632  
   633  var ymr = []ytab{
   634  	{Ymr, Ynone, Ymr, Zm_r, 1},
   635  }
   636  
   637  var ymr_ml = []ytab{
   638  	{Ymr, Ynone, Yml, Zr_m_xm, 1},
   639  }
   640  
   641  var yxcmp = []ytab{
   642  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   643  }
   644  
   645  var yxcmpi = []ytab{
   646  	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
   647  }
   648  
   649  var yxmov_vex = []ytab{
   650  	{Yxm, Ynone, Yxr, Zm_r_xm_vex, 1},
   651  	{Yxr, Ynone, Yxm, Zr_m_xm_vex, 1},
   652  }
   653  
   654  var yxmov = []ytab{
   655  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   656  	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
   657  }
   658  
   659  var yxcvfl = []ytab{
   660  	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
   661  }
   662  
   663  var yxcvlf = []ytab{
   664  	{Yml, Ynone, Yxr, Zm_r_xm, 1},
   665  }
   666  
   667  var yxcvfq = []ytab{
   668  	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
   669  }
   670  
   671  var yxcvqf = []ytab{
   672  	{Yml, Ynone, Yxr, Zm_r_xm, 2},
   673  }
   674  
   675  var yps = []ytab{
   676  	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
   677  	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
   678  	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
   679  	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
   680  }
   681  
   682  var yxrrl = []ytab{
   683  	{Yxr, Ynone, Yrl, Zm_r, 1},
   684  }
   685  
   686  var ymfp = []ytab{
   687  	{Ymm, Ynone, Ymr, Zm_r_3d, 1},
   688  }
   689  
   690  var ymrxr = []ytab{
   691  	{Ymr, Ynone, Yxr, Zm_r, 1},
   692  	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
   693  }
   694  
   695  var ymshuf = []ytab{
   696  	{Yi8, Ymm, Ymr, Zibm_r, 2},
   697  }
   698  
   699  var ymshufb = []ytab{
   700  	{Yxm, Ynone, Yxr, Zm2_r, 2},
   701  }
   702  
   703  var yxshuf = []ytab{
   704  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   705  }
   706  
   707  var yextrw = []ytab{
   708  	{Yu8, Yxr, Yrl, Zibm_r, 2},
   709  }
   710  
   711  var yinsrw = []ytab{
   712  	{Yu8, Yml, Yxr, Zibm_r, 2},
   713  }
   714  
   715  var yinsr = []ytab{
   716  	{Yu8, Ymm, Yxr, Zibm_r, 3},
   717  }
   718  
   719  var ypsdq = []ytab{
   720  	{Yi8, Ynone, Yxr, Zibo_m, 2},
   721  }
   722  
   723  var ymskb = []ytab{
   724  	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
   725  	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
   726  }
   727  
   728  var ycrc32l = []ytab{
   729  	{Yml, Ynone, Yrl, Zlitm_r, 0},
   730  }
   731  
   732  var yprefetch = []ytab{
   733  	{Ym, Ynone, Ynone, Zm_o, 2},
   734  }
   735  
   736  var yaes = []ytab{
   737  	{Yxm, Ynone, Yxr, Zlitm_r, 2},
   738  }
   739  
   740  var yaes2 = []ytab{
   741  	{Yu8, Yxm, Yxr, Zibm_r, 2},
   742  }
   743  
   744  /*
   745   * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
   746   * and p->from and p->to as operands (Addr*).  The linker scans optab to find
   747   * the entry with the given p->as and then looks through the ytable for that
   748   * instruction (the second field in the optab struct) for a line whose first
   749   * two values match the Ytypes of the p->from and p->to operands.  The function
   750   * oclass in span.c computes the specific Ytype of an operand and then the set
   751   * of more general Ytypes that it satisfies is implied by the ycover table, set
   752   * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   753   * from the more general 8-bit constants, but instinit says
   754   *
   755   *        ycover[Yi0*Ymax + Ys32] = 1;
   756   *        ycover[Yi1*Ymax + Ys32] = 1;
   757   *        ycover[Yi8*Ymax + Ys32] = 1;
   758   *
   759   * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   760   * if that's what an instruction can handle.
   761   *
   762   * In parallel with the scan through the ytable for the appropriate line, there
   763   * is a z pointer that starts out pointing at the strange magic byte list in
   764   * the Optab struct.  With each step past a non-matching ytable line, z
   765   * advances by the 4th entry in the line.  When a matching line is found, that
   766   * z pointer has the extra data to use in laying down the instruction bytes.
   767   * The actual bytes laid down are a function of the 3rd entry in the line (that
   768   * is, the Ztype) and the z bytes.
   769   *
   770   * For example, let's look at AADDL.  The optab line says:
   771   *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
   772   *
   773   * and yaddl says
   774   *        uchar   yaddl[] =
   775   *        {
   776   *                Yi8,    Yml,    Zibo_m, 2,
   777   *                Yi32,   Yax,    Zil_,   1,
   778   *                Yi32,   Yml,    Zilo_m, 2,
   779   *                Yrl,    Yml,    Zr_m,   1,
   780   *                Yml,    Yrl,    Zm_r,   1,
   781   *                0
   782   *        };
   783   *
   784   * so there are 5 possible types of ADDL instruction that can be laid down, and
   785   * possible states used to lay them down (Ztype and z pointer, assuming z
   786   * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
   787   *
   788   *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   789   *        Yi32, Yax -> Zil_, z+2 (0x05)
   790   *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   791   *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   792   *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   793   *
   794   * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   795   * relatively straightforward as this program goes.
   796   *
   797   * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
   798   * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   799   * encoded addressing mode for the Yml arg), and then a single immediate byte.
   800   * Zilo_m is the same but a long (32-bit) immediate.
   801   */
   802  var optab =
   803  /*	as, ytab, andproto, opcode */
   804  []Optab{
   805  	{obj.AXXX, nil, 0, [23]uint8{}},
   806  	{AAAA, ynone, P32, [23]uint8{0x37}},
   807  	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   808  	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   809  	{AAAS, ynone, P32, [23]uint8{0x3f}},
   810  	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
   811  	{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   812  	{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   813  	{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   814  	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   815  	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   816  	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   817  	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   818  	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   819  	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   820  	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   821  	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   822  	{AADJSP, nil, 0, [23]uint8{}},
   823  	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   824  	{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   825  	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   826  	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   827  	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   828  	{AANDPS, yxm, Pq, [23]uint8{0x54}},
   829  	{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   830  	{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   831  	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   832  	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   833  	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   834  	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   835  	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   836  	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   837  	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   838  	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   839  	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   840  	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   841  	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   842  	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   843  	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   844  	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   845  	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   846  	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   847  	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   848  	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   849  	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   850  	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   851  	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   852  	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   853  	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   854  	{ABYTE, ybyte, Px, [23]uint8{1}},
   855  	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   856  	{ACDQ, ynone, Px, [23]uint8{0x99}},
   857  	{ACLC, ynone, Px, [23]uint8{0xf8}},
   858  	{ACLD, ynone, Px, [23]uint8{0xfc}},
   859  	{ACLI, ynone, Px, [23]uint8{0xfa}},
   860  	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   861  	{ACMC, ynone, Px, [23]uint8{0xf5}},
   862  	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
   863  	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
   864  	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
   865  	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
   866  	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
   867  	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
   868  	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
   869  	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
   870  	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
   871  	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
   872  	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
   873  	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
   874  	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
   875  	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
   876  	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
   877  	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
   878  	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
   879  	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
   880  	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
   881  	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
   882  	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
   883  	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
   884  	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
   885  	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
   886  	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
   887  	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
   888  	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
   889  	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
   890  	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
   891  	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
   892  	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
   893  	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
   894  	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
   895  	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
   896  	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
   897  	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
   898  	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
   899  	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
   900  	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
   901  	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
   902  	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
   903  	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
   904  	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
   905  	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
   906  	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
   907  	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
   908  	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
   909  	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
   910  	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
   911  	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   912  	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
   913  	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
   914  	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   915  	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
   916  	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
   917  	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
   918  	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
   919  	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
   920  	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
   921  	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   922  	{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
   923  	{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
   924  	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
   925  	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
   926  	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
   927  	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
   928  	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
   929  	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
   930  	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
   931  	{API2FW, ymfp, Px, [23]uint8{0x0c}},
   932  	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
   933  	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
   934  	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
   935  	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
   936  	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
   937  	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
   938  	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
   939  	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
   940  	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
   941  	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
   942  	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
   943  	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
   944  	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
   945  	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
   946  	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
   947  	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
   948  	{ACWD, ynone, Pe, [23]uint8{0x99}},
   949  	{ACQO, ynone, Pw, [23]uint8{0x99}},
   950  	{ADAA, ynone, P32, [23]uint8{0x27}},
   951  	{ADAS, ynone, P32, [23]uint8{0x2f}},
   952  	{obj.ADATA, nil, 0, [23]uint8{}},
   953  	{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
   954  	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
   955  	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
   956  	{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
   957  	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
   958  	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
   959  	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
   960  	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
   961  	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
   962  	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
   963  	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
   964  	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
   965  	{AEMMS, ynone, Pm, [23]uint8{0x77}},
   966  	{AENTER, nil, 0, [23]uint8{}}, /* botch */
   967  	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
   968  	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
   969  	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
   970  	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
   971  	{obj.AGLOBL, nil, 0, [23]uint8{}},
   972  	{AHLT, ynone, Px, [23]uint8{0xf4}},
   973  	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
   974  	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
   975  	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
   976  	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
   977  	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
   978  	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   979  	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   980  	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   981  	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
   982  	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
   983  	{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
   984  	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
   985  	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
   986  	{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
   987  	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
   988  	{AINSB, ynone, Pb, [23]uint8{0x6c}},
   989  	{AINSL, ynone, Px, [23]uint8{0x6d}},
   990  	{AINSW, ynone, Pe, [23]uint8{0x6d}},
   991  	{AINT, yint, Px, [23]uint8{0xcd}},
   992  	{AINTO, ynone, P32, [23]uint8{0xce}},
   993  	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
   994  	{AIRETL, ynone, Px, [23]uint8{0xcf}},
   995  	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
   996  	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
   997  	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
   998  	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
   999  	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
  1000  	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
  1001  	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
  1002  	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
  1003  	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
  1004  	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
  1005  	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
  1006  	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
  1007  	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
  1008  	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
  1009  	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
  1010  	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
  1011  	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
  1012  	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
  1013  	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
  1014  	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
  1015  	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
  1016  	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
  1017  	{ALAHF, ynone, Px, [23]uint8{0x9f}},
  1018  	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
  1019  	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
  1020  	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
  1021  	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
  1022  	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
  1023  	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
  1024  	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
  1025  	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
  1026  	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
  1027  	{ALOCK, ynone, Px, [23]uint8{0xf0}},
  1028  	{ALODSB, ynone, Pb, [23]uint8{0xac}},
  1029  	{ALODSL, ynone, Px, [23]uint8{0xad}},
  1030  	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
  1031  	{ALODSW, ynone, Pe, [23]uint8{0xad}},
  1032  	{ALONG, ybyte, Px, [23]uint8{4}},
  1033  	{ALOOP, yloop, Px, [23]uint8{0xe2}},
  1034  	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
  1035  	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
  1036  	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
  1037  	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
  1038  	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
  1039  	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
  1040  	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
  1041  	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
  1042  	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
  1043  	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
  1044  	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
  1045  	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
  1046  	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
  1047  	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
  1048  	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
  1049  	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
  1050  	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1051  	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
  1052  	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1053  	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
  1054  	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
  1055  	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
  1056  	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
  1057  	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
  1058  	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
  1059  	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
  1060  	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
  1061  	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
  1062  	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1063  	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
  1064  	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
  1065  	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
  1066  	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
  1067  	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
  1068  	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
  1069  	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
  1070  	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
  1071  	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
  1072  	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
  1073  	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
  1074  	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1075  	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
  1076  	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
  1077  	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
  1078  	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
  1079  	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
  1080  	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
  1081  	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
  1082  	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
  1083  	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
  1084  	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
  1085  	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
  1086  	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
  1087  	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
  1088  	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
  1089  	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
  1090  	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
  1091  	{AMULPD, yxm, Pe, [23]uint8{0x59}},
  1092  	{AMULPS, yxm, Ym, [23]uint8{0x59}},
  1093  	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
  1094  	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
  1095  	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
  1096  	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
  1097  	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
  1098  	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
  1099  	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
  1100  	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
  1101  	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
  1102  	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
  1103  	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1104  	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
  1105  	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
  1106  	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
  1107  	{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1108  	{AORPD, yxm, Pq, [23]uint8{0x56}},
  1109  	{AORPS, yxm, Pm, [23]uint8{0x56}},
  1110  	{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1111  	{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1112  	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
  1113  	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
  1114  	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
  1115  	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
  1116  	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
  1117  	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
  1118  	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
  1119  	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
  1120  	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
  1121  	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
  1122  	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
  1123  	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
  1124  	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
  1125  	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
  1126  	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
  1127  	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
  1128  	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
  1129  	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
  1130  	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
  1131  	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
  1132  	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
  1133  	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
  1134  	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
  1135  	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
  1136  	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
  1137  	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
  1138  	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
  1139  	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
  1140  	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
  1141  	{APF2IL, ymfp, Px, [23]uint8{0x1d}},
  1142  	{APF2IW, ymfp, Px, [23]uint8{0x1c}},
  1143  	{API2FL, ymfp, Px, [23]uint8{0x0d}},
  1144  	{APFACC, ymfp, Px, [23]uint8{0xae}},
  1145  	{APFADD, ymfp, Px, [23]uint8{0x9e}},
  1146  	{APFCMPEQ, ymfp, Px, [23]uint8{0xb0}},
  1147  	{APFCMPGE, ymfp, Px, [23]uint8{0x90}},
  1148  	{APFCMPGT, ymfp, Px, [23]uint8{0xa0}},
  1149  	{APFMAX, ymfp, Px, [23]uint8{0xa4}},
  1150  	{APFMIN, ymfp, Px, [23]uint8{0x94}},
  1151  	{APFMUL, ymfp, Px, [23]uint8{0xb4}},
  1152  	{APFNACC, ymfp, Px, [23]uint8{0x8a}},
  1153  	{APFPNACC, ymfp, Px, [23]uint8{0x8e}},
  1154  	{APFRCP, ymfp, Px, [23]uint8{0x96}},
  1155  	{APFRCPIT1, ymfp, Px, [23]uint8{0xa6}},
  1156  	{APFRCPI2T, ymfp, Px, [23]uint8{0xb6}},
  1157  	{APFRSQIT1, ymfp, Px, [23]uint8{0xa7}},
  1158  	{APFRSQRT, ymfp, Px, [23]uint8{0x97}},
  1159  	{APFSUB, ymfp, Px, [23]uint8{0x9a}},
  1160  	{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
  1161  	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
  1162  	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
  1163  	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
  1164  	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
  1165  	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
  1166  	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
  1167  	{APMINSW, yxm, Pe, [23]uint8{0xea}},
  1168  	{APMINUB, yxm, Pe, [23]uint8{0xda}},
  1169  	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
  1170  	{APMULHRW, ymfp, Px, [23]uint8{0xb7}},
  1171  	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
  1172  	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
  1173  	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
  1174  	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
  1175  	{APOPAL, ynone, P32, [23]uint8{0x61}},
  1176  	{APOPAW, ynone, Pe, [23]uint8{0x61}},
  1177  	{APOPFL, ynone, P32, [23]uint8{0x9d}},
  1178  	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
  1179  	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
  1180  	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
  1181  	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
  1182  	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
  1183  	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
  1184  	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
  1185  	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
  1186  	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
  1187  	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
  1188  	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
  1189  	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
  1190  	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
  1191  	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1192  	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1193  	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1194  	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1195  	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1196  	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
  1197  	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1198  	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1199  	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1200  	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
  1201  	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
  1202  	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
  1203  	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
  1204  	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
  1205  	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
  1206  	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
  1207  	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
  1208  	{APSWAPL, ymfp, Px, [23]uint8{0xbb}},
  1209  	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
  1210  	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
  1211  	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
  1212  	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
  1213  	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
  1214  	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
  1215  	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
  1216  	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
  1217  	{APUSHAL, ynone, P32, [23]uint8{0x60}},
  1218  	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
  1219  	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
  1220  	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
  1221  	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
  1222  	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1223  	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1224  	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
  1225  	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
  1226  	{AQUAD, ybyte, Px, [23]uint8{8}},
  1227  	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1228  	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1229  	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1230  	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1231  	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
  1232  	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
  1233  	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1234  	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1235  	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1236  	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1237  	{AREP, ynone, Px, [23]uint8{0xf3}},
  1238  	{AREPN, ynone, Px, [23]uint8{0xf2}},
  1239  	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
  1240  	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
  1241  	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
  1242  	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
  1243  	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1244  	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1245  	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1246  	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1247  	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1248  	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1249  	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1250  	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1251  	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
  1252  	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
  1253  	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
  1254  	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1255  	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1256  	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1257  	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1258  	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1259  	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1260  	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1261  	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1262  	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
  1263  	{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1264  	{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1265  	{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1266  	{ASCASB, ynone, Pb, [23]uint8{0xae}},
  1267  	{ASCASL, ynone, Px, [23]uint8{0xaf}},
  1268  	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
  1269  	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
  1270  	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
  1271  	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
  1272  	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
  1273  	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
  1274  	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
  1275  	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
  1276  	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
  1277  	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
  1278  	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
  1279  	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
  1280  	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
  1281  	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
  1282  	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
  1283  	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
  1284  	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
  1285  	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
  1286  	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1287  	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1288  	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1289  	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1290  	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1291  	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1292  	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1293  	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1294  	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
  1295  	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
  1296  	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
  1297  	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
  1298  	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
  1299  	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
  1300  	{ASTC, ynone, Px, [23]uint8{0xf9}},
  1301  	{ASTD, ynone, Px, [23]uint8{0xfd}},
  1302  	{ASTI, ynone, Px, [23]uint8{0xfb}},
  1303  	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
  1304  	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
  1305  	{ASTOSL, ynone, Px, [23]uint8{0xab}},
  1306  	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
  1307  	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
  1308  	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
  1309  	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1310  	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
  1311  	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
  1312  	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1313  	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
  1314  	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
  1315  	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1316  	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
  1317  	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
  1318  	{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
  1319  	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1320  	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1321  	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
  1322  	{obj.ATEXT, ytext, Px, [23]uint8{}},
  1323  	{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
  1324  	{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
  1325  	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
  1326  	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
  1327  	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
  1328  	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
  1329  	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
  1330  	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
  1331  	{AWAIT, ynone, Px, [23]uint8{0x9b}},
  1332  	{AWORD, ybyte, Px, [23]uint8{2}},
  1333  	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
  1334  	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1335  	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1336  	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
  1337  	{AXLAT, ynone, Px, [23]uint8{0xd7}},
  1338  	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
  1339  	{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1340  	{AXORPD, yxm, Pe, [23]uint8{0x57}},
  1341  	{AXORPS, yxm, Pm, [23]uint8{0x57}},
  1342  	{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1343  	{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1344  	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
  1345  	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
  1346  	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1347  	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
  1348  	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
  1349  	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
  1350  	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
  1351  	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
  1352  	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
  1353  	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
  1354  	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
  1355  	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
  1356  	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
  1357  	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
  1358  	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
  1359  	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
  1360  	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
  1361  	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
  1362  	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
  1363  	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
  1364  	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
  1365  	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
  1366  	{AFCOMB, nil, 0, [23]uint8{}},
  1367  	{AFCOMBP, nil, 0, [23]uint8{}},
  1368  	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
  1369  	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
  1370  	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
  1371  	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
  1372  	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
  1373  	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
  1374  	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
  1375  	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
  1376  	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
  1377  	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
  1378  	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
  1379  	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
  1380  	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
  1381  	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
  1382  	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
  1383  	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
  1384  	{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
  1385  	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
  1386  	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
  1387  	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
  1388  	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1389  	{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
  1390  	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
  1391  	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
  1392  	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
  1393  	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1394  	{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
  1395  	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
  1396  	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
  1397  	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
  1398  	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1399  	{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
  1400  	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
  1401  	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
  1402  	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
  1403  	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1404  	{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
  1405  	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
  1406  	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
  1407  	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
  1408  	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1409  	{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
  1410  	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
  1411  	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
  1412  	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
  1413  	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1414  	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
  1415  	{AFFREE, nil, 0, [23]uint8{}},
  1416  	{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
  1417  	{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
  1418  	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
  1419  	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
  1420  	{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
  1421  	{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
  1422  	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
  1423  	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
  1424  	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
  1425  	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
  1426  	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
  1427  	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
  1428  	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
  1429  	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
  1430  	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
  1431  	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
  1432  	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
  1433  	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
  1434  	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
  1435  	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
  1436  	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
  1437  	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
  1438  	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
  1439  	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
  1440  	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
  1441  	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
  1442  	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
  1443  	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
  1444  	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
  1445  	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
  1446  	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
  1447  	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
  1448  	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
  1449  	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
  1450  	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
  1451  	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
  1452  	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
  1453  	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
  1454  	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
  1455  	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
  1456  	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
  1457  	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
  1458  	{AINVD, ynone, Pm, [23]uint8{0x08}},
  1459  	{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
  1460  	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
  1461  	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
  1462  	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
  1463  	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
  1464  	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
  1465  	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
  1466  	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
  1467  	{ARSM, ynone, Pm, [23]uint8{0xaa}},
  1468  	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
  1469  	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
  1470  	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
  1471  	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
  1472  	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
  1473  	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
  1474  	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
  1475  	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
  1476  	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1477  	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1478  	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
  1479  	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
  1480  	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
  1481  	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
  1482  	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
  1483  	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
  1484  	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
  1485  	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
  1486  	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
  1487  	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
  1488  	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
  1489  	{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
  1490  	{AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
  1491  	{AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
  1492  	{AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
  1493  	{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
  1494  	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
  1495  	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
  1496  	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
  1497  	{AMOVHDU, yxmov_vex, Pvex2, [23]uint8{0x6f, 0x7f}},
  1498  	{AMOVNTHD, yxr_ml_vex, Pvex1, [23]uint8{0xe7}},
  1499  	{AMOVHDA, yxmov_vex, Pvex1, [23]uint8{0x6f, 0x7f}},
  1500  	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
  1501  	{obj.ATYPE, nil, 0, [23]uint8{}},
  1502  	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
  1503  	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
  1504  	{obj.ACHECKNIL, nil, 0, [23]uint8{}},
  1505  	{obj.AVARDEF, nil, 0, [23]uint8{}},
  1506  	{obj.AVARKILL, nil, 0, [23]uint8{}},
  1507  	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
  1508  	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
  1509  	{obj.AEND, nil, 0, [23]uint8{}},
  1510  	{0, nil, 0, [23]uint8{}},
  1511  }
  1512  
  // opindex is a table of *Optab pointers with (ALAST+1)&obj.AMask slots,
  // i.e. one slot per masked instruction opcode.
  // NOTE(review): presumably populated once during instruction-table setup so
  // that opindex[as&obj.AMask] points at the optab row for opcode as; the code
  // that fills it is not in this part of the file - confirm there.
  1513  var opindex [(ALAST + 1) & obj.AMask]*Optab
  1514  
  1515  // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
  1516  // This happens on systems like Solaris that call .so functions instead of system calls.
  1517  // It does not seem to be necessary for any other systems. This is probably working
  1518  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1519  // what that bug is. And this does fix it.
  1520  func isextern(s *obj.LSym) bool {
  1521  	// All the Solaris dynamic imports from libc.so begin with "libc_".
  1522  	return strings.HasPrefix(s.Name, "libc_")
  1523  }
  1524  
  1525  // single-instruction no-ops of various lengths.
  1526  // constructed by hand and disassembled with gdb to verify.
  1527  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1528  var nop = [][16]uint8{
  1529  	{0x90},
  1530  	{0x66, 0x90},
  1531  	{0x0F, 0x1F, 0x00},
  1532  	{0x0F, 0x1F, 0x40, 0x00},
  1533  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1534  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1535  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1536  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1537  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1538  }
  1539  
  1540  // Native Client rejects the repeated 0x66 prefix.
  1541  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1542  func fillnop(p []byte, n int) {
  1543  	var m int
  1544  
  1545  	for n > 0 {
  1546  		m = n
  1547  		if m > len(nop) {
  1548  			m = len(nop)
  1549  		}
  1550  		copy(p[:m], nop[m-1][:m])
  1551  		p = p[m:]
  1552  		n -= m
  1553  	}
  1554  }
  1555  
  1556  func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1557  	obj.Symgrow(ctxt, s, int64(c)+int64(pad))
  1558  	fillnop(s.P[c:], int(pad))
  1559  	return c + pad
  1560  }
  1561  
  1562  func spadjop(ctxt *obj.Link, p *obj.Prog, l int, q int) int {
  1563  	if p.Mode != 64 || ctxt.Arch.Ptrsize == 4 {
  1564  		return l
  1565  	}
  1566  	return q
  1567  }
  1568  
  1569  func span6(ctxt *obj.Link, s *obj.LSym) {
  1570  	ctxt.Cursym = s
  1571  
  1572  	if s.P != nil {
  1573  		return
  1574  	}
  1575  
  1576  	if ycover[0] == 0 {
  1577  		instinit()
  1578  	}
  1579  
  1580  	var v int32
  1581  	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
  1582  		if p.To.Type == obj.TYPE_BRANCH {
  1583  			if p.Pcond == nil {
  1584  				p.Pcond = p
  1585  			}
  1586  		}
  1587  		if p.As == AADJSP {
  1588  			p.To.Type = obj.TYPE_REG
  1589  			p.To.Reg = REG_SP
  1590  			v = int32(-p.From.Offset)
  1591  			p.From.Offset = int64(v)
  1592  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1593  			if v < 0 {
  1594  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1595  				v = -v
  1596  				p.From.Offset = int64(v)
  1597  			}
  1598  
  1599  			if v == 0 {
  1600  				p.As = obj.ANOP
  1601  			}
  1602  		}
  1603  	}
  1604  
  1605  	var q *obj.Prog
  1606  	for p := s.Text; p != nil; p = p.Link {
  1607  		p.Back = 2 // use short branches first time through
  1608  		q = p.Pcond
  1609  		if q != nil && (q.Back&2 != 0) {
  1610  			p.Back |= 1 // backward jump
  1611  			q.Back |= 4 // loop head
  1612  		}
  1613  
  1614  		if p.As == AADJSP {
  1615  			p.To.Type = obj.TYPE_REG
  1616  			p.To.Reg = REG_SP
  1617  			v = int32(-p.From.Offset)
  1618  			p.From.Offset = int64(v)
  1619  			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
  1620  			if v < 0 {
  1621  				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
  1622  				v = -v
  1623  				p.From.Offset = int64(v)
  1624  			}
  1625  
  1626  			if v == 0 {
  1627  				p.As = obj.ANOP
  1628  			}
  1629  		}
  1630  	}
  1631  
  1632  	n := 0
  1633  	var bp []byte
  1634  	var c int32
  1635  	var i int
  1636  	var loop int32
  1637  	var m int
  1638  	var p *obj.Prog
  1639  	for {
  1640  		loop = 0
  1641  		for i = 0; i < len(s.R); i++ {
  1642  			s.R[i] = obj.Reloc{}
  1643  		}
  1644  		s.R = s.R[:0]
  1645  		s.P = s.P[:0]
  1646  		c = 0
  1647  		for p = s.Text; p != nil; p = p.Link {
  1648  			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
  1649  				var deferreturn *obj.LSym
  1650  
  1651  				if deferreturn == nil {
  1652  					deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
  1653  				}
  1654  
  1655  				// pad everything to avoid crossing 32-byte boundary
  1656  				if c>>5 != (c+int32(p.Isize)-1)>>5 {
  1657  					c = naclpad(ctxt, s, c, -c&31)
  1658  				}
  1659  
  1660  				// pad call deferreturn to start at 32-byte boundary
  1661  				// so that subtracting 5 in jmpdefer will jump back
  1662  				// to that boundary and rerun the call.
  1663  				if p.As == obj.ACALL && p.To.Sym == deferreturn {
  1664  					c = naclpad(ctxt, s, c, -c&31)
  1665  				}
  1666  
  1667  				// pad call to end at 32-byte boundary
  1668  				if p.As == obj.ACALL {
  1669  					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
  1670  				}
  1671  
  1672  				// the linker treats REP and STOSQ as different instructions
  1673  				// but in fact the REP is a prefix on the STOSQ.
  1674  				// make sure REP has room for 2 more bytes, so that
  1675  				// padding will not be inserted before the next instruction.
  1676  				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
  1677  					c = naclpad(ctxt, s, c, -c&31)
  1678  				}
  1679  
  1680  				// same for LOCK.
  1681  				// various instructions follow; the longest is 4 bytes.
  1682  				// give ourselves 8 bytes so as to avoid surprises.
  1683  				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
  1684  					c = naclpad(ctxt, s, c, -c&31)
  1685  				}
  1686  			}
  1687  
  1688  			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
  1689  				// pad with NOPs
  1690  				v = -c & (LoopAlign - 1)
  1691  
  1692  				if v <= MaxLoopPad {
  1693  					obj.Symgrow(ctxt, s, int64(c)+int64(v))
  1694  					fillnop(s.P[c:], int(v))
  1695  					c += v
  1696  				}
  1697  			}
  1698  
  1699  			p.Pc = int64(c)
  1700  
  1701  			// process forward jumps to p
  1702  			for q = p.Rel; q != nil; q = q.Forwd {
  1703  				v = int32(p.Pc - (q.Pc + int64(q.Mark)))
  1704  				if q.Back&2 != 0 { // short
  1705  					if v > 127 {
  1706  						loop++
  1707  						q.Back ^= 2
  1708  					}
  1709  
  1710  					if q.As == AJCXZL {
  1711  						s.P[q.Pc+2] = byte(v)
  1712  					} else {
  1713  						s.P[q.Pc+1] = byte(v)
  1714  					}
  1715  				} else {
  1716  					bp = s.P[q.Pc+int64(q.Mark)-4:]
  1717  					bp[0] = byte(v)
  1718  					bp = bp[1:]
  1719  					bp[0] = byte(v >> 8)
  1720  					bp = bp[1:]
  1721  					bp[0] = byte(v >> 16)
  1722  					bp = bp[1:]
  1723  					bp[0] = byte(v >> 24)
  1724  				}
  1725  			}
  1726  
  1727  			p.Rel = nil
  1728  
  1729  			p.Pc = int64(c)
  1730  			asmins(ctxt, p)
  1731  			m = -cap(ctxt.Andptr) + cap(ctxt.And[:])
  1732  			if int(p.Isize) != m {
  1733  				p.Isize = uint8(m)
  1734  				loop++
  1735  			}
  1736  
  1737  			obj.Symgrow(ctxt, s, p.Pc+int64(m))
  1738  			copy(s.P[p.Pc:][:m], ctxt.And[:m])
  1739  			p.Mark = uint16(m)
  1740  			c += int32(m)
  1741  		}
  1742  
  1743  		n++
  1744  		if n > 20 {
  1745  			ctxt.Diag("span must be looping")
  1746  			log.Fatalf("loop")
  1747  		}
  1748  		if loop == 0 {
  1749  			break
  1750  		}
  1751  	}
  1752  
  1753  	if ctxt.Headtype == obj.Hnacl {
  1754  		c = naclpad(ctxt, s, c, -c&31)
  1755  	}
  1756  
  1757  	// Pad functions with trap instruction, to catch invalid jumps
  1758  	if c&(FuncAlign-1) != 0 {
  1759  		v = -c & (FuncAlign - 1)
  1760  		obj.Symgrow(ctxt, s, int64(c)+int64(v))
  1761  		for i := c; i < c+v; i++ {
  1762  			// 0xCC is INT $3 - breakpoint instruction
  1763  			s.P[i] = uint8(0xCC)
  1764  		}
  1765  		c += v
  1766  	}
  1767  	s.Size = int64(c)
  1768  
  1769  	if false { /* debug['a'] > 1 */
  1770  		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
  1771  		var i int
  1772  		for i = 0; i < len(s.P); i++ {
  1773  			fmt.Printf(" %.2x", s.P[i])
  1774  			if i%16 == 15 {
  1775  				fmt.Printf("\n  %.6x", uint(i+1))
  1776  			}
  1777  		}
  1778  
  1779  		if i%16 != 0 {
  1780  			fmt.Printf("\n")
  1781  		}
  1782  
  1783  		for i := 0; i < len(s.R); i++ {
  1784  			r := &s.R[i]
  1785  			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
  1786  		}
  1787  	}
  1788  }
  1789  
  1790  func instinit() {
  1791  	var c int
  1792  
  1793  	for i := 1; optab[i].as != 0; i++ {
  1794  		c = int(optab[i].as)
  1795  		if opindex[c&obj.AMask] != nil {
  1796  			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
  1797  		}
  1798  		opindex[c&obj.AMask] = &optab[i]
  1799  	}
  1800  
  1801  	for i := 0; i < Ymax; i++ {
  1802  		ycover[i*Ymax+i] = 1
  1803  	}
  1804  
  1805  	ycover[Yi0*Ymax+Yi8] = 1
  1806  	ycover[Yi1*Ymax+Yi8] = 1
  1807  	ycover[Yu7*Ymax+Yi8] = 1
  1808  
  1809  	ycover[Yi0*Ymax+Yu7] = 1
  1810  	ycover[Yi1*Ymax+Yu7] = 1
  1811  
  1812  	ycover[Yi0*Ymax+Yu8] = 1
  1813  	ycover[Yi1*Ymax+Yu8] = 1
  1814  	ycover[Yu7*Ymax+Yu8] = 1
  1815  
  1816  	ycover[Yi0*Ymax+Ys32] = 1
  1817  	ycover[Yi1*Ymax+Ys32] = 1
  1818  	ycover[Yu7*Ymax+Ys32] = 1
  1819  	ycover[Yu8*Ymax+Ys32] = 1
  1820  	ycover[Yi8*Ymax+Ys32] = 1
  1821  
  1822  	ycover[Yi0*Ymax+Yi32] = 1
  1823  	ycover[Yi1*Ymax+Yi32] = 1
  1824  	ycover[Yu7*Ymax+Yi32] = 1
  1825  	ycover[Yu8*Ymax+Yi32] = 1
  1826  	ycover[Yi8*Ymax+Yi32] = 1
  1827  	ycover[Ys32*Ymax+Yi32] = 1
  1828  
  1829  	ycover[Yi0*Ymax+Yi64] = 1
  1830  	ycover[Yi1*Ymax+Yi64] = 1
  1831  	ycover[Yu7*Ymax+Yi64] = 1
  1832  	ycover[Yu8*Ymax+Yi64] = 1
  1833  	ycover[Yi8*Ymax+Yi64] = 1
  1834  	ycover[Ys32*Ymax+Yi64] = 1
  1835  	ycover[Yi32*Ymax+Yi64] = 1
  1836  
  1837  	ycover[Yal*Ymax+Yrb] = 1
  1838  	ycover[Ycl*Ymax+Yrb] = 1
  1839  	ycover[Yax*Ymax+Yrb] = 1
  1840  	ycover[Ycx*Ymax+Yrb] = 1
  1841  	ycover[Yrx*Ymax+Yrb] = 1
  1842  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  1843  
  1844  	ycover[Ycl*Ymax+Ycx] = 1
  1845  
  1846  	ycover[Yax*Ymax+Yrx] = 1
  1847  	ycover[Ycx*Ymax+Yrx] = 1
  1848  
  1849  	ycover[Yax*Ymax+Yrl] = 1
  1850  	ycover[Ycx*Ymax+Yrl] = 1
  1851  	ycover[Yrx*Ymax+Yrl] = 1
  1852  	ycover[Yrl32*Ymax+Yrl] = 1
  1853  
  1854  	ycover[Yf0*Ymax+Yrf] = 1
  1855  
  1856  	ycover[Yal*Ymax+Ymb] = 1
  1857  	ycover[Ycl*Ymax+Ymb] = 1
  1858  	ycover[Yax*Ymax+Ymb] = 1
  1859  	ycover[Ycx*Ymax+Ymb] = 1
  1860  	ycover[Yrx*Ymax+Ymb] = 1
  1861  	ycover[Yrb*Ymax+Ymb] = 1
  1862  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  1863  	ycover[Ym*Ymax+Ymb] = 1
  1864  
  1865  	ycover[Yax*Ymax+Yml] = 1
  1866  	ycover[Ycx*Ymax+Yml] = 1
  1867  	ycover[Yrx*Ymax+Yml] = 1
  1868  	ycover[Yrl*Ymax+Yml] = 1
  1869  	ycover[Yrl32*Ymax+Yml] = 1
  1870  	ycover[Ym*Ymax+Yml] = 1
  1871  
  1872  	ycover[Yax*Ymax+Ymm] = 1
  1873  	ycover[Ycx*Ymax+Ymm] = 1
  1874  	ycover[Yrx*Ymax+Ymm] = 1
  1875  	ycover[Yrl*Ymax+Ymm] = 1
  1876  	ycover[Yrl32*Ymax+Ymm] = 1
  1877  	ycover[Ym*Ymax+Ymm] = 1
  1878  	ycover[Ymr*Ymax+Ymm] = 1
  1879  
  1880  	ycover[Ym*Ymax+Yxm] = 1
  1881  	ycover[Yxr*Ymax+Yxm] = 1
  1882  
  1883  	for i := 0; i < MAXREG; i++ {
  1884  		reg[i] = -1
  1885  		if i >= REG_AL && i <= REG_R15B {
  1886  			reg[i] = (i - REG_AL) & 7
  1887  			if i >= REG_SPB && i <= REG_DIB {
  1888  				regrex[i] = 0x40
  1889  			}
  1890  			if i >= REG_R8B && i <= REG_R15B {
  1891  				regrex[i] = Rxr | Rxx | Rxb
  1892  			}
  1893  		}
  1894  
  1895  		if i >= REG_AH && i <= REG_BH {
  1896  			reg[i] = 4 + ((i - REG_AH) & 7)
  1897  		}
  1898  		if i >= REG_AX && i <= REG_R15 {
  1899  			reg[i] = (i - REG_AX) & 7
  1900  			if i >= REG_R8 {
  1901  				regrex[i] = Rxr | Rxx | Rxb
  1902  			}
  1903  		}
  1904  
  1905  		if i >= REG_F0 && i <= REG_F0+7 {
  1906  			reg[i] = (i - REG_F0) & 7
  1907  		}
  1908  		if i >= REG_M0 && i <= REG_M0+7 {
  1909  			reg[i] = (i - REG_M0) & 7
  1910  		}
  1911  		if i >= REG_X0 && i <= REG_X0+15 {
  1912  			reg[i] = (i - REG_X0) & 7
  1913  			if i >= REG_X0+8 {
  1914  				regrex[i] = Rxr | Rxx | Rxb
  1915  			}
  1916  		}
  1917  
  1918  		if i >= REG_CR+8 && i <= REG_CR+15 {
  1919  			regrex[i] = Rxr
  1920  		}
  1921  	}
  1922  }
  1923  
        	// isAndroid records whether obj.Getgoos() reports "android".
        	// prefixof consults it when choosing the TLS segment prefix for obj.Hlinux.
  1924  	var isAndroid = (obj.Getgoos() == "android")
  1925  
  1926  func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  1927  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  1928  		return 0
  1929  	}
  1930  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  1931  		switch a.Reg {
  1932  		case REG_CS:
  1933  			return 0x2e
  1934  
  1935  		case REG_DS:
  1936  			return 0x3e
  1937  
  1938  		case REG_ES:
  1939  			return 0x26
  1940  
  1941  		case REG_FS:
  1942  			return 0x64
  1943  
  1944  		case REG_GS:
  1945  			return 0x65
  1946  
  1947  		case REG_TLS:
  1948  			// NOTE: Systems listed here should be only systems that
  1949  			// support direct TLS references like 8(TLS) implemented as
  1950  			// direct references from FS or GS. Systems that require
  1951  			// the initial-exec model, where you load the TLS base into
  1952  			// a register and then index from that register, do not reach
  1953  			// this code and should not be listed.
  1954  			if p.Mode == 32 {
  1955  				switch ctxt.Headtype {
  1956  				default:
  1957  					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1958  
  1959  				case obj.Hdarwin,
  1960  					obj.Hdragonfly,
  1961  					obj.Hfreebsd,
  1962  					obj.Hnetbsd,
  1963  					obj.Hopenbsd:
  1964  					return 0x65 // GS
  1965  				}
  1966  			}
  1967  
  1968  			switch ctxt.Headtype {
  1969  			default:
  1970  				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
  1971  
  1972  			case obj.Hlinux:
  1973  				if isAndroid {
  1974  					return 0x64 // FS
  1975  				}
  1976  
  1977  				if ctxt.Flag_shared != 0 {
  1978  					log.Fatalf("unknown TLS base register for linux with -shared")
  1979  				} else {
  1980  					return 0x64 // FS
  1981  				}
  1982  
  1983  			case obj.Hdragonfly,
  1984  				obj.Hfreebsd,
  1985  				obj.Hnetbsd,
  1986  				obj.Hopenbsd,
  1987  				obj.Hsolaris:
  1988  				return 0x64 // FS
  1989  
  1990  			case obj.Hdarwin:
  1991  				return 0x65 // GS
  1992  			}
  1993  		}
  1994  	}
  1995  
  1996  	if p.Mode == 32 {
  1997  		return 0
  1998  	}
  1999  
  2000  	switch a.Index {
  2001  	case REG_CS:
  2002  		return 0x2e
  2003  
  2004  	case REG_DS:
  2005  		return 0x3e
  2006  
  2007  	case REG_ES:
  2008  		return 0x26
  2009  
  2010  	case REG_TLS:
  2011  		if ctxt.Flag_shared != 0 {
  2012  			// When building for inclusion into a shared library, an instruction of the form
  2013  			//     MOV 0(CX)(TLS*1), AX
  2014  			// becomes
  2015  			//     mov %fs:(%rcx), %rax
  2016  			// which assumes that the correct TLS offset has been loaded into %rcx (today
  2017  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2018  			// a shared library the instruction does not require a prefix.
  2019  			if a.Offset != 0 {
  2020  				log.Fatalf("cannot handle non-0 offsets to TLS")
  2021  			}
  2022  			return 0x64
  2023  		}
  2024  
  2025  	case REG_FS:
  2026  		return 0x64
  2027  
  2028  	case REG_GS:
  2029  		return 0x65
  2030  	}
  2031  
  2032  	return 0
  2033  }
  2034  
        	// oclass returns the operand class (one of the Y* constants) of operand a
        	// as used by instruction p. It returns Yxxx for operands it cannot
        	// classify; for some of those it also reports a diagnostic via ctxt.Diag.
  2035  	func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2036  		switch a.Type {
  2037  		case obj.TYPE_NONE:
  2038  			return Ynone
  2039  	
  2040  		case obj.TYPE_BRANCH:
  2041  			return Ybr
  2042  	
  2043  		case obj.TYPE_INDIR:
        			// Only a bare named symbol (no base, index or scale) is accepted.
  2044  			if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2045  				return Yindir
  2046  			}
  2047  			return Yxxx
  2048  	
  2049  		case obj.TYPE_MEM:
  2050  			return Ym
  2051  	
  2052  		case obj.TYPE_ADDR:
  2053  			switch a.Name {
  2054  			case obj.NAME_EXTERN,
  2055  				obj.NAME_GOTREF,
  2056  				obj.NAME_STATIC:
        				// Parsed as (a.Sym != nil && isextern(a.Sym)) || p.Mode == 32:
        				// every symbol address in 32-bit mode is a 32-bit immediate.
  2057  				if a.Sym != nil && isextern(a.Sym) || p.Mode == 32 {
  2058  					return Yi32
  2059  				}
  2060  				return Yiauto // use pc-relative addressing
  2061  	
  2062  			case obj.NAME_AUTO,
  2063  				obj.NAME_PARAM:
  2064  				return Yiauto
  2065  			}
  2066  	
  2067  			// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2068  			// and got Yi32 in an earlier version of this code.
  2069  			// Keep doing that until we fix yduff etc.
  2070  			if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2071  				return Yi32
  2072  			}
  2073  	
  2074  			if a.Sym != nil || a.Name != obj.NAME_NONE {
  2075  				ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2076  			}
        			// Any remaining TYPE_ADDR operand is classified by its offset,
        			// exactly like a constant.
  2077  			fallthrough
  2078  	
  2079  			// fall through
  2080  	
  2081  		case obj.TYPE_CONST:
  2082  			if a.Sym != nil {
  2083  				ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2084  			}
  2085  	
        			// Return the first class in the checks below whose range contains v.
        			// In 32-bit mode only the low 32 bits (sign-extended) are considered.
  2086  			v := a.Offset
  2087  			if p.Mode == 32 {
  2088  				v = int64(int32(v))
  2089  			}
  2090  			if v == 0 {
  2091  				return Yi0
  2092  			}
  2093  			if v == 1 {
  2094  				return Yi1
  2095  			}
  2096  			if v >= 0 && v <= 127 {
  2097  				return Yu7
  2098  			}
  2099  			if v >= 0 && v <= 255 {
  2100  				return Yu8
  2101  			}
  2102  			if v >= -128 && v <= 127 {
  2103  				return Yi8
  2104  			}
  2105  			if p.Mode == 32 {
  2106  				return Yi32
  2107  			}
  2108  			l := int32(v)
  2109  			if int64(l) == v {
  2110  				return Ys32 /* can sign extend */
  2111  			}
  2112  			if v>>32 == 0 {
  2113  				return Yi32 /* unsigned */
  2114  			}
  2115  			return Yi64
  2116  	
  2117  		case obj.TYPE_TEXTSIZE:
  2118  			return Ytextsize
  2119  		}
  2120  	
        		// Every type handled by the switch above has returned, so only
        		// register operands are expected from here on.
  2121  		if a.Type != obj.TYPE_REG {
  2122  			ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2123  			return Yxxx
  2124  		}
  2125  	
  2126  		switch a.Reg {
  2127  		case REG_AL:
  2128  			return Yal
  2129  	
  2130  		case REG_AX:
  2131  			return Yax
  2132  	
  2133  			/*
  2134  				case REG_SPB:
  2135  			*/
  2136  		case REG_BPB,
  2137  			REG_SIB,
  2138  			REG_DIB,
  2139  			REG_R8B,
  2140  			REG_R9B,
  2141  			REG_R10B,
  2142  			REG_R11B,
  2143  			REG_R12B,
  2144  			REG_R13B,
  2145  			REG_R14B,
  2146  			REG_R15B:
        			// These byte registers are rejected unless assembling for 64-bit.
  2147  			if ctxt.Asmode != 64 {
  2148  				return Yxxx
  2149  			}
  2150  			fallthrough
  2151  	
  2152  		case REG_DL,
  2153  			REG_BL,
  2154  			REG_AH,
  2155  			REG_CH,
  2156  			REG_DH,
  2157  			REG_BH:
  2158  			return Yrb
  2159  	
  2160  		case REG_CL:
  2161  			return Ycl
  2162  	
  2163  		case REG_CX:
  2164  			return Ycx
  2165  	
  2166  		case REG_DX, REG_BX:
  2167  			return Yrx
  2168  	
  2169  		case REG_R8, /* not really Yrl */
  2170  			REG_R9,
  2171  			REG_R10,
  2172  			REG_R11,
  2173  			REG_R12,
  2174  			REG_R13,
  2175  			REG_R14,
  2176  			REG_R15:
        			// R8-R15 are rejected unless assembling for 64-bit.
  2177  			if ctxt.Asmode != 64 {
  2178  				return Yxxx
  2179  			}
  2180  			fallthrough
  2181  	
  2182  		case REG_SP, REG_BP, REG_SI, REG_DI:
  2183  			if p.Mode == 32 {
  2184  				return Yrl32
  2185  			}
  2186  			return Yrl
  2187  	
  2188  		case REG_F0 + 0:
  2189  			return Yf0
  2190  	
  2191  		case REG_F0 + 1,
  2192  			REG_F0 + 2,
  2193  			REG_F0 + 3,
  2194  			REG_F0 + 4,
  2195  			REG_F0 + 5,
  2196  			REG_F0 + 6,
  2197  			REG_F0 + 7:
  2198  			return Yrf
  2199  	
  2200  		case REG_M0 + 0,
  2201  			REG_M0 + 1,
  2202  			REG_M0 + 2,
  2203  			REG_M0 + 3,
  2204  			REG_M0 + 4,
  2205  			REG_M0 + 5,
  2206  			REG_M0 + 6,
  2207  			REG_M0 + 7:
  2208  			return Ymr
  2209  	
  2210  		case REG_X0 + 0,
  2211  			REG_X0 + 1,
  2212  			REG_X0 + 2,
  2213  			REG_X0 + 3,
  2214  			REG_X0 + 4,
  2215  			REG_X0 + 5,
  2216  			REG_X0 + 6,
  2217  			REG_X0 + 7,
  2218  			REG_X0 + 8,
  2219  			REG_X0 + 9,
  2220  			REG_X0 + 10,
  2221  			REG_X0 + 11,
  2222  			REG_X0 + 12,
  2223  			REG_X0 + 13,
  2224  			REG_X0 + 14,
  2225  			REG_X0 + 15:
  2226  			return Yxr
  2227  	
  2228  		case REG_CS:
  2229  			return Ycs
  2230  		case REG_SS:
  2231  			return Yss
  2232  		case REG_DS:
  2233  			return Yds
  2234  		case REG_ES:
  2235  			return Yes
  2236  		case REG_FS:
  2237  			return Yfs
  2238  		case REG_GS:
  2239  			return Ygs
  2240  		case REG_TLS:
  2241  			return Ytls
  2242  	
  2243  		case REG_GDTR:
  2244  			return Ygdtr
  2245  		case REG_IDTR:
  2246  			return Yidtr
  2247  		case REG_LDTR:
  2248  			return Yldtr
  2249  		case REG_MSW:
  2250  			return Ymsw
  2251  		case REG_TASK:
  2252  			return Ytask
  2253  	
  2254  		case REG_CR + 0:
  2255  			return Ycr0
  2256  		case REG_CR + 1:
  2257  			return Ycr1
  2258  		case REG_CR + 2:
  2259  			return Ycr2
  2260  		case REG_CR + 3:
  2261  			return Ycr3
  2262  		case REG_CR + 4:
  2263  			return Ycr4
  2264  		case REG_CR + 5:
  2265  			return Ycr5
  2266  		case REG_CR + 6:
  2267  			return Ycr6
  2268  		case REG_CR + 7:
  2269  			return Ycr7
  2270  		case REG_CR + 8:
  2271  			return Ycr8
  2272  	
  2273  		case REG_DR + 0:
  2274  			return Ydr0
  2275  		case REG_DR + 1:
  2276  			return Ydr1
  2277  		case REG_DR + 2:
  2278  			return Ydr2
  2279  		case REG_DR + 3:
  2280  			return Ydr3
  2281  		case REG_DR + 4:
  2282  			return Ydr4
  2283  		case REG_DR + 5:
  2284  			return Ydr5
  2285  		case REG_DR + 6:
  2286  			return Ydr6
  2287  		case REG_DR + 7:
  2288  			return Ydr7
  2289  	
  2290  		case REG_TR + 0:
  2291  			return Ytr0
  2292  		case REG_TR + 1:
  2293  			return Ytr1
  2294  		case REG_TR + 2:
  2295  			return Ytr2
  2296  		case REG_TR + 3:
  2297  			return Ytr3
  2298  		case REG_TR + 4:
  2299  			return Ytr4
  2300  		case REG_TR + 5:
  2301  			return Ytr5
  2302  		case REG_TR + 6:
  2303  			return Ytr6
  2304  		case REG_TR + 7:
  2305  			return Ytr7
  2306  		}
  2307  	
  2308  		return Yxxx
  2309  	}
  2310  
  2311  func asmidx(ctxt *obj.Link, scale int, index int, base int) {
  2312  	var i int
  2313  
  2314  	switch index {
  2315  	default:
  2316  		goto bad
  2317  
  2318  	case REG_NONE:
  2319  		i = 4 << 3
  2320  		goto bas
  2321  
  2322  	case REG_R8,
  2323  		REG_R9,
  2324  		REG_R10,
  2325  		REG_R11,
  2326  		REG_R12,
  2327  		REG_R13,
  2328  		REG_R14,
  2329  		REG_R15:
  2330  		if ctxt.Asmode != 64 {
  2331  			goto bad
  2332  		}
  2333  		fallthrough
  2334  
  2335  	case REG_AX,
  2336  		REG_CX,
  2337  		REG_DX,
  2338  		REG_BX,
  2339  		REG_BP,
  2340  		REG_SI,
  2341  		REG_DI:
  2342  		i = reg[index] << 3
  2343  	}
  2344  
  2345  	switch scale {
  2346  	default:
  2347  		goto bad
  2348  
  2349  	case 1:
  2350  		break
  2351  
  2352  	case 2:
  2353  		i |= 1 << 6
  2354  
  2355  	case 4:
  2356  		i |= 2 << 6
  2357  
  2358  	case 8:
  2359  		i |= 3 << 6
  2360  	}
  2361  
  2362  bas:
  2363  	switch base {
  2364  	default:
  2365  		goto bad
  2366  
  2367  	case REG_NONE: /* must be mod=00 */
  2368  		i |= 5
  2369  
  2370  	case REG_R8,
  2371  		REG_R9,
  2372  		REG_R10,
  2373  		REG_R11,
  2374  		REG_R12,
  2375  		REG_R13,
  2376  		REG_R14,
  2377  		REG_R15:
  2378  		if ctxt.Asmode != 64 {
  2379  			goto bad
  2380  		}
  2381  		fallthrough
  2382  
  2383  	case REG_AX,
  2384  		REG_CX,
  2385  		REG_DX,
  2386  		REG_BX,
  2387  		REG_SP,
  2388  		REG_BP,
  2389  		REG_SI,
  2390  		REG_DI:
  2391  		i |= reg[base]
  2392  	}
  2393  
  2394  	ctxt.Andptr[0] = byte(i)
  2395  	ctxt.Andptr = ctxt.Andptr[1:]
  2396  	return
  2397  
  2398  bad:
  2399  	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
  2400  	ctxt.Andptr[0] = 0
  2401  	ctxt.Andptr = ctxt.Andptr[1:]
  2402  	return
  2403  }
  2404  
  2405  func put4(ctxt *obj.Link, v int32) {
  2406  	ctxt.Andptr[0] = byte(v)
  2407  	ctxt.Andptr[1] = byte(v >> 8)
  2408  	ctxt.Andptr[2] = byte(v >> 16)
  2409  	ctxt.Andptr[3] = byte(v >> 24)
  2410  	ctxt.Andptr = ctxt.Andptr[4:]
  2411  }
  2412  
  2413  func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
  2414  	var rel obj.Reloc
  2415  
  2416  	v := vaddr(ctxt, p, a, &rel)
  2417  	if rel.Siz != 0 {
  2418  		if rel.Siz != 4 {
  2419  			ctxt.Diag("bad reloc")
  2420  		}
  2421  		r := obj.Addrel(ctxt.Cursym)
  2422  		*r = rel
  2423  		r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  2424  	}
  2425  
  2426  	put4(ctxt, int32(v))
  2427  }
  2428  
  2429  func put8(ctxt *obj.Link, v int64) {
  2430  	ctxt.Andptr[0] = byte(v)
  2431  	ctxt.Andptr[1] = byte(v >> 8)
  2432  	ctxt.Andptr[2] = byte(v >> 16)
  2433  	ctxt.Andptr[3] = byte(v >> 24)
  2434  	ctxt.Andptr[4] = byte(v >> 32)
  2435  	ctxt.Andptr[5] = byte(v >> 40)
  2436  	ctxt.Andptr[6] = byte(v >> 48)
  2437  	ctxt.Andptr[7] = byte(v >> 56)
  2438  	ctxt.Andptr = ctxt.Andptr[8:]
  2439  }
  2440  
  2441  /*
  2442  static void
  2443  relput8(Prog *p, Addr *a)
  2444  {
  2445  	vlong v;
  2446  	Reloc rel, *r;
  2447  
  2448  	v = vaddr(ctxt, p, a, &rel);
  2449  	if(rel.siz != 0) {
  2450  		r = addrel(ctxt->cursym);
  2451  		*r = rel;
  2452  		r->siz = 8;
  2453  		r->off = p->pc + ctxt->andptr - ctxt->and;
  2454  	}
  2455  	put8(ctxt, v);
  2456  }
  2457  */
  2458  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  2459  	if r != nil {
  2460  		*r = obj.Reloc{}
  2461  	}
  2462  
  2463  	switch a.Name {
  2464  	case obj.NAME_STATIC,
  2465  		obj.NAME_GOTREF,
  2466  		obj.NAME_EXTERN:
  2467  		s := a.Sym
  2468  		if r == nil {
  2469  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2470  			log.Fatalf("reloc")
  2471  		}
  2472  
  2473  		if a.Name == obj.NAME_GOTREF {
  2474  			r.Siz = 4
  2475  			r.Type = obj.R_GOTPCREL
  2476  		} else if isextern(s) || p.Mode != 64 {
  2477  			r.Siz = 4
  2478  			r.Type = obj.R_ADDR
  2479  		} else {
  2480  			r.Siz = 4
  2481  			r.Type = obj.R_PCREL
  2482  		}
  2483  
  2484  		r.Off = -1 // caller must fill in
  2485  		r.Sym = s
  2486  		r.Add = a.Offset
  2487  
  2488  		return 0
  2489  	}
  2490  
  2491  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  2492  		if r == nil {
  2493  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  2494  			log.Fatalf("reloc")
  2495  		}
  2496  
  2497  		r.Type = obj.R_TLS_LE
  2498  		r.Siz = 4
  2499  		r.Off = -1 // caller must fill in
  2500  		r.Add = a.Offset
  2501  		return 0
  2502  	}
  2503  
  2504  	return a.Offset
  2505  }
  2506  
        	// asmandsz appends the addressing bytes for operand a to ctxt.Andptr: a
        	// ModRM byte whose reg field (bits 3-5) is r, followed where needed by a SIB
        	// byte (see asmidx) and an 8-bit or 32-bit displacement. REX bits required
        	// by the registers involved are OR'ed into ctxt.Rexflag, together with the
        	// 0x40|Rxr bits of rex. Operands that need a relocation get a 4-byte Reloc
        	// added to ctxt.Cursym at the position of the displacement. Operands that
        	// cannot be encoded are reported through ctxt.Diag.
        	// The m64 parameter is not used by this function.
  2507  	func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  2508  		var base int
  2509  		var rel obj.Reloc
  2510  	
        		// Only these bits of the caller's rex are propagated.
  2511  		rex &= 0x40 | Rxr
  2512  		v := int32(a.Offset)
  2513  		rel.Siz = 0
  2514  	
  2515  		switch a.Type {
  2516  		case obj.TYPE_ADDR:
        			// TYPE_ADDR is never encodable here; the two checks only add a more
        			// specific diagnostic before the generic one at bad.
  2517  			if a.Name == obj.NAME_NONE {
  2518  				ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  2519  			}
  2520  			if a.Index == REG_TLS {
  2521  				ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  2522  			}
  2523  			goto bad
  2524  	
  2525  		case obj.TYPE_REG:
  2526  			if a.Reg < REG_AL || REG_X0+15 < a.Reg {
  2527  				goto bad
  2528  			}
  2529  			if v != 0 {
  2530  				goto bad
  2531  			}
        			// Register-direct operand: mod=3, rm=register number.
  2532  			ctxt.Andptr[0] = byte(3<<6 | reg[a.Reg]<<0 | r<<3)
  2533  			ctxt.Andptr = ctxt.Andptr[1:]
  2534  			ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  2535  			return
  2536  		}
  2537  	
  2538  		if a.Type != obj.TYPE_MEM {
  2539  			goto bad
  2540  		}
  2541  	
        		// Memory operand with a real index register: always encoded with a
        		// SIB byte (rm=4).
  2542  		if a.Index != REG_NONE && a.Index != REG_TLS {
        			// NOTE: this base shadows the function-level base. Every path through
        			// this block ends in return or goto, so the outer variable is not
        			// read afterwards with a stale value.
  2543  			base := int(a.Reg)
  2544  			switch a.Name {
  2545  			case obj.NAME_EXTERN,
  2546  				obj.NAME_GOTREF,
  2547  				obj.NAME_STATIC:
  2548  				if !isextern(a.Sym) && p.Mode == 64 {
  2549  					goto bad
  2550  				}
  2551  				base = REG_NONE
  2552  				v = int32(vaddr(ctxt, p, a, &rel))
  2553  	
  2554  			case obj.NAME_AUTO,
  2555  				obj.NAME_PARAM:
  2556  				base = REG_SP
  2557  			}
  2558  	
  2559  			ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  2560  			if base == REG_NONE {
        				// mod=0 with no base register: SIB followed by a 32-bit displacement.
  2561  				ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
  2562  				ctxt.Andptr = ctxt.Andptr[1:]
  2563  				asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2564  				goto putrelv
  2565  			}
  2566  	
        			// mod=0: no displacement. BP and R13 are excluded as base here and
        			// take one of the displacement forms below instead.
  2567  			if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  2568  				ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
  2569  				ctxt.Andptr = ctxt.Andptr[1:]
  2570  				asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2571  				return
  2572  			}
  2573  	
        			// mod=1: 8-bit displacement.
  2574  			if v >= -128 && v < 128 && rel.Siz == 0 {
  2575  				ctxt.Andptr[0] = byte(1<<6 | 4<<0 | r<<3)
  2576  				ctxt.Andptr = ctxt.Andptr[1:]
  2577  				asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2578  				ctxt.Andptr[0] = byte(v)
  2579  				ctxt.Andptr = ctxt.Andptr[1:]
  2580  				return
  2581  			}
  2582  	
        			// mod=2: 32-bit displacement.
  2583  			ctxt.Andptr[0] = byte(2<<6 | 4<<0 | r<<3)
  2584  			ctxt.Andptr = ctxt.Andptr[1:]
  2585  			asmidx(ctxt, int(a.Scale), int(a.Index), base)
  2586  			goto putrelv
  2587  		}
  2588  	
        		// Memory operand without an index register (or with REG_TLS as index).
  2589  		base = int(a.Reg)
  2590  		switch a.Name {
  2591  		case obj.NAME_STATIC,
  2592  			obj.NAME_GOTREF,
  2593  			obj.NAME_EXTERN:
  2594  			if a.Sym == nil {
  2595  				ctxt.Diag("bad addr: %v", p)
  2596  			}
  2597  			base = REG_NONE
  2598  			v = int32(vaddr(ctxt, p, a, &rel))
  2599  	
  2600  		case obj.NAME_AUTO,
  2601  			obj.NAME_PARAM:
  2602  			base = REG_SP
  2603  		}
  2604  	
  2605  		if base == REG_TLS {
  2606  			v = int32(vaddr(ctxt, p, a, &rel))
  2607  		}
  2608  	
  2609  		ctxt.Rexflag |= regrex[base]&Rxb | rex
        		// No usable base register: the operand is just a 32-bit displacement.
  2610  		if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
        			// Symbol references that are not isextern, and everything outside
        			// 64-bit mode, use mod=0 rm=5 with the displacement directly after.
  2611  			if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
  2612  				if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  2613  					ctxt.Diag("%v has offset against gotref", p)
  2614  				}
  2615  				ctxt.Andptr[0] = byte(0<<6 | 5<<0 | r<<3)
  2616  				ctxt.Andptr = ctxt.Andptr[1:]
  2617  				goto putrelv
  2618  			}
  2619  	
  2620  			/* temporary */
  2621  			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
  2622  			ctxt.Andptr = ctxt.Andptr[1:] /* sib present */
  2623  			ctxt.Andptr[0] = 0<<6 | 4<<3 | 5<<0
  2624  			ctxt.Andptr = ctxt.Andptr[1:] /* DS:d32 */
  2625  			goto putrelv
  2626  		}
  2627  	
        		// SP and R12 as base are always emitted with a SIB byte (index REG_NONE).
  2628  		if base == REG_SP || base == REG_R12 {
  2629  			if v == 0 {
  2630  				ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
  2631  				ctxt.Andptr = ctxt.Andptr[1:]
  2632  				asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2633  				return
  2634  			}
  2635  	
  2636  			if v >= -128 && v < 128 {
  2637  				ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
  2638  				ctxt.Andptr = ctxt.Andptr[1:]
  2639  				asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2640  				ctxt.Andptr[0] = byte(v)
  2641  				ctxt.Andptr = ctxt.Andptr[1:]
  2642  				return
  2643  			}
  2644  	
  2645  			ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
  2646  			ctxt.Andptr = ctxt.Andptr[1:]
  2647  			asmidx(ctxt, int(a.Scale), REG_NONE, base)
  2648  			goto putrelv
  2649  		}
  2650  	
        		// Any other general register as base: plain ModRM, no SIB.
  2651  		if REG_AX <= base && base <= REG_R15 {
        			// off(reg)(TLS*1) outside shared mode: the offset becomes the addend
        			// of an R_TLS_LE relocation and a zero displacement is emitted.
  2652  			if a.Index == REG_TLS && ctxt.Flag_shared == 0 {
  2653  				rel = obj.Reloc{}
  2654  				rel.Type = obj.R_TLS_LE
  2655  				rel.Siz = 4
  2656  				rel.Sym = nil
  2657  				rel.Add = int64(v)
  2658  				v = 0
  2659  			}
  2660  	
  2661  			if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  2662  				ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
  2663  				ctxt.Andptr = ctxt.Andptr[1:]
  2664  				return
  2665  			}
  2666  	
  2667  			if v >= -128 && v < 128 && rel.Siz == 0 {
  2668  				ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
  2669  				ctxt.Andptr[1] = byte(v)
  2670  				ctxt.Andptr = ctxt.Andptr[2:]
  2671  				return
  2672  			}
  2673  	
  2674  			ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
  2675  			ctxt.Andptr = ctxt.Andptr[1:]
  2676  			goto putrelv
  2677  		}
  2678  	
  2679  		goto bad
  2680  	
        		// putrelv emits the 32-bit displacement v, first recording the pending
        		// relocation (if any) at the offset where those 4 bytes will land.
  2681  	putrelv:
  2682  		if rel.Siz != 0 {
  2683  			if rel.Siz != 4 {
  2684  				ctxt.Diag("bad rel")
  2685  				goto bad
  2686  			}
  2687  	
        			// This r is a *obj.Reloc that shadows the int parameter r.
        			// The offset is based on ctxt.Curp.Pc rather than p.Pc.
  2688  			r := obj.Addrel(ctxt.Cursym)
  2689  			*r = rel
  2690  			r.Off = int32(ctxt.Curp.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  2691  		}
  2692  	
  2693  		put4(ctxt, v)
  2694  		return
  2695  	
  2696  	bad:
  2697  		ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  2698  		return
  2699  	}
  2700  
  2701  func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  2702  	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  2703  }
  2704  
  2705  func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
  2706  	asmandsz(ctxt, p, a, o, 0, 0)
  2707  }
  2708  
  2709  func bytereg(a *obj.Addr, t *uint8) {
  2710  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  2711  		a.Reg += REG_AL - REG_AX
  2712  		*t = 0
  2713  	}
  2714  }
  2715  
  2716  func unbytereg(a *obj.Addr, t *uint8) {
  2717  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  2718  		a.Reg += REG_AX - REG_AL
  2719  		*t = 0
  2720  	}
  2721  }
  2722  
        	// E (0xff) is a marker byte placed in the op arrays of ymovtab entries,
        	// e.g. {0x0e, E, 0, 0}.
        	// NOTE(review): it appears to mark the end of the opcode bytes — confirm
        	// against the code that walks ymovtab.
  2723  	const (
  2724  		E = 0xff
  2725  	)
  2726  
// ymovtab lists special-case encodings for moves involving segment, control,
// debug and test registers, the descriptor-table/task/machine-status
// registers, segment-register push/pop, double shifts, and TLS base loads.
//
// Each row is positional. Judging from the values, the fields are: the
// instruction, the operand class of the source, the class of the optional
// third operand, the class of the destination, a small case code (0-7), and
// up to four encoding bytes. NOTE(review): confirm the field meanings against
// the Movtab declaration, which is outside this chunk.
var ymovtab = []Movtab{
	/* push a segment register (case code 0; bytes end with E) */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop into a segment register (case code 0; bytes end with E) */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov to/from a segment register (case code 1: seg -> r/m, 2: r/m -> seg) */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov to/from a control register (case code 3: special -> r/m, 4: r/m -> special) */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov to/from a debug register */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov to/from a test register */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt: moves to/from GDTR and IDTR (memory operand only) */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt: moves to/from LDTR */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw: moves to/from the machine status word */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str: moves to/from the task register */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift: count (imm8, CL or CX), register, r/m (case code 6) */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base into a register (case code 7; no fixed opcode bytes) */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	// All-zero row; presumably the end-of-table sentinel — confirm with the
	// code that scans ymovtab.
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  2866  
  2867  func isax(a *obj.Addr) bool {
  2868  	switch a.Reg {
  2869  	case REG_AX, REG_AL, REG_AH:
  2870  		return true
  2871  	}
  2872  
  2873  	if a.Index == REG_AX {
  2874  		return true
  2875  	}
  2876  	return false
  2877  }
  2878  
  2879  func subreg(p *obj.Prog, from int, to int) {
  2880  	if false { /* debug['Q'] */
  2881  		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
  2882  	}
  2883  
  2884  	if int(p.From.Reg) == from {
  2885  		p.From.Reg = int16(to)
  2886  		p.Ft = 0
  2887  	}
  2888  
  2889  	if int(p.To.Reg) == from {
  2890  		p.To.Reg = int16(to)
  2891  		p.Tt = 0
  2892  	}
  2893  
  2894  	if int(p.From.Index) == from {
  2895  		p.From.Index = int16(to)
  2896  		p.Ft = 0
  2897  	}
  2898  
  2899  	if int(p.To.Index) == from {
  2900  		p.To.Index = int16(to)
  2901  		p.Tt = 0
  2902  	}
  2903  
  2904  	if false { /* debug['Q'] */
  2905  		fmt.Printf("%v\n", p)
  2906  	}
  2907  }
  2908  
  2909  func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  2910  	switch op {
  2911  	case Pm, Pe, Pf2, Pf3:
  2912  		if osize != 1 {
  2913  			if op != Pm {
  2914  				ctxt.Andptr[0] = byte(op)
  2915  				ctxt.Andptr = ctxt.Andptr[1:]
  2916  			}
  2917  			ctxt.Andptr[0] = Pm
  2918  			ctxt.Andptr = ctxt.Andptr[1:]
  2919  			z++
  2920  			op = int(o.op[z])
  2921  			break
  2922  		}
  2923  		fallthrough
  2924  
  2925  	default:
  2926  		if -cap(ctxt.Andptr) == -cap(ctxt.And) || ctxt.And[-cap(ctxt.Andptr)+cap(ctxt.And[:])-1] != Pm {
  2927  			ctxt.Andptr[0] = Pm
  2928  			ctxt.Andptr = ctxt.Andptr[1:]
  2929  		}
  2930  	}
  2931  
  2932  	ctxt.Andptr[0] = byte(op)
  2933  	ctxt.Andptr = ctxt.Andptr[1:]
  2934  	return z
  2935  }
  2936  
  2937  var bpduff1 = []byte{
  2938  	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
  2939  	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
  2940  }
  2941  
  2942  var bpduff2 = []byte{
  2943  	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
  2944  }
  2945  
  2946  func vexprefix(ctxt *obj.Link, to *obj.Addr, from *obj.Addr, pref uint8) {
  2947  	rexR := regrex[to.Reg]
  2948  	rexB := regrex[from.Reg]
  2949  	rexX := regrex[from.Index]
  2950  	var prefBit uint8
  2951  	if pref == Pvex1 {
  2952  		prefBit = 1
  2953  	} else if pref == Pvex2 {
  2954  		prefBit = 2
  2955  	} // TODO add Pvex0,Pvex3
  2956  
  2957  	if rexX == 0 && rexB == 0 { // 2-byte vex prefix
  2958  		ctxt.Andptr[0] = 0xc5
  2959  		ctxt.Andptr = ctxt.Andptr[1:]
  2960  
  2961  		if rexR != 0 {
  2962  			ctxt.Andptr[0] = 0x7c
  2963  		} else {
  2964  			ctxt.Andptr[0] = 0xfc
  2965  		}
  2966  		ctxt.Andptr[0] |= prefBit
  2967  		ctxt.Andptr = ctxt.Andptr[1:]
  2968  	} else {
  2969  		ctxt.Andptr[0] = 0xc4
  2970  		ctxt.Andptr = ctxt.Andptr[1:]
  2971  
  2972  		ctxt.Andptr[0] = 0x1 // TODO handle different prefix
  2973  		if rexR == 0 {
  2974  			ctxt.Andptr[0] |= 0x80
  2975  		}
  2976  		if rexX == 0 {
  2977  			ctxt.Andptr[0] |= 0x40
  2978  		}
  2979  		if rexB == 0 {
  2980  			ctxt.Andptr[0] |= 0x20
  2981  		}
  2982  		ctxt.Andptr = ctxt.Andptr[1:]
  2983  
  2984  		ctxt.Andptr[0] = 0x7c
  2985  		ctxt.Andptr[0] |= prefBit
  2986  		ctxt.Andptr = ctxt.Andptr[1:]
  2987  	}
  2988  }
  2989  
  2990  func doasm(ctxt *obj.Link, p *obj.Prog) {
  2991  	ctxt.Curp = p // TODO
  2992  
  2993  	o := opindex[p.As&obj.AMask]
  2994  
  2995  	if o == nil {
  2996  		ctxt.Diag("asmins: missing op %v", p)
  2997  		return
  2998  	}
  2999  
  3000  	pre := prefixof(ctxt, p, &p.From)
  3001  	if pre != 0 {
  3002  		ctxt.Andptr[0] = byte(pre)
  3003  		ctxt.Andptr = ctxt.Andptr[1:]
  3004  	}
  3005  	pre = prefixof(ctxt, p, &p.To)
  3006  	if pre != 0 {
  3007  		ctxt.Andptr[0] = byte(pre)
  3008  		ctxt.Andptr = ctxt.Andptr[1:]
  3009  	}
  3010  
  3011  	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
  3012  	// which encodes as SHRQ $32(DX*0), AX.
  3013  	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
  3014  	// Change encoding generated by assemblers and compilers and remove.
  3015  	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
  3016  		p.From3 = new(obj.Addr)
  3017  		p.From3.Type = obj.TYPE_REG
  3018  		p.From3.Reg = p.From.Index
  3019  		p.From.Index = 0
  3020  	}
  3021  
  3022  	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
  3023  	// Change encoding generated by assemblers and compilers (if any) and remove.
  3024  	switch p.As {
  3025  	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
  3026  		if p.From3Type() == obj.TYPE_NONE {
  3027  			p.From3 = new(obj.Addr)
  3028  			*p.From3 = p.From
  3029  			p.From = obj.Addr{}
  3030  			p.From.Type = obj.TYPE_CONST
  3031  			p.From.Offset = p.To.Offset
  3032  			p.To.Offset = 0
  3033  		}
  3034  	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
  3035  		if p.From3Type() == obj.TYPE_NONE {
  3036  			p.From3 = new(obj.Addr)
  3037  			*p.From3 = p.To
  3038  			p.To = obj.Addr{}
  3039  			p.To.Type = obj.TYPE_CONST
  3040  			p.To.Offset = p.From3.Offset
  3041  			p.From3.Offset = 0
  3042  		}
  3043  	}
  3044  
  3045  	if p.Ft == 0 {
  3046  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  3047  	}
  3048  	if p.Tt == 0 {
  3049  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  3050  	}
  3051  
  3052  	ft := int(p.Ft) * Ymax
  3053  	f3t := Ynone * Ymax
  3054  	if p.From3 != nil {
  3055  		f3t = oclass(ctxt, p, p.From3) * Ymax
  3056  	}
  3057  	tt := int(p.Tt) * Ymax
  3058  
  3059  	xo := obj.Bool2int(o.op[0] == 0x0f)
  3060  	z := 0
  3061  	var a *obj.Addr
  3062  	var l int
  3063  	var op int
  3064  	var q *obj.Prog
  3065  	var r *obj.Reloc
  3066  	var rel obj.Reloc
  3067  	var v int64
  3068  	for i := range o.ytab {
  3069  		yt := &o.ytab[i]
  3070  		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
  3071  			switch o.prefix {
  3072  			case Px1: /* first option valid only in 32-bit mode */
  3073  				if ctxt.Mode == 64 && z == 0 {
  3074  					z += int(yt.zoffset) + xo
  3075  					continue
  3076  				}
  3077  			case Pq: /* 16 bit escape and opcode escape */
  3078  				ctxt.Andptr[0] = Pe
  3079  				ctxt.Andptr = ctxt.Andptr[1:]
  3080  
  3081  				ctxt.Andptr[0] = Pm
  3082  				ctxt.Andptr = ctxt.Andptr[1:]
  3083  
  3084  			case Pq3: /* 16 bit escape, Rex.w, and opcode escape */
  3085  				ctxt.Andptr[0] = Pe
  3086  				ctxt.Andptr = ctxt.Andptr[1:]
  3087  
  3088  				ctxt.Andptr[0] = Pw
  3089  				ctxt.Andptr = ctxt.Andptr[1:]
  3090  				ctxt.Andptr[0] = Pm
  3091  				ctxt.Andptr = ctxt.Andptr[1:]
  3092  
  3093  			case Pf2, /* xmm opcode escape */
  3094  				Pf3:
  3095  				ctxt.Andptr[0] = byte(o.prefix)
  3096  				ctxt.Andptr = ctxt.Andptr[1:]
  3097  
  3098  				ctxt.Andptr[0] = Pm
  3099  				ctxt.Andptr = ctxt.Andptr[1:]
  3100  
  3101  			case Pm: /* opcode escape */
  3102  				ctxt.Andptr[0] = Pm
  3103  				ctxt.Andptr = ctxt.Andptr[1:]
  3104  
  3105  			case Pe: /* 16 bit escape */
  3106  				ctxt.Andptr[0] = Pe
  3107  				ctxt.Andptr = ctxt.Andptr[1:]
  3108  
  3109  			case Pw: /* 64-bit escape */
  3110  				if p.Mode != 64 {
  3111  					ctxt.Diag("asmins: illegal 64: %v", p)
  3112  				}
  3113  				ctxt.Rexflag |= Pw
  3114  
  3115  			case Pw8: /* 64-bit escape if z >= 8 */
  3116  				if z >= 8 {
  3117  					if p.Mode != 64 {
  3118  						ctxt.Diag("asmins: illegal 64: %v", p)
  3119  					}
  3120  					ctxt.Rexflag |= Pw
  3121  				}
  3122  
  3123  			case Pb: /* botch */
  3124  				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  3125  					goto bad
  3126  				}
  3127  				// NOTE(rsc): This is probably safe to do always,
  3128  				// but when enabled it chooses different encodings
  3129  				// than the old cmd/internal/obj/i386 code did,
  3130  				// which breaks our "same bits out" checks.
  3131  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  3132  				// in the original obj/i386, and it would encode
  3133  				// (using a valid, shorter form) as 3c 00 if we enabled
  3134  				// the call to bytereg here.
  3135  				if p.Mode == 64 {
  3136  					bytereg(&p.From, &p.Ft)
  3137  					bytereg(&p.To, &p.Tt)
  3138  				}
  3139  
  3140  			case P32: /* 32 bit but illegal if 64-bit mode */
  3141  				if p.Mode == 64 {
  3142  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  3143  				}
  3144  
  3145  			case Py: /* 64-bit only, no prefix */
  3146  				if p.Mode != 64 {
  3147  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3148  				}
  3149  
  3150  			case Py1: /* 64-bit only if z < 1, no prefix */
  3151  				if z < 1 && p.Mode != 64 {
  3152  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3153  				}
  3154  
  3155  			case Py3: /* 64-bit only if z < 3, no prefix */
  3156  				if z < 3 && p.Mode != 64 {
  3157  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
  3158  				}
  3159  			}
  3160  
  3161  			if z >= len(o.op) {
  3162  				log.Fatalf("asmins bad table %v", p)
  3163  			}
  3164  			op = int(o.op[z])
  3165  			if op == 0x0f {
  3166  				ctxt.Andptr[0] = byte(op)
  3167  				ctxt.Andptr = ctxt.Andptr[1:]
  3168  				z++
  3169  				op = int(o.op[z])
  3170  			}
  3171  
  3172  			switch yt.zcase {
  3173  			default:
  3174  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  3175  				return
  3176  
  3177  			case Zpseudo:
  3178  				break
  3179  
  3180  			case Zlit:
  3181  				for ; ; z++ {
  3182  					op = int(o.op[z])
  3183  					if op == 0 {
  3184  						break
  3185  					}
  3186  					ctxt.Andptr[0] = byte(op)
  3187  					ctxt.Andptr = ctxt.Andptr[1:]
  3188  				}
  3189  
  3190  			case Zlitm_r:
  3191  				for ; ; z++ {
  3192  					op = int(o.op[z])
  3193  					if op == 0 {
  3194  						break
  3195  					}
  3196  					ctxt.Andptr[0] = byte(op)
  3197  					ctxt.Andptr = ctxt.Andptr[1:]
  3198  				}
  3199  				asmand(ctxt, p, &p.From, &p.To)
  3200  
  3201  			case Zmb_r:
  3202  				bytereg(&p.From, &p.Ft)
  3203  				fallthrough
  3204  
  3205  				/* fall through */
  3206  			case Zm_r:
  3207  				ctxt.Andptr[0] = byte(op)
  3208  				ctxt.Andptr = ctxt.Andptr[1:]
  3209  
  3210  				asmand(ctxt, p, &p.From, &p.To)
  3211  
  3212  			case Zm2_r:
  3213  				ctxt.Andptr[0] = byte(op)
  3214  				ctxt.Andptr = ctxt.Andptr[1:]
  3215  				ctxt.Andptr[0] = byte(o.op[z+1])
  3216  				ctxt.Andptr = ctxt.Andptr[1:]
  3217  				asmand(ctxt, p, &p.From, &p.To)
  3218  
  3219  			case Zm_r_xm:
  3220  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3221  				asmand(ctxt, p, &p.From, &p.To)
  3222  
  3223  			case Zm_r_xm_vex:
  3224  				ctxt.Vexflag = 1
  3225  				vexprefix(ctxt, &p.To, &p.From, o.prefix)
  3226  				ctxt.Andptr[0] = byte(op)
  3227  				ctxt.Andptr = ctxt.Andptr[1:]
  3228  				asmand(ctxt, p, &p.From, &p.To)
  3229  
  3230  			case Zm_r_xm_nr:
  3231  				ctxt.Rexflag = 0
  3232  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3233  				asmand(ctxt, p, &p.From, &p.To)
  3234  
  3235  			case Zm_r_i_xm:
  3236  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3237  				asmand(ctxt, p, &p.From, p.From3)
  3238  				ctxt.Andptr[0] = byte(p.To.Offset)
  3239  				ctxt.Andptr = ctxt.Andptr[1:]
  3240  
  3241  			case Zm_r_3d:
  3242  				ctxt.Andptr[0] = 0x0f
  3243  				ctxt.Andptr = ctxt.Andptr[1:]
  3244  				ctxt.Andptr[0] = 0x0f
  3245  				ctxt.Andptr = ctxt.Andptr[1:]
  3246  				asmand(ctxt, p, &p.From, &p.To)
  3247  				ctxt.Andptr[0] = byte(op)
  3248  				ctxt.Andptr = ctxt.Andptr[1:]
  3249  
  3250  			case Zibm_r:
  3251  				for {
  3252  					tmp1 := z
  3253  					z++
  3254  					op = int(o.op[tmp1])
  3255  					if op == 0 {
  3256  						break
  3257  					}
  3258  					ctxt.Andptr[0] = byte(op)
  3259  					ctxt.Andptr = ctxt.Andptr[1:]
  3260  				}
  3261  				asmand(ctxt, p, p.From3, &p.To)
  3262  				ctxt.Andptr[0] = byte(p.From.Offset)
  3263  				ctxt.Andptr = ctxt.Andptr[1:]
  3264  
  3265  			case Zaut_r:
  3266  				ctxt.Andptr[0] = 0x8d
  3267  				ctxt.Andptr = ctxt.Andptr[1:] /* leal */
  3268  				if p.From.Type != obj.TYPE_ADDR {
  3269  					ctxt.Diag("asmins: Zaut sb type ADDR")
  3270  				}
  3271  				p.From.Type = obj.TYPE_MEM
  3272  				asmand(ctxt, p, &p.From, &p.To)
  3273  				p.From.Type = obj.TYPE_ADDR
  3274  
  3275  			case Zm_o:
  3276  				ctxt.Andptr[0] = byte(op)
  3277  				ctxt.Andptr = ctxt.Andptr[1:]
  3278  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3279  
  3280  			case Zr_m:
  3281  				ctxt.Andptr[0] = byte(op)
  3282  				ctxt.Andptr = ctxt.Andptr[1:]
  3283  				asmand(ctxt, p, &p.To, &p.From)
  3284  
  3285  			case Zr_m_xm_vex:
  3286  				ctxt.Vexflag = 1
  3287  				vexprefix(ctxt, &p.From, &p.To, o.prefix)
  3288  				ctxt.Andptr[0] = byte(op)
  3289  				ctxt.Andptr = ctxt.Andptr[1:]
  3290  				asmand(ctxt, p, &p.To, &p.From)
  3291  
  3292  			case Zr_m_xm:
  3293  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3294  				asmand(ctxt, p, &p.To, &p.From)
  3295  
  3296  			case Zr_m_xm_nr:
  3297  				ctxt.Rexflag = 0
  3298  				mediaop(ctxt, o, op, int(yt.zoffset), z)
  3299  				asmand(ctxt, p, &p.To, &p.From)
  3300  
  3301  			case Zo_m:
  3302  				ctxt.Andptr[0] = byte(op)
  3303  				ctxt.Andptr = ctxt.Andptr[1:]
  3304  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3305  
  3306  			case Zcallindreg:
  3307  				r = obj.Addrel(ctxt.Cursym)
  3308  				r.Off = int32(p.Pc)
  3309  				r.Type = obj.R_CALLIND
  3310  				r.Siz = 0
  3311  				fallthrough
  3312  
  3313  			case Zo_m64:
  3314  				ctxt.Andptr[0] = byte(op)
  3315  				ctxt.Andptr = ctxt.Andptr[1:]
  3316  				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
  3317  
  3318  			case Zm_ibo:
  3319  				ctxt.Andptr[0] = byte(op)
  3320  				ctxt.Andptr = ctxt.Andptr[1:]
  3321  				asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3322  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.To, nil))
  3323  				ctxt.Andptr = ctxt.Andptr[1:]
  3324  
  3325  			case Zibo_m:
  3326  				ctxt.Andptr[0] = byte(op)
  3327  				ctxt.Andptr = ctxt.Andptr[1:]
  3328  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3329  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3330  				ctxt.Andptr = ctxt.Andptr[1:]
  3331  
  3332  			case Zibo_m_xm:
  3333  				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
  3334  				asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3335  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3336  				ctxt.Andptr = ctxt.Andptr[1:]
  3337  
  3338  			case Z_ib, Zib_:
  3339  				if yt.zcase == Zib_ {
  3340  					a = &p.From
  3341  				} else {
  3342  					a = &p.To
  3343  				}
  3344  				ctxt.Andptr[0] = byte(op)
  3345  				ctxt.Andptr = ctxt.Andptr[1:]
  3346  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, a, nil))
  3347  				ctxt.Andptr = ctxt.Andptr[1:]
  3348  
  3349  			case Zib_rp:
  3350  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3351  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3352  				ctxt.Andptr = ctxt.Andptr[1:]
  3353  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3354  				ctxt.Andptr = ctxt.Andptr[1:]
  3355  
  3356  			case Zil_rp:
  3357  				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3358  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3359  				ctxt.Andptr = ctxt.Andptr[1:]
  3360  				if o.prefix == Pe {
  3361  					v = vaddr(ctxt, p, &p.From, nil)
  3362  					ctxt.Andptr[0] = byte(v)
  3363  					ctxt.Andptr = ctxt.Andptr[1:]
  3364  					ctxt.Andptr[0] = byte(v >> 8)
  3365  					ctxt.Andptr = ctxt.Andptr[1:]
  3366  				} else {
  3367  					relput4(ctxt, p, &p.From)
  3368  				}
  3369  
  3370  			case Zo_iw:
  3371  				ctxt.Andptr[0] = byte(op)
  3372  				ctxt.Andptr = ctxt.Andptr[1:]
  3373  				if p.From.Type != obj.TYPE_NONE {
  3374  					v = vaddr(ctxt, p, &p.From, nil)
  3375  					ctxt.Andptr[0] = byte(v)
  3376  					ctxt.Andptr = ctxt.Andptr[1:]
  3377  					ctxt.Andptr[0] = byte(v >> 8)
  3378  					ctxt.Andptr = ctxt.Andptr[1:]
  3379  				}
  3380  
  3381  			case Ziq_rp:
  3382  				v = vaddr(ctxt, p, &p.From, &rel)
  3383  				l = int(v >> 32)
  3384  				if l == 0 && rel.Siz != 8 {
  3385  					//p->mark |= 0100;
  3386  					//print("zero: %llux %v\n", v, p);
  3387  					ctxt.Rexflag &^= (0x40 | Rxw)
  3388  
  3389  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3390  					ctxt.Andptr[0] = byte(0xb8 + reg[p.To.Reg])
  3391  					ctxt.Andptr = ctxt.Andptr[1:]
  3392  					if rel.Type != 0 {
  3393  						r = obj.Addrel(ctxt.Cursym)
  3394  						*r = rel
  3395  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3396  					}
  3397  
  3398  					put4(ctxt, int32(v))
  3399  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
  3400  
  3401  					//p->mark |= 0100;
  3402  					//print("sign: %llux %v\n", v, p);
  3403  					ctxt.Andptr[0] = 0xc7
  3404  					ctxt.Andptr = ctxt.Andptr[1:]
  3405  
  3406  					asmando(ctxt, p, &p.To, 0)
  3407  					put4(ctxt, int32(v)) /* need all 8 */
  3408  				} else {
  3409  					//print("all: %llux %v\n", v, p);
  3410  					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
  3411  
  3412  					ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3413  					ctxt.Andptr = ctxt.Andptr[1:]
  3414  					if rel.Type != 0 {
  3415  						r = obj.Addrel(ctxt.Cursym)
  3416  						*r = rel
  3417  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3418  					}
  3419  
  3420  					put8(ctxt, v)
  3421  				}
  3422  
  3423  			case Zib_rr:
  3424  				ctxt.Andptr[0] = byte(op)
  3425  				ctxt.Andptr = ctxt.Andptr[1:]
  3426  				asmand(ctxt, p, &p.To, &p.To)
  3427  				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
  3428  				ctxt.Andptr = ctxt.Andptr[1:]
  3429  
  3430  			case Z_il, Zil_:
  3431  				if yt.zcase == Zil_ {
  3432  					a = &p.From
  3433  				} else {
  3434  					a = &p.To
  3435  				}
  3436  				ctxt.Andptr[0] = byte(op)
  3437  				ctxt.Andptr = ctxt.Andptr[1:]
  3438  				if o.prefix == Pe {
  3439  					v = vaddr(ctxt, p, a, nil)
  3440  					ctxt.Andptr[0] = byte(v)
  3441  					ctxt.Andptr = ctxt.Andptr[1:]
  3442  					ctxt.Andptr[0] = byte(v >> 8)
  3443  					ctxt.Andptr = ctxt.Andptr[1:]
  3444  				} else {
  3445  					relput4(ctxt, p, a)
  3446  				}
  3447  
  3448  			case Zm_ilo, Zilo_m:
  3449  				ctxt.Andptr[0] = byte(op)
  3450  				ctxt.Andptr = ctxt.Andptr[1:]
  3451  				if yt.zcase == Zilo_m {
  3452  					a = &p.From
  3453  					asmando(ctxt, p, &p.To, int(o.op[z+1]))
  3454  				} else {
  3455  					a = &p.To
  3456  					asmando(ctxt, p, &p.From, int(o.op[z+1]))
  3457  				}
  3458  
  3459  				if o.prefix == Pe {
  3460  					v = vaddr(ctxt, p, a, nil)
  3461  					ctxt.Andptr[0] = byte(v)
  3462  					ctxt.Andptr = ctxt.Andptr[1:]
  3463  					ctxt.Andptr[0] = byte(v >> 8)
  3464  					ctxt.Andptr = ctxt.Andptr[1:]
  3465  				} else {
  3466  					relput4(ctxt, p, a)
  3467  				}
  3468  
  3469  			case Zil_rr:
  3470  				ctxt.Andptr[0] = byte(op)
  3471  				ctxt.Andptr = ctxt.Andptr[1:]
  3472  				asmand(ctxt, p, &p.To, &p.To)
  3473  				if o.prefix == Pe {
  3474  					v = vaddr(ctxt, p, &p.From, nil)
  3475  					ctxt.Andptr[0] = byte(v)
  3476  					ctxt.Andptr = ctxt.Andptr[1:]
  3477  					ctxt.Andptr[0] = byte(v >> 8)
  3478  					ctxt.Andptr = ctxt.Andptr[1:]
  3479  				} else {
  3480  					relput4(ctxt, p, &p.From)
  3481  				}
  3482  
  3483  			case Z_rp:
  3484  				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  3485  				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
  3486  				ctxt.Andptr = ctxt.Andptr[1:]
  3487  
  3488  			case Zrp_:
  3489  				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  3490  				ctxt.Andptr[0] = byte(op + reg[p.From.Reg])
  3491  				ctxt.Andptr = ctxt.Andptr[1:]
  3492  
  3493  			case Zclr:
  3494  				ctxt.Rexflag &^= Pw
  3495  				ctxt.Andptr[0] = byte(op)
  3496  				ctxt.Andptr = ctxt.Andptr[1:]
  3497  				asmand(ctxt, p, &p.To, &p.To)
  3498  
  3499  			case Zcallcon, Zjmpcon:
  3500  				if yt.zcase == Zcallcon {
  3501  					ctxt.Andptr[0] = byte(op)
  3502  					ctxt.Andptr = ctxt.Andptr[1:]
  3503  				} else {
  3504  					ctxt.Andptr[0] = byte(o.op[z+1])
  3505  					ctxt.Andptr = ctxt.Andptr[1:]
  3506  				}
  3507  				r = obj.Addrel(ctxt.Cursym)
  3508  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3509  				r.Type = obj.R_PCREL
  3510  				r.Siz = 4
  3511  				r.Add = p.To.Offset
  3512  				put4(ctxt, 0)
  3513  
  3514  			case Zcallind:
  3515  				ctxt.Andptr[0] = byte(op)
  3516  				ctxt.Andptr = ctxt.Andptr[1:]
  3517  				ctxt.Andptr[0] = byte(o.op[z+1])
  3518  				ctxt.Andptr = ctxt.Andptr[1:]
  3519  				r = obj.Addrel(ctxt.Cursym)
  3520  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3521  				r.Type = obj.R_ADDR
  3522  				r.Siz = 4
  3523  				r.Add = p.To.Offset
  3524  				r.Sym = p.To.Sym
  3525  				put4(ctxt, 0)
  3526  
  3527  			case Zcall, Zcallduff:
  3528  				if p.To.Sym == nil {
  3529  					ctxt.Diag("call without target")
  3530  					log.Fatalf("bad code")
  3531  				}
  3532  
  3533  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  3534  					ctxt.Diag("directly calling duff when dynamically linking Go")
  3535  				}
  3536  
  3537  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3538  					// Maintain BP around call, since duffcopy/duffzero can't do it
  3539  					// (the call jumps into the middle of the function).
  3540  					// This makes it possible to see call sites for duffcopy/duffzero in
  3541  					// BP-based profiling tools like Linux perf (which is the
  3542  					// whole point of obj.Framepointer_enabled).
  3543  					// MOVQ BP, -16(SP)
  3544  					// LEAQ -16(SP), BP
  3545  					copy(ctxt.Andptr, bpduff1)
  3546  					ctxt.Andptr = ctxt.Andptr[len(bpduff1):]
  3547  				}
  3548  				ctxt.Andptr[0] = byte(op)
  3549  				ctxt.Andptr = ctxt.Andptr[1:]
  3550  				r = obj.Addrel(ctxt.Cursym)
  3551  				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3552  				r.Sym = p.To.Sym
  3553  				r.Add = p.To.Offset
  3554  				r.Type = obj.R_CALL
  3555  				r.Siz = 4
  3556  				put4(ctxt, 0)
  3557  
  3558  				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
  3559  					// Pop BP pushed above.
  3560  					// MOVQ 0(BP), BP
  3561  					copy(ctxt.Andptr, bpduff2)
  3562  					ctxt.Andptr = ctxt.Andptr[len(bpduff2):]
  3563  				}
  3564  
  3565  			// TODO: jump across functions needs reloc
  3566  			case Zbr, Zjmp, Zloop:
  3567  				if p.To.Sym != nil {
  3568  					if yt.zcase != Zjmp {
  3569  						ctxt.Diag("branch to ATEXT")
  3570  						log.Fatalf("bad code")
  3571  					}
  3572  
  3573  					ctxt.Andptr[0] = byte(o.op[z+1])
  3574  					ctxt.Andptr = ctxt.Andptr[1:]
  3575  					r = obj.Addrel(ctxt.Cursym)
  3576  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3577  					r.Sym = p.To.Sym
  3578  					r.Type = obj.R_PCREL
  3579  					r.Siz = 4
  3580  					put4(ctxt, 0)
  3581  					break
  3582  				}
  3583  
  3584  				// Assumes q is in this function.
  3585  				// TODO: Check in input, preserve in brchain.
  3586  
  3587  				// Fill in backward jump now.
  3588  				q = p.Pcond
  3589  
  3590  				if q == nil {
  3591  					ctxt.Diag("jmp/branch/loop without target")
  3592  					log.Fatalf("bad code")
  3593  				}
  3594  
  3595  				if p.Back&1 != 0 {
  3596  					v = q.Pc - (p.Pc + 2)
  3597  					if v >= -128 {
  3598  						if p.As == AJCXZL {
  3599  							ctxt.Andptr[0] = 0x67
  3600  							ctxt.Andptr = ctxt.Andptr[1:]
  3601  						}
  3602  						ctxt.Andptr[0] = byte(op)
  3603  						ctxt.Andptr = ctxt.Andptr[1:]
  3604  						ctxt.Andptr[0] = byte(v)
  3605  						ctxt.Andptr = ctxt.Andptr[1:]
  3606  					} else if yt.zcase == Zloop {
  3607  						ctxt.Diag("loop too far: %v", p)
  3608  					} else {
  3609  						v -= 5 - 2
  3610  						if yt.zcase == Zbr {
  3611  							ctxt.Andptr[0] = 0x0f
  3612  							ctxt.Andptr = ctxt.Andptr[1:]
  3613  							v--
  3614  						}
  3615  
  3616  						ctxt.Andptr[0] = byte(o.op[z+1])
  3617  						ctxt.Andptr = ctxt.Andptr[1:]
  3618  						ctxt.Andptr[0] = byte(v)
  3619  						ctxt.Andptr = ctxt.Andptr[1:]
  3620  						ctxt.Andptr[0] = byte(v >> 8)
  3621  						ctxt.Andptr = ctxt.Andptr[1:]
  3622  						ctxt.Andptr[0] = byte(v >> 16)
  3623  						ctxt.Andptr = ctxt.Andptr[1:]
  3624  						ctxt.Andptr[0] = byte(v >> 24)
  3625  						ctxt.Andptr = ctxt.Andptr[1:]
  3626  					}
  3627  
  3628  					break
  3629  				}
  3630  
  3631  				// Annotate target; will fill in later.
  3632  				p.Forwd = q.Rel
  3633  
  3634  				q.Rel = p
  3635  				if p.Back&2 != 0 { // short
  3636  					if p.As == AJCXZL {
  3637  						ctxt.Andptr[0] = 0x67
  3638  						ctxt.Andptr = ctxt.Andptr[1:]
  3639  					}
  3640  					ctxt.Andptr[0] = byte(op)
  3641  					ctxt.Andptr = ctxt.Andptr[1:]
  3642  					ctxt.Andptr[0] = 0
  3643  					ctxt.Andptr = ctxt.Andptr[1:]
  3644  				} else if yt.zcase == Zloop {
  3645  					ctxt.Diag("loop too far: %v", p)
  3646  				} else {
  3647  					if yt.zcase == Zbr {
  3648  						ctxt.Andptr[0] = 0x0f
  3649  						ctxt.Andptr = ctxt.Andptr[1:]
  3650  					}
  3651  					ctxt.Andptr[0] = byte(o.op[z+1])
  3652  					ctxt.Andptr = ctxt.Andptr[1:]
  3653  					ctxt.Andptr[0] = 0
  3654  					ctxt.Andptr = ctxt.Andptr[1:]
  3655  					ctxt.Andptr[0] = 0
  3656  					ctxt.Andptr = ctxt.Andptr[1:]
  3657  					ctxt.Andptr[0] = 0
  3658  					ctxt.Andptr = ctxt.Andptr[1:]
  3659  					ctxt.Andptr[0] = 0
  3660  					ctxt.Andptr = ctxt.Andptr[1:]
  3661  				}
  3662  
  3663  				break
  3664  
  3665  			/*
  3666  				v = q->pc - p->pc - 2;
  3667  				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
  3668  					*ctxt->andptr++ = op;
  3669  					*ctxt->andptr++ = v;
  3670  				} else {
  3671  					v -= 5-2;
  3672  					if(yt.zcase == Zbr) {
  3673  						*ctxt->andptr++ = 0x0f;
  3674  						v--;
  3675  					}
  3676  					*ctxt->andptr++ = o->op[z+1];
  3677  					*ctxt->andptr++ = v;
  3678  					*ctxt->andptr++ = v>>8;
  3679  					*ctxt->andptr++ = v>>16;
  3680  					*ctxt->andptr++ = v>>24;
  3681  				}
  3682  			*/
  3683  
  3684  			case Zbyte:
  3685  				v = vaddr(ctxt, p, &p.From, &rel)
  3686  				if rel.Siz != 0 {
  3687  					rel.Siz = uint8(op)
  3688  					r = obj.Addrel(ctxt.Cursym)
  3689  					*r = rel
  3690  					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3691  				}
  3692  
  3693  				ctxt.Andptr[0] = byte(v)
  3694  				ctxt.Andptr = ctxt.Andptr[1:]
  3695  				if op > 1 {
  3696  					ctxt.Andptr[0] = byte(v >> 8)
  3697  					ctxt.Andptr = ctxt.Andptr[1:]
  3698  					if op > 2 {
  3699  						ctxt.Andptr[0] = byte(v >> 16)
  3700  						ctxt.Andptr = ctxt.Andptr[1:]
  3701  						ctxt.Andptr[0] = byte(v >> 24)
  3702  						ctxt.Andptr = ctxt.Andptr[1:]
  3703  						if op > 4 {
  3704  							ctxt.Andptr[0] = byte(v >> 32)
  3705  							ctxt.Andptr = ctxt.Andptr[1:]
  3706  							ctxt.Andptr[0] = byte(v >> 40)
  3707  							ctxt.Andptr = ctxt.Andptr[1:]
  3708  							ctxt.Andptr[0] = byte(v >> 48)
  3709  							ctxt.Andptr = ctxt.Andptr[1:]
  3710  							ctxt.Andptr[0] = byte(v >> 56)
  3711  							ctxt.Andptr = ctxt.Andptr[1:]
  3712  						}
  3713  					}
  3714  				}
  3715  			}
  3716  
  3717  			return
  3718  		}
  3719  		z += int(yt.zoffset) + xo
  3720  	}
  3721  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  3722  		var pp obj.Prog
  3723  		var t []byte
  3724  		if p.As == mo[0].as {
  3725  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  3726  				t = mo[0].op[:]
  3727  				switch mo[0].code {
  3728  				default:
  3729  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  3730  
  3731  				case 0: /* lit */
  3732  					for z = 0; t[z] != E; z++ {
  3733  						ctxt.Andptr[0] = t[z]
  3734  						ctxt.Andptr = ctxt.Andptr[1:]
  3735  					}
  3736  
  3737  				case 1: /* r,m */
  3738  					ctxt.Andptr[0] = t[0]
  3739  					ctxt.Andptr = ctxt.Andptr[1:]
  3740  
  3741  					asmando(ctxt, p, &p.To, int(t[1]))
  3742  
  3743  				case 2: /* m,r */
  3744  					ctxt.Andptr[0] = t[0]
  3745  					ctxt.Andptr = ctxt.Andptr[1:]
  3746  
  3747  					asmando(ctxt, p, &p.From, int(t[1]))
  3748  
  3749  				case 3: /* r,m - 2op */
  3750  					ctxt.Andptr[0] = t[0]
  3751  					ctxt.Andptr = ctxt.Andptr[1:]
  3752  
  3753  					ctxt.Andptr[0] = t[1]
  3754  					ctxt.Andptr = ctxt.Andptr[1:]
  3755  					asmando(ctxt, p, &p.To, int(t[2]))
  3756  					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  3757  
  3758  				case 4: /* m,r - 2op */
  3759  					ctxt.Andptr[0] = t[0]
  3760  					ctxt.Andptr = ctxt.Andptr[1:]
  3761  
  3762  					ctxt.Andptr[0] = t[1]
  3763  					ctxt.Andptr = ctxt.Andptr[1:]
  3764  					asmando(ctxt, p, &p.From, int(t[2]))
  3765  					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  3766  
  3767  				case 5: /* load full pointer, trash heap */
  3768  					if t[0] != 0 {
  3769  						ctxt.Andptr[0] = t[0]
  3770  						ctxt.Andptr = ctxt.Andptr[1:]
  3771  					}
  3772  					switch p.To.Index {
  3773  					default:
  3774  						goto bad
  3775  
  3776  					case REG_DS:
  3777  						ctxt.Andptr[0] = 0xc5
  3778  						ctxt.Andptr = ctxt.Andptr[1:]
  3779  
  3780  					case REG_SS:
  3781  						ctxt.Andptr[0] = 0x0f
  3782  						ctxt.Andptr = ctxt.Andptr[1:]
  3783  						ctxt.Andptr[0] = 0xb2
  3784  						ctxt.Andptr = ctxt.Andptr[1:]
  3785  
  3786  					case REG_ES:
  3787  						ctxt.Andptr[0] = 0xc4
  3788  						ctxt.Andptr = ctxt.Andptr[1:]
  3789  
  3790  					case REG_FS:
  3791  						ctxt.Andptr[0] = 0x0f
  3792  						ctxt.Andptr = ctxt.Andptr[1:]
  3793  						ctxt.Andptr[0] = 0xb4
  3794  						ctxt.Andptr = ctxt.Andptr[1:]
  3795  
  3796  					case REG_GS:
  3797  						ctxt.Andptr[0] = 0x0f
  3798  						ctxt.Andptr = ctxt.Andptr[1:]
  3799  						ctxt.Andptr[0] = 0xb5
  3800  						ctxt.Andptr = ctxt.Andptr[1:]
  3801  					}
  3802  
  3803  					asmand(ctxt, p, &p.From, &p.To)
  3804  
  3805  				case 6: /* double shift */
  3806  					if t[0] == Pw {
  3807  						if p.Mode != 64 {
  3808  							ctxt.Diag("asmins: illegal 64: %v", p)
  3809  						}
  3810  						ctxt.Rexflag |= Pw
  3811  						t = t[1:]
  3812  					} else if t[0] == Pe {
  3813  						ctxt.Andptr[0] = Pe
  3814  						ctxt.Andptr = ctxt.Andptr[1:]
  3815  						t = t[1:]
  3816  					}
  3817  
  3818  					switch p.From.Type {
  3819  					default:
  3820  						goto bad
  3821  
  3822  					case obj.TYPE_CONST:
  3823  						ctxt.Andptr[0] = 0x0f
  3824  						ctxt.Andptr = ctxt.Andptr[1:]
  3825  						ctxt.Andptr[0] = t[0]
  3826  						ctxt.Andptr = ctxt.Andptr[1:]
  3827  						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3828  						ctxt.Andptr[0] = byte(p.From.Offset)
  3829  						ctxt.Andptr = ctxt.Andptr[1:]
  3830  
  3831  					case obj.TYPE_REG:
  3832  						switch p.From.Reg {
  3833  						default:
  3834  							goto bad
  3835  
  3836  						case REG_CL, REG_CX:
  3837  							ctxt.Andptr[0] = 0x0f
  3838  							ctxt.Andptr = ctxt.Andptr[1:]
  3839  							ctxt.Andptr[0] = t[1]
  3840  							ctxt.Andptr = ctxt.Andptr[1:]
  3841  							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
  3842  						}
  3843  					}
  3844  
  3845  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3846  				// where you load the TLS base register into a register and then index off that
  3847  				// register to access the actual TLS variables. Systems that allow direct TLS access
  3848  				// are handled in prefixof above and should not be listed here.
  3849  				case 7: /* mov tls, r */
  3850  					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
  3851  						ctxt.Diag("invalid load of TLS: %v", p)
  3852  					}
  3853  
  3854  					if p.Mode == 32 {
  3855  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  3856  						// where you load the TLS base register into a register and then index off that
  3857  						// register to access the actual TLS variables. Systems that allow direct TLS access
  3858  						// are handled in prefixof above and should not be listed here.
  3859  						switch ctxt.Headtype {
  3860  						default:
  3861  							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3862  
  3863  						case obj.Hlinux,
  3864  							obj.Hnacl:
  3865  							// ELF TLS base is 0(GS).
  3866  							pp.From = p.From
  3867  
  3868  							pp.From.Type = obj.TYPE_MEM
  3869  							pp.From.Reg = REG_GS
  3870  							pp.From.Offset = 0
  3871  							pp.From.Index = REG_NONE
  3872  							pp.From.Scale = 0
  3873  							ctxt.Andptr[0] = 0x65
  3874  							ctxt.Andptr = ctxt.Andptr[1:] // GS
  3875  							ctxt.Andptr[0] = 0x8B
  3876  							ctxt.Andptr = ctxt.Andptr[1:]
  3877  							asmand(ctxt, p, &pp.From, &p.To)
  3878  
  3879  						case obj.Hplan9:
  3880  							if ctxt.Plan9privates == nil {
  3881  								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3882  							}
  3883  							pp.From = obj.Addr{}
  3884  							pp.From.Type = obj.TYPE_MEM
  3885  							pp.From.Name = obj.NAME_EXTERN
  3886  							pp.From.Sym = ctxt.Plan9privates
  3887  							pp.From.Offset = 0
  3888  							pp.From.Index = REG_NONE
  3889  							ctxt.Andptr[0] = 0x8B
  3890  							ctxt.Andptr = ctxt.Andptr[1:]
  3891  							asmand(ctxt, p, &pp.From, &p.To)
  3892  
  3893  						case obj.Hwindows:
  3894  							// Windows TLS base is always 0x14(FS).
  3895  							pp.From = p.From
  3896  
  3897  							pp.From.Type = obj.TYPE_MEM
  3898  							pp.From.Reg = REG_FS
  3899  							pp.From.Offset = 0x14
  3900  							pp.From.Index = REG_NONE
  3901  							pp.From.Scale = 0
  3902  							ctxt.Andptr[0] = 0x64
  3903  							ctxt.Andptr = ctxt.Andptr[1:] // FS
  3904  							ctxt.Andptr[0] = 0x8B
  3905  							ctxt.Andptr = ctxt.Andptr[1:]
  3906  							asmand(ctxt, p, &pp.From, &p.To)
  3907  						}
  3908  						break
  3909  					}
  3910  
  3911  					switch ctxt.Headtype {
  3912  					default:
  3913  						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
  3914  
  3915  					case obj.Hlinux:
  3916  						if ctxt.Flag_shared == 0 {
  3917  							log.Fatalf("unknown TLS base location for linux without -shared")
  3918  						}
  3919  						// Note that this is not generating the same insn as the other cases.
  3920  						//     MOV TLS, R_to
  3921  						// becomes
  3922  						//     movq g@gottpoff(%rip), R_to
  3923  						// which is encoded as
  3924  						//     movq 0(%rip), R_to
  3925  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  3926  						// is g, which we can't check here, but will when we assemble the second
  3927  						// instruction.
  3928  						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  3929  
  3930  						ctxt.Andptr[0] = 0x8B
  3931  						ctxt.Andptr = ctxt.Andptr[1:]
  3932  						ctxt.Andptr[0] = byte(0x05 | (reg[p.To.Reg] << 3))
  3933  						ctxt.Andptr = ctxt.Andptr[1:]
  3934  						r = obj.Addrel(ctxt.Cursym)
  3935  						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
  3936  						r.Type = obj.R_TLS_IE
  3937  						r.Siz = 4
  3938  						r.Add = -4
  3939  						put4(ctxt, 0)
  3940  
  3941  					case obj.Hplan9:
  3942  						if ctxt.Plan9privates == nil {
  3943  							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
  3944  						}
  3945  						pp.From = obj.Addr{}
  3946  						pp.From.Type = obj.TYPE_MEM
  3947  						pp.From.Name = obj.NAME_EXTERN
  3948  						pp.From.Sym = ctxt.Plan9privates
  3949  						pp.From.Offset = 0
  3950  						pp.From.Index = REG_NONE
  3951  						ctxt.Rexflag |= Pw
  3952  						ctxt.Andptr[0] = 0x8B
  3953  						ctxt.Andptr = ctxt.Andptr[1:]
  3954  						asmand(ctxt, p, &pp.From, &p.To)
  3955  
  3956  					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  3957  						// TLS base is 0(FS).
  3958  						pp.From = p.From
  3959  
  3960  						pp.From.Type = obj.TYPE_MEM
  3961  						pp.From.Name = obj.NAME_NONE
  3962  						pp.From.Reg = REG_NONE
  3963  						pp.From.Offset = 0
  3964  						pp.From.Index = REG_NONE
  3965  						pp.From.Scale = 0
  3966  						ctxt.Rexflag |= Pw
  3967  						ctxt.Andptr[0] = 0x64
  3968  						ctxt.Andptr = ctxt.Andptr[1:] // FS
  3969  						ctxt.Andptr[0] = 0x8B
  3970  						ctxt.Andptr = ctxt.Andptr[1:]
  3971  						asmand(ctxt, p, &pp.From, &p.To)
  3972  
  3973  					case obj.Hwindows:
  3974  						// Windows TLS base is always 0x28(GS).
  3975  						pp.From = p.From
  3976  
  3977  						pp.From.Type = obj.TYPE_MEM
  3978  						pp.From.Name = obj.NAME_NONE
  3979  						pp.From.Reg = REG_GS
  3980  						pp.From.Offset = 0x28
  3981  						pp.From.Index = REG_NONE
  3982  						pp.From.Scale = 0
  3983  						ctxt.Rexflag |= Pw
  3984  						ctxt.Andptr[0] = 0x65
  3985  						ctxt.Andptr = ctxt.Andptr[1:] // GS
  3986  						ctxt.Andptr[0] = 0x8B
  3987  						ctxt.Andptr = ctxt.Andptr[1:]
  3988  						asmand(ctxt, p, &pp.From, &p.To)
  3989  					}
  3990  				}
  3991  				return
  3992  			}
  3993  		}
  3994  	}
  3995  	goto bad
  3996  
  3997  bad:
  3998  	if p.Mode != 64 {
  3999  		/*
  4000  		 * here, the assembly has failed.
  4001  		 * if its a byte instruction that has
  4002  		 * unaddressable registers, try to
  4003  		 * exchange registers and reissue the
  4004  		 * instruction with the operands renamed.
  4005  		 */
  4006  		pp := *p
  4007  
  4008  		unbytereg(&pp.From, &pp.Ft)
  4009  		unbytereg(&pp.To, &pp.Tt)
  4010  
  4011  		z := int(p.From.Reg)
  4012  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4013  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4014  			// For now, different to keep bit-for-bit compatibility.
  4015  			if p.Mode == 32 {
  4016  				breg := byteswapreg(ctxt, &p.To)
  4017  				if breg != REG_AX {
  4018  					ctxt.Andptr[0] = 0x87
  4019  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4020  					asmando(ctxt, p, &p.From, reg[breg])
  4021  					subreg(&pp, z, breg)
  4022  					doasm(ctxt, &pp)
  4023  					ctxt.Andptr[0] = 0x87
  4024  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4025  					asmando(ctxt, p, &p.From, reg[breg])
  4026  				} else {
  4027  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4028  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4029  					subreg(&pp, z, REG_AX)
  4030  					doasm(ctxt, &pp)
  4031  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4032  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4033  				}
  4034  				return
  4035  			}
  4036  
  4037  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  4038  				// We certainly don't want to exchange
  4039  				// with AX if the op is MUL or DIV.
  4040  				ctxt.Andptr[0] = 0x87
  4041  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4042  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4043  				subreg(&pp, z, REG_BX)
  4044  				doasm(ctxt, &pp)
  4045  				ctxt.Andptr[0] = 0x87
  4046  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
  4047  				asmando(ctxt, p, &p.From, reg[REG_BX])
  4048  			} else {
  4049  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4050  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4051  				subreg(&pp, z, REG_AX)
  4052  				doasm(ctxt, &pp)
  4053  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4054  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
  4055  			}
  4056  			return
  4057  		}
  4058  
  4059  		z = int(p.To.Reg)
  4060  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  4061  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  4062  			// For now, different to keep bit-for-bit compatibility.
  4063  			if p.Mode == 32 {
  4064  				breg := byteswapreg(ctxt, &p.From)
  4065  				if breg != REG_AX {
  4066  					ctxt.Andptr[0] = 0x87
  4067  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4068  					asmando(ctxt, p, &p.To, reg[breg])
  4069  					subreg(&pp, z, breg)
  4070  					doasm(ctxt, &pp)
  4071  					ctxt.Andptr[0] = 0x87
  4072  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4073  					asmando(ctxt, p, &p.To, reg[breg])
  4074  				} else {
  4075  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4076  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4077  					subreg(&pp, z, REG_AX)
  4078  					doasm(ctxt, &pp)
  4079  					ctxt.Andptr[0] = byte(0x90 + reg[z])
  4080  					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4081  				}
  4082  				return
  4083  			}
  4084  
  4085  			if isax(&p.From) {
  4086  				ctxt.Andptr[0] = 0x87
  4087  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4088  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4089  				subreg(&pp, z, REG_BX)
  4090  				doasm(ctxt, &pp)
  4091  				ctxt.Andptr[0] = 0x87
  4092  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
  4093  				asmando(ctxt, p, &p.To, reg[REG_BX])
  4094  			} else {
  4095  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4096  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4097  				subreg(&pp, z, REG_AX)
  4098  				doasm(ctxt, &pp)
  4099  				ctxt.Andptr[0] = byte(0x90 + reg[z])
  4100  				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
  4101  			}
  4102  			return
  4103  		}
  4104  	}
  4105  
  4106  	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
  4107  	return
  4108  }
  4109  
  4110  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4111  // which is not referenced in a.
  4112  // If a is empty, it returns BX to account for MULB-like instructions
  4113  // that might use DX and AX.
  4114  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4115  	cand := 1
  4116  	canc := cand
  4117  	canb := canc
  4118  	cana := canb
  4119  
  4120  	if a.Type == obj.TYPE_NONE {
  4121  		cand = 0
  4122  		cana = cand
  4123  	}
  4124  
  4125  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4126  		switch a.Reg {
  4127  		case REG_NONE:
  4128  			cand = 0
  4129  			cana = cand
  4130  
  4131  		case REG_AX, REG_AL, REG_AH:
  4132  			cana = 0
  4133  
  4134  		case REG_BX, REG_BL, REG_BH:
  4135  			canb = 0
  4136  
  4137  		case REG_CX, REG_CL, REG_CH:
  4138  			canc = 0
  4139  
  4140  		case REG_DX, REG_DL, REG_DH:
  4141  			cand = 0
  4142  		}
  4143  	}
  4144  
  4145  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4146  		switch a.Index {
  4147  		case REG_AX:
  4148  			cana = 0
  4149  
  4150  		case REG_BX:
  4151  			canb = 0
  4152  
  4153  		case REG_CX:
  4154  			canc = 0
  4155  
  4156  		case REG_DX:
  4157  			cand = 0
  4158  		}
  4159  	}
  4160  
  4161  	if cana != 0 {
  4162  		return REG_AX
  4163  	}
  4164  	if canb != 0 {
  4165  		return REG_BX
  4166  	}
  4167  	if canc != 0 {
  4168  		return REG_CX
  4169  	}
  4170  	if cand != 0 {
  4171  		return REG_DX
  4172  	}
  4173  
  4174  	ctxt.Diag("impossible byte register")
  4175  	log.Fatalf("bad code")
  4176  	return 0
  4177  }
  4178  
  4179  func isbadbyte(a *obj.Addr) bool {
  4180  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4181  }
  4182  
  4183  var naclret = []uint8{
  4184  	0x5e, // POPL SI
  4185  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4186  	0x83,
  4187  	0xe6,
  4188  	0xe0, // ANDL $~31, SI
  4189  	0x4c,
  4190  	0x01,
  4191  	0xfe, // ADDQ R15, SI
  4192  	0xff,
  4193  	0xe6, // JMP SI
  4194  }
  4195  
  4196  var naclret8 = []uint8{
  4197  	0x5d, // POPL BP
  4198  	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
  4199  	0x83,
  4200  	0xe5,
  4201  	0xe0, // ANDL $~31, BP
  4202  	0xff,
  4203  	0xe5, // JMP BP
  4204  }
  4205  
  4206  var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
  4207  
  4208  var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
  4209  
  4210  var naclmovs = []uint8{
  4211  	0x89,
  4212  	0xf6, // MOVL SI, SI
  4213  	0x49,
  4214  	0x8d,
  4215  	0x34,
  4216  	0x37, // LEAQ (R15)(SI*1), SI
  4217  	0x89,
  4218  	0xff, // MOVL DI, DI
  4219  	0x49,
  4220  	0x8d,
  4221  	0x3c,
  4222  	0x3f, // LEAQ (R15)(DI*1), DI
  4223  }
  4224  
  4225  var naclstos = []uint8{
  4226  	0x89,
  4227  	0xff, // MOVL DI, DI
  4228  	0x49,
  4229  	0x8d,
  4230  	0x3c,
  4231  	0x3f, // LEAQ (R15)(DI*1), DI
  4232  }
  4233  
  4234  func nacltrunc(ctxt *obj.Link, reg int) {
  4235  	if reg >= REG_R8 {
  4236  		ctxt.Andptr[0] = 0x45
  4237  		ctxt.Andptr = ctxt.Andptr[1:]
  4238  	}
  4239  	reg = (reg - REG_AX) & 7
  4240  	ctxt.Andptr[0] = 0x89
  4241  	ctxt.Andptr = ctxt.Andptr[1:]
  4242  	ctxt.Andptr[0] = byte(3<<6 | reg<<3 | reg)
  4243  	ctxt.Andptr = ctxt.Andptr[1:]
  4244  }
  4245  
  4246  func asmins(ctxt *obj.Link, p *obj.Prog) {
  4247  	ctxt.Andptr = ctxt.And[:]
  4248  	ctxt.Asmode = int(p.Mode)
  4249  
  4250  	if p.As == obj.AUSEFIELD {
  4251  		r := obj.Addrel(ctxt.Cursym)
  4252  		r.Off = 0
  4253  		r.Siz = 0
  4254  		r.Sym = p.From.Sym
  4255  		r.Type = obj.R_USEFIELD
  4256  		return
  4257  	}
  4258  
  4259  	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
  4260  		switch p.As {
  4261  		case obj.ARET:
  4262  			copy(ctxt.Andptr, naclret8)
  4263  			ctxt.Andptr = ctxt.Andptr[len(naclret8):]
  4264  			return
  4265  
  4266  		case obj.ACALL,
  4267  			obj.AJMP:
  4268  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4269  				ctxt.Andptr[0] = 0x83
  4270  				ctxt.Andptr = ctxt.Andptr[1:]
  4271  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4272  				ctxt.Andptr = ctxt.Andptr[1:]
  4273  				ctxt.Andptr[0] = 0xe0
  4274  				ctxt.Andptr = ctxt.Andptr[1:]
  4275  			}
  4276  
  4277  		case AINT:
  4278  			ctxt.Andptr[0] = 0xf4
  4279  			ctxt.Andptr = ctxt.Andptr[1:]
  4280  			return
  4281  		}
  4282  	}
  4283  
  4284  	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
  4285  		if p.As == AREP {
  4286  			ctxt.Rep++
  4287  			return
  4288  		}
  4289  
  4290  		if p.As == AREPN {
  4291  			ctxt.Repn++
  4292  			return
  4293  		}
  4294  
  4295  		if p.As == ALOCK {
  4296  			ctxt.Lock++
  4297  			return
  4298  		}
  4299  
  4300  		if p.As != ALEAQ && p.As != ALEAL {
  4301  			if p.From.Index != obj.TYPE_NONE && p.From.Scale > 0 {
  4302  				nacltrunc(ctxt, int(p.From.Index))
  4303  			}
  4304  			if p.To.Index != obj.TYPE_NONE && p.To.Scale > 0 {
  4305  				nacltrunc(ctxt, int(p.To.Index))
  4306  			}
  4307  		}
  4308  
  4309  		switch p.As {
  4310  		case obj.ARET:
  4311  			copy(ctxt.Andptr, naclret)
  4312  			ctxt.Andptr = ctxt.Andptr[len(naclret):]
  4313  			return
  4314  
  4315  		case obj.ACALL,
  4316  			obj.AJMP:
  4317  			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
  4318  				// ANDL $~31, reg
  4319  				ctxt.Andptr[0] = 0x83
  4320  				ctxt.Andptr = ctxt.Andptr[1:]
  4321  
  4322  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
  4323  				ctxt.Andptr = ctxt.Andptr[1:]
  4324  				ctxt.Andptr[0] = 0xe0
  4325  				ctxt.Andptr = ctxt.Andptr[1:]
  4326  
  4327  				// ADDQ R15, reg
  4328  				ctxt.Andptr[0] = 0x4c
  4329  				ctxt.Andptr = ctxt.Andptr[1:]
  4330  
  4331  				ctxt.Andptr[0] = 0x01
  4332  				ctxt.Andptr = ctxt.Andptr[1:]
  4333  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_AX))
  4334  				ctxt.Andptr = ctxt.Andptr[1:]
  4335  			}
  4336  
  4337  			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
  4338  				// ANDL $~31, reg
  4339  				ctxt.Andptr[0] = 0x41
  4340  				ctxt.Andptr = ctxt.Andptr[1:]
  4341  
  4342  				ctxt.Andptr[0] = 0x83
  4343  				ctxt.Andptr = ctxt.Andptr[1:]
  4344  				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_R8))
  4345  				ctxt.Andptr = ctxt.Andptr[1:]
  4346  				ctxt.Andptr[0] = 0xe0
  4347  				ctxt.Andptr = ctxt.Andptr[1:]
  4348  
  4349  				// ADDQ R15, reg
  4350  				ctxt.Andptr[0] = 0x4d
  4351  				ctxt.Andptr = ctxt.Andptr[1:]
  4352  
  4353  				ctxt.Andptr[0] = 0x01
  4354  				ctxt.Andptr = ctxt.Andptr[1:]
  4355  				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_R8))
  4356  				ctxt.Andptr = ctxt.Andptr[1:]
  4357  			}
  4358  
  4359  		case AINT:
  4360  			ctxt.Andptr[0] = 0xf4
  4361  			ctxt.Andptr = ctxt.Andptr[1:]
  4362  			return
  4363  
  4364  		case ASCASB,
  4365  			ASCASW,
  4366  			ASCASL,
  4367  			ASCASQ,
  4368  			ASTOSB,
  4369  			ASTOSW,
  4370  			ASTOSL,
  4371  			ASTOSQ:
  4372  			copy(ctxt.Andptr, naclstos)
  4373  			ctxt.Andptr = ctxt.Andptr[len(naclstos):]
  4374  
  4375  		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
  4376  			copy(ctxt.Andptr, naclmovs)
  4377  			ctxt.Andptr = ctxt.Andptr[len(naclmovs):]
  4378  		}
  4379  
  4380  		if ctxt.Rep != 0 {
  4381  			ctxt.Andptr[0] = 0xf3
  4382  			ctxt.Andptr = ctxt.Andptr[1:]
  4383  			ctxt.Rep = 0
  4384  		}
  4385  
  4386  		if ctxt.Repn != 0 {
  4387  			ctxt.Andptr[0] = 0xf2
  4388  			ctxt.Andptr = ctxt.Andptr[1:]
  4389  			ctxt.Repn = 0
  4390  		}
  4391  
  4392  		if ctxt.Lock != 0 {
  4393  			ctxt.Andptr[0] = 0xf0
  4394  			ctxt.Andptr = ctxt.Andptr[1:]
  4395  			ctxt.Lock = 0
  4396  		}
  4397  	}
  4398  
  4399  	ctxt.Rexflag = 0
  4400  	ctxt.Vexflag = 0
  4401  	and0 := ctxt.Andptr
  4402  	ctxt.Asmode = int(p.Mode)
  4403  	doasm(ctxt, p)
  4404  	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
  4405  		/*
  4406  		 * as befits the whole approach of the architecture,
  4407  		 * the rex prefix must appear before the first opcode byte
  4408  		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4409  		 * before the 0f opcode escape!), or it might be ignored.
  4410  		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4411  		 */
  4412  		if p.Mode != 64 {
  4413  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
  4414  		}
  4415  		n := -cap(ctxt.Andptr) + cap(and0)
  4416  		var c int
  4417  		var np int
  4418  		for np = 0; np < n; np++ {
  4419  			c = int(and0[np])
  4420  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4421  				break
  4422  			}
  4423  		}
  4424  
  4425  		copy(and0[np+1:], and0[np:n])
  4426  		and0[np] = byte(0x40 | ctxt.Rexflag)
  4427  		ctxt.Andptr = ctxt.Andptr[1:]
  4428  	}
  4429  
  4430  	n := -cap(ctxt.Andptr) + cap(ctxt.And[:])
  4431  	var r *obj.Reloc
  4432  	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
  4433  		r = &ctxt.Cursym.R[i:][0]
  4434  		if int64(r.Off) < p.Pc {
  4435  			break
  4436  		}
  4437  		if ctxt.Rexflag != 0 {
  4438  			r.Off++
  4439  		}
  4440  		if r.Type == obj.R_PCREL {
  4441  			// PC-relative addressing is relative to the end of the instruction,
  4442  			// but the relocations applied by the linker are relative to the end
  4443  			// of the relocation. Because immediate instruction
  4444  			// arguments can follow the PC-relative memory reference in the
  4445  			// instruction encoding, the two may not coincide. In this case,
  4446  			// adjust addend so that linker can keep relocating relative to the
  4447  			// end of the relocation.
  4448  			r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4449  		}
  4450  	}
  4451  
  4452  	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
  4453  		switch p.To.Reg {
  4454  		case REG_SP:
  4455  			copy(ctxt.Andptr, naclspfix)
  4456  			ctxt.Andptr = ctxt.Andptr[len(naclspfix):]
  4457  
  4458  		case REG_BP:
  4459  			copy(ctxt.Andptr, naclbpfix)
  4460  			ctxt.Andptr = ctxt.Andptr[len(naclbpfix):]
  4461  		}
  4462  	}
  4463  }